AleksBlacky committed on
Commit
de73359
·
1 Parent(s): 6afc0d2

model api in different file

Browse files
Files changed (4) hide show
  1. __pycache__/model.cpython-39.pyc +0 -0
  2. app.py +8 -39
  3. model.py +32 -0
  4. model_api.py +0 -17
__pycache__/model.cpython-39.pyc ADDED
Binary file (1.55 kB). View file
 
app.py CHANGED
@@ -1,20 +1,13 @@
1
  import streamlit as st
2
- import pickle
3
-
4
  from pandas import DataFrame
5
- import transformers
6
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
7
  import seaborn as sns
 
8
 
9
  st.markdown("# Hello, friend!")
10
  st.markdown(" This magic application going to help you with understanding of science paper topic! Cool? Yeah! ")
11
- # st.markdown("<img width=200px src='https://rozetked.me/images/uploads/dwoilp3BVjlE.jpg'>", unsafe_allow_html=True)
12
 
13
- st.write("Loading tokenizer and dict")
14
- model_name_global = "allenai/scibert_scivocab_uncased"
15
- tokenizer_ = AutoTokenizer.from_pretrained(model_name_global)
16
- with open('./models/scibert/decode_dict.pkl', 'rb') as f:
17
- decode_dict = pickle.load(f)
18
 
19
  with st.form(key="my_form"):
20
  st.markdown("### 🎈 Do you want a little magic? ")
@@ -63,38 +56,14 @@ if not submit_button:
63
  st.stop()
64
 
65
 
66
- # allow_output_mutation=True
67
- @st.cache(suppress_st_warning=True)
68
- def load_model():
69
- st.write("Loading big model")
70
- return AutoModelForSequenceClassification.from_pretrained("models/scibert/")
71
-
72
-
73
- def make_predict(tokens, decode_dict):
74
- # tokenizer_ = AutoTokenizer.from_pretrained(model_name_global)
75
- # tokens = tokenizer_(title + abstract, return_tensors="pt")
76
-
77
- model_ = load_model()
78
- outs = model_(tokens.input_ids)
79
-
80
- probs = outs["logits"].softmax(dim=-1).tolist()[0]
81
- topic_probs = {}
82
- for i, p in enumerate(probs):
83
- if p > 0.1:
84
- topic_probs[decode_dict[i]] = p
85
- return topic_probs
86
-
87
-
88
- model_local = "models/scibert/"
89
-
90
  title = doc_title
91
  abstract = doc_abstract
92
- try:
93
- tokens = tokenizer_(title + abstract, return_tensors="pt")
94
- except ValueError:
95
- st.error("Word parsing into tokens went wrong! Is input valid? If yes, pls contact author [email protected]")
96
 
97
- predicts = make_predict(tokens, decode_dict)
98
 
99
  st.markdown("## 🎈 Yor article probably about: ")
100
  st.header("")
 
1
  import streamlit as st
 
 
2
  from pandas import DataFrame
 
 
3
  import seaborn as sns
4
+ from model import ArxivClassifierModel
5
 
6
  st.markdown("# Hello, friend!")
7
  st.markdown(" This magic application going to help you with understanding of science paper topic! Cool? Yeah! ")
 
8
 
9
+ # st.write("Loading model")
10
+ model = ArxivClassifierModel()
 
 
 
11
 
12
  with st.form(key="my_form"):
13
  st.markdown("### 🎈 Do you want a little magic? ")
 
56
  st.stop()
57
 
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  title = doc_title
60
  abstract = doc_abstract
61
+ # try:
62
+ # tokens = tokenizer_(title + abstract, return_tensors="pt")
63
+ # except ValueError:
64
+ # st.error("Word parsing into tokens went wrong! Is input valid? If yes, pls contact author [email protected]")
65
 
66
+ predicts = model.make_predict(title + abstract)
67
 
68
  st.markdown("## 🎈 Yor article probably about: ")
69
  st.header("")
model.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pickle

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification


# Cache the expensive model load at module level.  Decorating an *instance*
# method with @st.cache makes Streamlit hash `self` (which holds the loaded
# torch model) on every call -- slow at best, broken at worst.
# allow_output_mutation=True stops Streamlit from re-hashing the returned
# model object to detect mutations (the commented-out hint in the original
# code shows this flag was intended but dropped).
@st.cache(allow_output_mutation=True, suppress_st_warning=True)
def _load_model():
    """Load the fine-tuned SciBERT classifier from the local checkpoint."""
    st.write("Loading big model")
    return AutoModelForSequenceClassification.from_pretrained("models/scibert/")


class ArxivClassifierModel:
    """SciBERT arXiv-topic classifier: bundles tokenizer, model and the
    index -> topic-name decode dictionary loaded from disk."""

    def __init__(self):
        self.model = _load_model()

        model_name_global = "allenai/scibert_scivocab_uncased"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name_global)
        # decode_dict maps class index -> human-readable topic label.
        with open('./models/scibert/decode_dict.pkl', 'rb') as f:
            self.decode_dict = pickle.load(f)

    def make_predict(self, text):
        """Return a ``{topic: probability}`` dict for the given paper text.

        Parameters
        ----------
        text : str
            Concatenated title + abstract of the paper.

        Returns
        -------
        dict
            Topics whose softmax probability exceeds 0.1.
        """
        tokens = self.tokenizer(text, return_tensors="pt")

        # Inference only: no_grad avoids building the autograd graph.
        with torch.no_grad():
            outs = self.model(tokens.input_ids)

        probs = outs["logits"].softmax(dim=-1).tolist()[0]
        # Keep only reasonably confident topics (threshold 0.1).
        return {self.decode_dict[i]: p for i, p in enumerate(probs) if p > 0.1}
model_api.py DELETED
@@ -1,17 +0,0 @@
1
- #
2
- #
3
- # def make_predict(model_name_global, model_local, decode_dict, title, abstract):
4
- # model_name_global="allenai/scibert_scivocab_uncased"
5
- # model_local="scibert_trainer/checkpoint-2000/"
6
- #
7
- # tokenizer_ = AutoTokenizer.from_pretrained(model_name_global)
8
- # tokens = tokenizer_(title + abstract, return_tensors="pt")
9
- # model_ = AutoModelForSequenceClassification.from_pretrained(model_local)
10
- # outs = model_(tokens.input_ids)
11
- #
12
- # probs = outs["logits"].softmax(dim=-1).tolist()[0]
13
- # topic_probs = {}
14
- # for i, p in enumerate(probs):
15
- # if p > 0.1:
16
- # topic_probs[decode_dict[i]] = p
17
- # return topic_probs