AleksBlacky committed on
Commit
de73359
·
1 Parent(s): 6afc0d2

model api in different file

Browse files
Files changed (4) hide show
  1. __pycache__/model.cpython-39.pyc +0 -0
  2. app.py +8 -39
  3. model.py +32 -0
  4. model_api.py +0 -17
__pycache__/model.cpython-39.pyc ADDED
Binary file (1.55 kB). View file
 
app.py CHANGED
@@ -1,20 +1,13 @@
1
  import streamlit as st
2
- import pickle
3
-
4
  from pandas import DataFrame
5
- import transformers
6
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
7
  import seaborn as sns
 
8
 
9
  st.markdown("# Hello, friend!")
10
  st.markdown(" This magic application going to help you with understanding of science paper topic! Cool? Yeah! ")
11
- # st.markdown("<img width=200px src='https://rozetked.me/images/uploads/dwoilp3BVjlE.jpg'>", unsafe_allow_html=True)
12
 
13
- st.write("Loading tokenizer and dict")
14
- model_name_global = "allenai/scibert_scivocab_uncased"
15
- tokenizer_ = AutoTokenizer.from_pretrained(model_name_global)
16
- with open('./models/scibert/decode_dict.pkl', 'rb') as f:
17
- decode_dict = pickle.load(f)
18
 
19
  with st.form(key="my_form"):
20
  st.markdown("### 🎈 Do you want a little magic? ")
@@ -63,38 +56,14 @@ if not submit_button:
63
  st.stop()
64
 
65
 
66
- # allow_output_mutation=True
67
- @st.cache(suppress_st_warning=True)
68
- def load_model():
69
- st.write("Loading big model")
70
- return AutoModelForSequenceClassification.from_pretrained("models/scibert/")
71
-
72
-
73
- def make_predict(tokens, decode_dict):
74
- # tokenizer_ = AutoTokenizer.from_pretrained(model_name_global)
75
- # tokens = tokenizer_(title + abstract, return_tensors="pt")
76
-
77
- model_ = load_model()
78
- outs = model_(tokens.input_ids)
79
-
80
- probs = outs["logits"].softmax(dim=-1).tolist()[0]
81
- topic_probs = {}
82
- for i, p in enumerate(probs):
83
- if p > 0.1:
84
- topic_probs[decode_dict[i]] = p
85
- return topic_probs
86
-
87
-
88
- model_local = "models/scibert/"
89
-
90
  title = doc_title
91
  abstract = doc_abstract
92
- try:
93
- tokens = tokenizer_(title + abstract, return_tensors="pt")
94
- except ValueError:
95
- st.error("Word parsing into tokens went wrong! Is input valid? If yes, pls contact author [email protected]")
96
 
97
- predicts = make_predict(tokens, decode_dict)
98
 
99
  st.markdown("## 🎈 Yor article probably about: ")
100
  st.header("")
 
1
  import streamlit as st
 
 
2
  from pandas import DataFrame
 
 
3
  import seaborn as sns
4
+ from model import ArxivClassifierModel
5
 
6
  st.markdown("# Hello, friend!")
7
  st.markdown(" This magic application going to help you with understanding of science paper topic! Cool? Yeah! ")
 
8
 
9
+ # st.write("Loading model")
10
+ model = ArxivClassifierModel()
 
 
 
11
 
12
  with st.form(key="my_form"):
13
  st.markdown("### 🎈 Do you want a little magic? ")
 
56
  st.stop()
57
 
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  title = doc_title
60
  abstract = doc_abstract
61
+ # try:
62
+ # tokens = tokenizer_(title + abstract, return_tensors="pt")
63
+ # except ValueError:
64
+ # st.error("Word parsing into tokens went wrong! Is input valid? If yes, pls contact author [email protected]")
65
 
66
+ predicts = model.make_predict(title + abstract)
67
 
68
  st.markdown("## 🎈 Yor article probably about: ")
69
  st.header("")
model.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pickle

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification


# Cache the expensive model load at module level.  Decorating an *instance*
# method with @st.cache makes Streamlit hash `self` (which holds the loaded
# torch model) on every call -- slow at best, broken at worst.
# allow_output_mutation=True stops Streamlit from re-hashing the returned
# model object to detect mutations (the commented-out hint in the original
# code shows this flag was intended but dropped).
@st.cache(allow_output_mutation=True, suppress_st_warning=True)
def _load_model():
    """Load the fine-tuned SciBERT classifier from the local checkpoint."""
    st.write("Loading big model")
    return AutoModelForSequenceClassification.from_pretrained("models/scibert/")


class ArxivClassifierModel:
    """SciBERT arXiv-topic classifier: bundles tokenizer, model and the
    index -> topic-name decode dictionary loaded from disk."""

    def __init__(self):
        self.model = _load_model()

        model_name_global = "allenai/scibert_scivocab_uncased"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name_global)
        # decode_dict maps class index -> human-readable topic label.
        with open('./models/scibert/decode_dict.pkl', 'rb') as f:
            self.decode_dict = pickle.load(f)

    def make_predict(self, text):
        """Return a ``{topic: probability}`` dict for the given paper text.

        Parameters
        ----------
        text : str
            Concatenated title + abstract of the paper.

        Returns
        -------
        dict
            Topics whose softmax probability exceeds 0.1.
        """
        tokens = self.tokenizer(text, return_tensors="pt")

        # Inference only: no_grad avoids building the autograd graph.
        with torch.no_grad():
            outs = self.model(tokens.input_ids)

        probs = outs["logits"].softmax(dim=-1).tolist()[0]
        # Keep only reasonably confident topics (threshold 0.1).
        return {self.decode_dict[i]: p for i, p in enumerate(probs) if p > 0.1}
model_api.py DELETED
@@ -1,17 +0,0 @@
1
- #
2
- #
3
- # def make_predict(model_name_global, model_local, decode_dict, title, abstract):
4
- # model_name_global="allenai/scibert_scivocab_uncased"
5
- # model_local="scibert_trainer/checkpoint-2000/"
6
- #
7
- # tokenizer_ = AutoTokenizer.from_pretrained(model_name_global)
8
- # tokens = tokenizer_(title + abstract, return_tensors="pt")
9
- # model_ = AutoModelForSequenceClassification.from_pretrained(model_local)
10
- # outs = model_(tokens.input_ids)
11
- #
12
- # probs = outs["logits"].softmax(dim=-1).tolist()[0]
13
- # topic_probs = {}
14
- # for i, p in enumerate(probs):
15
- # if p > 0.1:
16
- # topic_probs[decode_dict[i]] = p
17
- # return topic_probs