Spaces:

AleksBlacky
/

Science_topic_classifier

Runtime error

App Files Files Community

AleksBlacky committed on Oct 25, 2022

Commit

3722795

1 Parent(s): 8cf1f84

Change UI, add readable topic names

Browse files

Files changed (5) hide show

__pycache__/model.cpython-39.pyc +0 -0
app.py +10 -22
model.py +12 -38
models/maintopic_clf/main_topic_dict.pkl +0 -0
models/scibert/topic_dict.pkl +0 -0

__pycache__/model.cpython-39.pyc CHANGED Viewed

Binary files a/__pycache__/model.cpython-39.pyc and b/__pycache__/model.cpython-39.pyc differ

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import streamlit as st
 from pandas import DataFrame
 import seaborn as sns
-from model import ArxivClassifierModel, ArxivClassifierModelsPipeline
 st.markdown("# Hello, friend!")
 st.markdown(" This magic application going to help you with understanding of science paper topic! Cool? Yeah! ")
@@ -15,12 +15,12 @@ with st.form(key="my_form"):
     with c2:
         doc_title = st.text_area(
-            "Paste your abstract title below (max 100 words)",
             height=210,
         )
         doc_abstract = st.text_area(
-            "Paste your abstract text below (max 100500 words)",
             height=410,
         )
@@ -44,7 +44,7 @@ with st.form(key="my_form"):
                 "⚠️ Your abstract contains "
                 + str(len_abstract)
                 + " words."
-                + " Only the first 50 words will be reviewed. Stay tuned as increased allowance is coming! 😊"
             )
             doc_abstract = doc_abstract[:MAX_WORDS_ABSTRACT]
@@ -68,18 +68,12 @@ st.markdown("## 🎈 Yor article probably about:  ")
 st.header("")
 df = (
-    DataFrame(preds_topic.items(), columns=["Topic", "Prob"])
-        .sort_values(by="Prob", ascending=False)
         .reset_index(drop=True)
 )
 df.index += 1
-df2 = (
-    DataFrame(preds_maintopic.items(), columns=["Topic", "Prob"])
-        .sort_values(by="Prob", ascending=False)
-        .reset_index(drop=True)
-)
-df2.index += 1
 # Add styling
 cmGreen = sns.light_palette("green", as_cmap=True)
@@ -87,27 +81,21 @@ cmRed = sns.light_palette("red", as_cmap=True)
 df = df.style.background_gradient(
     cmap=cmGreen,
     subset=[
-        "Prob",
-    ],
-)
-df2 = df2.style.background_gradient(
-    cmap=cmGreen,
-    subset=[
-        "Prob",
     ],
 )
 c1, c2, c3 = st.columns([1, 3, 1])
 format_dictionary = {
-    "Prob": "{:.1%}",
 }
 df = df.format(format_dictionary)
-df2 = df2.format(format_dictionary)
 with c2:
     st.markdown("#### We suppose your research about:  ")
-    st.table(df2)
     st.markdown("##### More detailed, it's about topic:  ")
     st.table(df)

 import streamlit as st
 from pandas import DataFrame
 import seaborn as sns
+from model import ArxivClassifierModelsPipeline
 st.markdown("# Hello, friend!")
 st.markdown(" This magic application going to help you with understanding of science paper topic! Cool? Yeah! ")
     with c2:
         doc_title = st.text_area(
+            "Paste your paper's title below (max 100 words)",
             height=210,
         )
         doc_abstract = st.text_area(
+            "Paste your paper's abstract text below (max 100500 words)",
             height=410,
         )
                 "⚠️ Your abstract contains "
                 + str(len_abstract)
                 + " words."
+                + " Only the first 500 words will be reviewed. Stay tuned as increased allowance is coming! 😊"
             )
             doc_abstract = doc_abstract[:MAX_WORDS_ABSTRACT]
 st.header("")
 df = (
+    DataFrame(preds_topic.items(), columns=["Topic", "Probability"])
+        .sort_values(by="Probability", ascending=False)
         .reset_index(drop=True)
 )
 df.index += 1
 # Add styling
 cmGreen = sns.light_palette("green", as_cmap=True)
 df = df.style.background_gradient(
     cmap=cmGreen,
     subset=[
+        "Probability",
     ],
 )
 c1, c2, c3 = st.columns([1, 3, 1])
 format_dictionary = {
+    "Probability": "{:.1%}",
 }
 df = df.format(format_dictionary)
 with c2:
     st.markdown("#### We suppose your research about:  ")
+    st.markdown(f"### {preds_maintopic}! ")
+    st.markdown(f"Wow, we're impressed, are you addicted to {preds_maintopic.lower()}?! Coool! ")
     st.markdown("##### More detailed, it's about topic:  ")
     st.table(df)

model.py CHANGED Viewed

@@ -2,36 +2,6 @@ import streamlit as st
 import pickle
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
-class ArxivClassifierModel():
-    def __init__(self):
-        self.model = self.__load_model()
-        model_name_global = "allenai/scibert_scivocab_uncased"
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name_global)
-        with open('./models/scibert/decode_dict.pkl', 'rb') as f:
-            self.decode_dict = pickle.load(f)
-    def make_predict(self, text):
-        # tokenizer_ = AutoTokenizer.from_pretrained(model_name_global)
-        tokens = self.tokenizer(text, return_tensors="pt")
-        outs = self.model(tokens.input_ids)
-        probs = outs["logits"].softmax(dim=-1).tolist()[0]
-        topic_probs = {}
-        for i, p in enumerate(probs):
-            if p > 0.1:
-                topic_probs[self.decode_dict[i]] = p
-        return topic_probs
-    #  allow_output_mutation=True
-    @st.cache(suppress_st_warning=True)
-    def __load_model(self):
-        st.write("Loading big model")
-        return AutoModelForSequenceClassification.from_pretrained("models/scibert/")
 class ArxivClassifierModelsPipeline():
@@ -51,6 +21,12 @@ class ArxivClassifierModelsPipeline():
         with open('models/maintopic_clf/decode_dict_maintopic.pkl', 'rb') as f:
             self.decode_dict_maintopic = pickle.load(f)
     def make_predict(self, text):
         tokens_topic = self.topic_tokenizer(text, return_tensors="pt")
         topic_outs = self.model_topic_clf(tokens_topic.input_ids)
@@ -58,19 +34,17 @@ class ArxivClassifierModelsPipeline():
         topic_probs = {}
         for i, p in enumerate(probs_topic):
             if p > 0.1:
-                topic_probs[self.decode_dict_topic[i]] = p
         tokens_maintopic = self.maintopic_tokenizer(text, return_tensors="pt")
         maintopic_outs = self.model_maintopic_clf(tokens_maintopic.input_ids)
         probs_maintopic = maintopic_outs["logits"].softmax(dim=-1).tolist()[0]
-        maintopic_probs = {}
-        for i, p in enumerate(probs_maintopic):
-            if p > 0.1:
-                maintopic_probs[self.decode_dict_maintopic[i]] = p
-        return topic_probs, maintopic_probs
     @st.cache(suppress_st_warning=True)
     def __load_topic_clf(self):

 import pickle
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 class ArxivClassifierModelsPipeline():
         with open('models/maintopic_clf/decode_dict_maintopic.pkl', 'rb') as f:
             self.decode_dict_maintopic = pickle.load(f)
+        with open('models/maintopic_clf/main_topic_dict.pkl', 'rb') as f:
+            self.main_topic_dict = pickle.load(f)
+        with open('models/scibert/topic_dict.pkl', 'rb') as f:
+            self.topic_dict = pickle.load(f)
     def make_predict(self, text):
         tokens_topic = self.topic_tokenizer(text, return_tensors="pt")
         topic_outs = self.model_topic_clf(tokens_topic.input_ids)
         topic_probs = {}
         for i, p in enumerate(probs_topic):
             if p > 0.1:
+                if self.decode_dict_topic[i] in self.topic_dict:
+                    topic_probs[self.topic_dict[self.decode_dict_topic[i]]] = p
+                else:
+                    topic_probs[self.decode_dict_topic[i]] = p
         tokens_maintopic = self.maintopic_tokenizer(text, return_tensors="pt")
         maintopic_outs = self.model_maintopic_clf(tokens_maintopic.input_ids)
         probs_maintopic = maintopic_outs["logits"].softmax(dim=-1).tolist()[0]
+        maintopic_probs = self.decode_dict_maintopic[0]
+        return topic_probs, self.main_topic_dict[maintopic_probs]
     @st.cache(suppress_st_warning=True)
     def __load_topic_clf(self):

models/maintopic_clf/main_topic_dict.pkl ADDED Viewed

Binary file (663 Bytes). View file

models/scibert/topic_dict.pkl ADDED Viewed

Binary file (1.8 kB). View file