import gradio as gr
from sentence_transformers import SentenceTransformer
import torch
# ----------------------------- Load Models on Demand -----------------------------
_loaded_models = {}
def load_model(model_name):
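    """Return a cached SentenceTransformer instance, loading it on first use.

    trust_remote_code=True is passed so checkpoints that ship custom modeling code
    (which some of the listed models may require) can be loaded.
    """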
    if model_name not in _loaded_models:
        _loaded_models[model_name] = SentenceTransformer(model_name, trust_remote_code=True)
    return _loaded_models[model_name]
# ----------------------------- Core Functions -----------------------------
def find_similar_documents(query, documents, model_name):
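    """Rank the newline-separated documents by cosine similarity to the query.

    Returns a Markdown-formatted list with one scored entry per document, or a
    short validation message if the query or document box is empty.
    """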
    if not query.strip():
        return "Please enter a query."
    if not documents.strip():
        return "Please enter documents (one per line)."
    doc_list = [d.strip() for d in documents.split('\n') if d.strip()]
    if not doc_list:
        return "Please enter at least one document."
    model = load_model(model_name)
    query_embeddings = model.encode(query, convert_to_tensor=True)
    doc_embeddings = model.encode(doc_list, convert_to_tensor=True)
    similarities = torch.nn.functional.cosine_similarity(query_embeddings, doc_embeddings)
    sorted_indices = torch.argsort(similarities, descending=True)
    results = []
    for i, idx in enumerate(sorted_indices):
        score = similarities[idx].item()
        doc = doc_list[idx]
        results.append(f"{i+1}. Score: {score:.4f} \n  Document: {doc}")
    return "\n\n".join(results)
def compare_models(query, documents, tarka_model, open_model):
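    """Run the same query and documents through two models and return both ranked result strings."""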
    tarka_result = find_similar_documents(query, documents, tarka_model)
    open_result = find_similar_documents(query, documents, open_model)
    return tarka_result, open_result
# ----------------------------- UI Layout -----------------------------
with gr.Blocks(
title="Tarka Embedding Explorer",
theme=gr.themes.Soft(primary_hue="blue", secondary_hue="indigo", font=["Poppins", "sans-serif"])
) as demo:
gr.Markdown("""
# π Tarka Embedding Explorer
Experiment with Tarka Embedding models for semantic similarity search, or compare them with top open-source baselines.
""")
    with gr.Tabs():
        # ---------------- Single Model Tab ----------------
        with gr.Tab("Single Model Search"):
            with gr.Row():
                with gr.Column(scale=1):
                    model_selector = gr.Dropdown(
                        label="Select Model",
                        choices=[
                            "Tarka-AIR/Tarka-Embedding-150M-V1",
                            "Tarka-AIR/Tarka-Embedding-250M-V1",
                            "Tarka-AIR/Tarka-Embedding-350M-V1",
                            "sentence-transformers/all-MiniLM-L6-v2",
                            "intfloat/e5-base-v2",
                            "BAAI/bge-small-en-v1.5"
                        ],
                        value="Tarka-AIR/Tarka-Embedding-150M-V1"
                    )
                    query_input = gr.Textbox(label="Query", placeholder="Enter your query...", lines=2)
                    docs_input = gr.Textbox(label="Documents", placeholder="Enter one document per line...", lines=10)
                    search_btn = gr.Button("Search", variant="primary")
                with gr.Column(scale=1):
                    result_box = gr.Markdown(label="Results")
            search_btn.click(find_similar_documents, [query_input, docs_input, model_selector], result_box)
            query_input.submit(find_similar_documents, [query_input, docs_input, model_selector], result_box)
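            # Example query/document pairs; clicking one fills the inputs above.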
            examples = [
                [
                    "Which planet is known as the Red Planet?",
                    "Venus is often called Earth's twin because of its similar size.\nMars, known for its reddish hue, is called the Red Planet.\nJupiter, the largest planet, has a red spot.\nSaturn has iconic rings."
                ],
                [
                    "What causes seasons on Earth?",
                    "The tilt of Earth's axis causes different sunlight distribution.\nThe moon affects tides but not seasons.\nEarth's orbit has minimal effect on seasons.\nRotation causes day and night."
                ],
                [
                    "What gas do plants release during photosynthesis?",
                    "Plants use sunlight to convert CO₂ into glucose and release oxygen.\nAnimals inhale oxygen and exhale CO₂.\nPhotosynthesis occurs mainly in leaves."
                ]
            ]
            gr.Examples(
                examples=examples,
                inputs=[query_input, docs_input],
                label="Try Examples"
            )
        # ---------------- Comparison Tab ----------------
        with gr.Tab("Model Comparison"):
            with gr.Row():
                with gr.Column(scale=1):
                    tarka_selector = gr.Dropdown(
                        label="Tarka Model",
                        choices=[
                            "Tarka-AIR/Tarka-Embedding-350M-V1",
                            "Tarka-AIR/Tarka-Embedding-250M-V1",
                            "Tarka-AIR/Tarka-Embedding-150M-V1",
                        ],
                        value="Tarka-AIR/Tarka-Embedding-350M-V1"
                    )
                    open_selector = gr.Dropdown(
                        label="Open-Source Model",
                        choices=[
                            "sentence-transformers/all-MiniLM-L6-v2",
                            "intfloat/e5-base-v2",
                            "BAAI/bge-small-en-v1.5",
                            "sentence-transformers/paraphrase-mpnet-base-v2"
                        ],
                        value="intfloat/e5-base-v2"
                    )
                    cmp_query = gr.Textbox(label="Query", placeholder="Enter your search query...", lines=2)
                    cmp_docs = gr.Textbox(label="Documents", placeholder="Enter one document per line...", lines=10)
                    cmp_btn = gr.Button("Compare Models", variant="primary")
                with gr.Column(scale=2):
                    gr.Markdown("### Comparison Results")
                    with gr.Row(visible=False) as comparison_results:
                        with gr.Column():
                            tarka_label = gr.Markdown(visible=False)
                            tarka_output = gr.Markdown(visible=False)
                        with gr.Column():
                            open_label = gr.Markdown(visible=False)
                            open_output = gr.Markdown(visible=False)
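            # The same example set as the single-model tab, reused for side-by-side comparison.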
            examples = [
                [
                    "Which planet is known as the Red Planet?",
                    "Venus is often called Earth's twin because of its similar size.\nMars, known for its reddish hue, is called the Red Planet.\nJupiter, the largest planet, has a red spot.\nSaturn has iconic rings."
                ],
                [
                    "What causes seasons on Earth?",
                    "The tilt of Earth's axis causes different sunlight distribution.\nThe moon affects tides but not seasons.\nEarth's orbit has minimal effect on seasons.\nRotation causes day and night."
                ],
                [
                    "What gas do plants release during photosynthesis?",
                    "Plants use sunlight to convert CO₂ into glucose and release oxygen.\nAnimals inhale oxygen and exhale CO₂.\nPhotosynthesis occurs mainly in leaves."
                ]
            ]
            gr.Examples(
                examples=examples,
                inputs=[cmp_query, cmp_docs],
                label="Try Examples"
            )
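            # Wraps compare_models and returns gr.update() payloads so the hidden
            # results row and per-model labels become visible once a comparison runs.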
            def run_comparison(query, docs, tarka_model, open_model):
                tarka_res, open_res = compare_models(query, docs, tarka_model, open_model)
                return (
                    gr.update(visible=True),
                    gr.update(value=f"### {tarka_model}", visible=True),
                    gr.update(value=tarka_res, visible=True),
                    gr.update(value=f"### {open_model}", visible=True),
                    gr.update(value=open_res, visible=True)
                )
            cmp_btn.click(
                fn=run_comparison,
                inputs=[cmp_query, cmp_docs, tarka_selector, open_selector],
                outputs=[comparison_results, tarka_label, tarka_output, open_label, open_output]
            )
            cmp_query.submit(
                fn=run_comparison,
                inputs=[cmp_query, cmp_docs, tarka_selector, open_selector],
                outputs=[comparison_results, tarka_label, tarka_output, open_label, open_output]
            )

# Launch the app
demo.launch()