Spaces:
Sleeping
Sleeping
Mert Şengil
committed on
Commit
·
ae19be7
1
Parent(s):
19995b3
Add filtering to show only aspect terms present in original text
Browse files
app.py
CHANGED
|
@@ -25,6 +25,16 @@ def is_valid_aspect(word):
|
|
| 25 |
word.isalpha()
|
| 26 |
)
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
def extract_and_rank_aspects(text, max_tokens=64, beams=5):
|
| 29 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(DEVICE)
|
| 30 |
|
|
@@ -46,7 +56,10 @@ def extract_and_rank_aspects(text, max_tokens=64, beams=5):
|
|
| 46 |
all_terms = []
|
| 47 |
for pred in all_predictions:
|
| 48 |
candidates = re.split(r"[;,–—\-]|(?:\s*,\s*)", pred)
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
ranked = Counter(all_terms).most_common()
|
| 52 |
return ranked
|
|
@@ -93,7 +106,7 @@ with gr.Blocks(title="🇹🇷 Türkçe Aspect Term Extraction", theme=gr.themes
|
|
| 93 |
with gr.Column():
|
| 94 |
output = gr.Markdown(
|
| 95 |
label="📊 Sonuçlar",
|
| 96 |
-
value="Sonuçlar
|
| 97 |
)
|
| 98 |
|
| 99 |
# Example texts
|
|
|
|
| 25 |
word.isalpha()
|
| 26 |
)
|
| 27 |
|
| 28 |
+
def is_aspect_in_text(aspect_term, original_text):
    """Return True if ``aspect_term`` occurs as a whole word in ``original_text``.

    The comparison is case-insensitive and anchored on regex word boundaries,
    so an aspect that is only a prefix of a longer word does not count.

    Args:
        aspect_term: Candidate aspect term; surrounding whitespace is ignored.
        original_text: Text in which the term is searched.

    Returns:
        bool: True when the term is found as a whole word, False otherwise
        (including when the term is empty or whitespace-only).
    """
    def _fold(value):
        # str.lower() maps "İ" (U+0130) to "i" followed by the combining dot
        # U+0307, which would prevent a plain "i" in the aspect from matching.
        # Map it to a bare "i" before lowercasing.
        return value.replace("\u0130", "i").lower()

    aspect_lower = _fold(aspect_term.strip())
    if not aspect_lower:
        # An empty term would yield the pattern r"\b\b", which matches any
        # text that contains a word.
        return False
    text_lower = _fold(original_text)

    # Whole-word search; the term is escaped so it is matched literally.
    pattern = r"\b" + re.escape(aspect_lower) + r"\b"
    return bool(re.search(pattern, text_lower, re.IGNORECASE))
|
| 37 |
+
|
| 38 |
def extract_and_rank_aspects(text, max_tokens=64, beams=5):
|
| 39 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(DEVICE)
|
| 40 |
|
|
|
|
| 56 |
all_terms = []
|
| 57 |
for pred in all_predictions:
|
| 58 |
candidates = re.split(r"[;,–—\-]|(?:\s*,\s*)", pred)
|
| 59 |
+
# Sadece orijinal metinde geçen aspect term'leri ekle
|
| 60 |
+
for candidate in candidates:
|
| 61 |
+
if is_valid_aspect(candidate) and is_aspect_in_text(candidate.strip(), text):
|
| 62 |
+
all_terms.append(candidate.strip().lower())
|
| 63 |
|
| 64 |
ranked = Counter(all_terms).most_common()
|
| 65 |
return ranked
|
|
|
|
| 106 |
with gr.Column():
|
| 107 |
output = gr.Markdown(
|
| 108 |
label="📊 Sonuçlar",
|
| 109 |
+
value="📊 Sonuçlar"
|
| 110 |
)
|
| 111 |
|
| 112 |
# Example texts
|