Spaces:
Running
Running
| import gradio as gr | |
| import numpy as np | |
| import urllib.parse | |
| import traceback | |
| from ocr_engine import ocr_items, ocr_text | |
| from keyword_engine import build_focus_text, extract_keywords_from_focus | |
def build_link(keywords, full_text):
    """Build the Google-search card (HTML) and the keyword summary string.

    Args:
        keywords: Sequence of keyword strings. When it is empty or ``None``
            the search query falls back to the first 120 characters of
            ``full_text``.
        full_text: Text used for the fallback query; ``None`` is treated as
            an empty string.

    Returns:
        A ``(html, shown_text)`` tuple. ``html`` is the markup with the
        heading and the search button (all dynamic values HTML-escaped).
        ``shown_text`` is the comma-separated label, left unescaped because
        it is meant for plain-text display.
    """
    # Function-scope import keeps the file's import block untouched and
    # avoids clashing with any local name called ``html``.
    from html import escape

    if keywords:
        query = " ".join(keywords)
        shown = keywords
    else:
        query = (full_text or "")[:120]
        shown = ["(dùng văn bản gốc)"]

    shown_text = ", ".join(shown)
    url = "https://www.google.com/search?q=" + urllib.parse.quote(query)

    # The keywords are derived from OCR output (see process), i.e. from
    # user-supplied images, so they must never reach the markup verbatim.
    heading = escape(shown_text)
    # quote() already yields attribute-safe characters; escaping again is a
    # no-op today and guards against future changes to the URL construction.
    href = escape(url, quote=True)

    card = f"""
    <div style="text-align:center;padding:20px;border-radius:10px;
    background:#f0f9ff;border:1px solid #cce3de">
    <h3 style="color:#d90429">{heading}</h3>
    <a href="{href}" target="_blank"
    style="background:#4285F4;color:white;padding:10px 20px;
    border-radius:5px;font-weight:bold;text-decoration:none">
    Tra cứu Google
    </a>
    </div>
    """
    return card, shown_text
def process(image):
    """Run OCR on ``image`` and derive keywords plus a search link.

    Args:
        image: The uploaded picture; anything other than a ``numpy.ndarray``
            (including ``None``) is rejected with a message.

    Returns:
        A 3-tuple ``(text, keywords, link_html)``. On success ``text`` is the
        full OCR text, ``keywords`` the comma-separated keyword string and
        ``link_html`` the search card from ``build_link``. On any failure the
        first element carries the message and the other two are empty
        strings.
    """
    if image is None:
        return "Chưa có ảnh đầu vào.", "", ""

    try:
        if not isinstance(image, np.ndarray):
            return "Định dạng ảnh không hợp lệ.", "", ""

        recognized = ocr_text(image)
        if not recognized:
            return "Không trích xuất được nội dung.", "", ""

        # Prefer the focus text built from the OCR items; fall back to the
        # complete OCR text when that comes back empty.
        focus = build_focus_text(
            ocr_items(image),
            img_h=image.shape[0],
            top_ratio=0.35,
            max_lines=30,
        ) or recognized

        top_keywords = extract_keywords_from_focus(focus, top_n=5)
        link_html, keyword_line = build_link(top_keywords, recognized)
        return recognized, keyword_line, link_html
    except Exception as exc:
        # UI boundary: report the failure (with traceback) in the text
        # output instead of letting the exception propagate.
        return f"Lỗi hệ thống:\n{exc}\n{traceback.format_exc()}", "", ""
# Gradio front end: image upload on the left, results on the right.
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# the file; confirm the Row/Column layout against the deployed app.
with gr.Blocks(title="OCR & Tra cứu thông minh") as app:
    gr.Markdown("# OCR – PaddleOCR + KeyBERT")

    with gr.Row():
        # Input side: the picture and the trigger button.
        with gr.Column(scale=1):
            # type="numpy" matches the ndarray check performed by process().
            input_image = gr.Image(type="numpy", label="Tải ảnh")
            btn_submit = gr.Button("Bắt đầu trích xuất", variant="primary")

        # Output side: search card first, then raw text and keywords.
        with gr.Column(scale=1):
            output_link = gr.HTML(label="Link tra cứu")
            output_text = gr.Textbox(lines=10, label="Văn bản OCR")
            output_keywords = gr.Textbox(lines=5, label="Từ khóa")

    # The outputs order mirrors process()'s return tuple:
    # (text, keywords, link_html).
    btn_submit.click(
        fn=process,
        inputs=input_image,
        outputs=[output_text, output_keywords, output_link],
    )

# Start the server only when executed as a script, not on import.
if __name__ == "__main__":
    app.launch()