| | import gradio as gr |
| | import rebiber |
| | import os |
| | import uuid |
| | |
| |
|
| | |
# Resolve the data files: the bib list is looked up next to rebiber's own
# __init__.py, the abbreviation table relative to the working directory.
_rebiber_init = os.path.abspath(rebiber.__file__)
filepath = _rebiber_init.replace("__init__.py", "")
bib_list_path = os.path.join(filepath, "bib_list.txt")
abbr_tsv_path = "abbr.tsv"

# Both lookup tables are loaded once at import time and shared by every call
# to the request handler.
bib_db = rebiber.construct_bib_db(bib_list_path, start_dir=filepath)

abbr_dict = rebiber.normalize.load_abbr_tsv(abbr_tsv_path)
| |
|
| |
|
def process(input_bib, shorten, remove_keys, deduplicate, sort):
    """Normalize pasted BibTeX text with rebiber and return the UI updates.

    Args:
        input_bib: Raw BibTeX text from the input textbox.
        shorten: When true, the module-level abbreviation table is passed to
            rebiber; otherwise an empty list is passed.
        remove_keys: Field names forwarded as ``removed_value_names``.
        deduplicate: Forwarded unchanged to ``rebiber.normalize_bib``.
        sort: Forwarded unchanged to ``rebiber.normalize_bib``.

    Returns:
        A 3-tuple ``(output_text, run_id, download_button_update)``. The file
        ``output_<run_id>.bib`` is left on disk so it can be served later.
    """
    if "@" not in input_bib:
        # Nothing that looks like a BibTeX entry. The handler is wired to
        # three output components, so return one value for each of them
        # instead of a bare string.
        return "N/A", "", gr.update(visible=False)

    random_id = uuid.uuid4().hex
    input_path = f"input_{random_id}.bib"
    output_path = f"output_{random_id}.bib"

    try:
        with open(input_path, "w") as f:
            f.write(input_bib.replace("\t", " "))
        all_bib_entries = rebiber.load_bib_file(input_path)
    finally:
        # The input copy is only a hand-off to rebiber's file-based loader;
        # remove it so one scratch file per request does not accumulate.
        if os.path.exists(input_path):
            os.remove(input_path)
    print("# Input Bib Entries:", len(all_bib_entries))

    abbr_dict_pass = abbr_dict if shorten else []
    rebiber.normalize_bib(bib_db, all_bib_entries, output_path,
                          abbr_dict=abbr_dict_pass,
                          deduplicate=deduplicate,
                          sort=sort,
                          removed_value_names=remove_keys)

    with open(output_path) as f:
        # NOTE(review): as written this replacement maps a string to itself;
        # confirm the intended indentation width of the replacement.
        output_bib = f.read().replace("\n ", "\n ")

    return output_bib, random_id, gr.update(visible=True)
| |
|
| |
|
# Sample input pre-filled into the input textbox: one arXiv-style entry and
# one entry whose venue is only given as "Findings".
example_input = """
@article{lin2020birds,
title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
journal={arXiv preprint arXiv:2005.00683},
year={2020}
}
@inproceedings{Lin2020CommonGenAC,
title={CommonGen: A Constrained Text Generation Challenge for Generative Commonsense Reasoning},
author={Bill Yuchen Lin and Minghan Shen and Wangchunshu Zhou and Pei Zhou and Chandra Bhagavatula and Yejin Choi and Xiang Ren},
booktitle={Findings},
year={2020}
}
"""

# A single example row holding a single value (the BibTeX text).
examples = [[example_input]]
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| |
|
| |
|
| |
|
| |
|
# Build the UI: BibTeX input and options in the left column, normalized
# output and the download widgets in the right column.
with gr.Blocks() as demo:

    gr.Markdown(
        '''# Rebiber: A tool for normalizing bibtex with official info.
<table>
<tr>
<td>
<a href="https://yuchenlin.xyz/">
<img src="https://img.shields.io/badge/Yuchen%20Lin-🐼-blue?style=social">
</a>
</td>
<td>
<a href="https://github.com/yuchenlin/rebiber">
<img src="https://img.shields.io/badge/Github--blue?style=social&logo=github">
</a>
</td>
<td>
<a href="https://twitter.com/billyuchenlin/status/1353850378438070272?s=20">
<img src="https://img.shields.io/badge/Tweet--blue?style=social&logo=twitter">
</a>
</td>
</tr>
</table>
<span style="font-size:13pt">

We often cite papers using their arXiv versions without noting that they are already __PUBLISHED__ in some conferences. These unofficial bib entries might violate rules about submissions or camera-ready versions for some conferences.
We introduce __Rebiber__, a simple tool in Python to fix them automatically. It is based on the official conference information from the [DBLP](https://dblp.org/) or [the ACL anthology](https://www.aclweb.org/anthology/) (for NLP conferences)!
Apart from handling outdated arXiv citations, __Rebiber__ also normalizes citations in a unified way (DBLP-style), supporting abbreviation and value selection.

</span>
'''
    )

    with gr.Row():
        with gr.Column(scale=3):
            input_bib = gr.Textbox(lines=15, label="Input BIB", value=example_input, interactive=True)
            # A CheckboxGroup's value is the list of *selected* choices, so
            # "nothing selected" is an empty list, not one False per choice.
            removekeys = gr.CheckboxGroup(["url", "biburl", "address", "publisher", "pages", "doi", "volume", "bibsource"],
                                          value=[],
                                          label="Remove Keys", info="Which keys to remove?")
            shorten = gr.Checkbox(label="Abbreviation", info="Shorten the conference/journal names (e.g., `Proceedings of the 2020 International Conference of ...` --> `Proc. of ICML')", value=False)
            dedup = gr.Checkbox(label="Deduplicate entries.", value=False)
            sort = gr.Checkbox(label="Sort alphabetically by ID.", value=False)
            with gr.Row():
                clr_button = gr.Button("Clear")
                button = gr.Button("Submit")
            # Hidden field that carries the run id from `process` to
            # `download_file`.
            ex_uuid = gr.Text(label="UUID", visible=False)
        with gr.Column(scale=3):
            # `interactive` is a constructor argument; only the copy button
            # is a style option.
            output = gr.Textbox(
                label="Output BIB (Note that you can copy the output bib file by clicking the top-right button.)",
                interactive=False,
            ).style(show_copy_button=True)
            # Both download widgets start hidden and are revealed by the
            # handlers below.
            download_btn = gr.Button("Generate Bib File", visible=False)
            download_content = gr.File(visible=False)

    def download_file(ex_uuid):
        """Reveal the file widget for the output written by a previous run.

        Args:
            ex_uuid: Run id read from the hidden UUID textbox.

        Returns:
            A single update for ``download_content``: the file path and
            ``visible=True`` when the output file exists, otherwise a hidden,
            empty widget.
        """
        # The id round-trips through the browser, so only accept the exact
        # shape uuid4().hex produces before building a path from it.
        is_run_id = (
            isinstance(ex_uuid, str)
            and len(ex_uuid) == 32
            and set(ex_uuid) <= set("0123456789abcdef")
        )
        file_path = f"output_{ex_uuid}.bib"
        if not is_run_id or not os.path.isfile(file_path):
            return gr.update(value=None, visible=False)
        return gr.update(value=file_path, visible=True)

    download_btn.click(download_file, inputs=ex_uuid, outputs=download_content)
    button.click(process, inputs=[input_bib, shorten, removekeys, dedup, sort], outputs=[output, ex_uuid, download_btn], api_name="process")

    def clean(text):
        """Return an empty string, used to clear the input textbox."""
        return ""

    clr_button.click(clean, input_bib, input_bib)


if __name__ == "__main__":
    demo.launch()
| |
|
| |
|
| | """ |
| | @article{lin2020birds, |
| | title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models}, |
| | author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang}, |
| | journal={arXiv preprint arXiv:2005.00683}, |
| | year={2020} |
| | } |
| | |
| | @inproceedings{lin2020birds, |
| | address = {Online}, |
| | author = {Lin, Bill Yuchen and |
| | Lee, Seyeon and |
| | Khanna, Rahul and |
| | Ren, Xiang}, |
| | booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)}, |
| | doi = {10.18653/v1/2020.emnlp-main.557}, |
| | pages = {6862--6868}, |
| | publisher = {Association for Computational Linguistics}, |
| | title = {{B}irds have four legs?! {N}umer{S}ense: {P}robing {N}umerical {C}ommonsense {K}nowledge of {P}re-{T}rained {L}anguage {M}odels}, |
| | url = {https://aclanthology.org/2020.emnlp-main.557}, |
| | year = {2020} |
| | } |
| | """ |