# %% Imports and configuration shared by the cells below
import os
import re
import time

import pandas as pd

# Folder holding one rating spreadsheet per evaluated model.  The notebook used
# to hard-code this machine-specific location inside the function; it is kept
# as the fallback so existing runs behave the same, but it can now be
# overridden through the environment or the ``path`` argument.
SCORE_REPORT_DIR = os.environ.get(
    "SCORE_REPORT_DIR",
    r"C:\Users\Inception\Desktop\LLM-based-QA-chatbot-builder\UI\score_report",
)


# %% Average rating per model
def _clean_model_name(filename):
    """Turn a score-report file name into a short chart label.

    The extension is dropped, every run of non-letters becomes a single space,
    and the boilerplate fragments "model ans" and "finetuned" are removed.
    The name is lower-cased first so capitalised names keep all their letters
    (the previous character loop silently dropped upper-case characters).
    """
    stem, _extension = os.path.splitext(filename)
    words = re.sub(r"[^a-z]+", " ", stem.lower())
    for boilerplate in ("model ans", "finetuned"):
        words = words.replace(boilerplate, "")
    # Collapse the gaps left by the removals and trim both ends.
    return " ".join(words.split())


def score_report_bar(path=None):
    """Build a table with the mean rating of every score report in a folder.

    Args:
        path: Directory containing the ``.xlsx`` score reports.  Defaults to
            ``SCORE_REPORT_DIR``.

    Returns:
        A DataFrame with the columns "Model Name" and "Average Rating", one
        row per report.  The columns are present even when no report is found,
        so the bar plot can always be built.

    Raises:
        KeyError: If a report has no "rating" column.
        OSError: If the directory cannot be listed.
    """
    report_dir = SCORE_REPORT_DIR if path is None else path
    rows = []
    # Sorted so the bar order does not depend on the file system.
    for filename in sorted(os.listdir(report_dir)):
        # Skip anything that is not a workbook, including the "~$" lock files
        # Excel creates while a workbook is open.
        if filename.startswith("~$") or not filename.lower().endswith(".xlsx"):
            continue
        ratings = pd.read_excel(os.path.join(report_dir, filename))["rating"].dropna()
        if ratings.empty:
            # Nothing rated yet: no meaningful average (previously a ZeroDivisionError).
            continue
        rows.append({
            "Model Name": _clean_model_name(filename),
            "Average Rating": ratings.mean(),
        })
    return pd.DataFrame(rows, columns=["Model Name", "Average Rating"])


# %% Gradio UI: a button that renders the average ratings as a bar plot
def bar_plot_fn():
    """Return a Gradio bar plot of the average rating per model."""
    # Imported lazily, like the other heavy dependencies in this notebook, so
    # the data helpers above stay usable without Gradio installed.
    import gradio as gr

    ratings = score_report_bar()
    return gr.BarPlot(
        ratings,
        x="Model Name",
        y="Average Rating",
        x_title="Model name",
        y_title="Average Rating",
        # The previous title ("... with made up data") was left over from an
        # example and mislabelled the real ratings.
        title="Average rating per model",
        tooltip=["Model Name", "Average Rating"],
        y_lim=[1, 5],
        width=200,
        height=1000,
    )


def build_bar_plot_demo():
    """Assemble the Blocks app: one button that refreshes one bar plot."""
    import gradio as gr

    with gr.Blocks() as demo:
        with gr.Row():
            btn = gr.Button("test")
        with gr.Row():
            plot = gr.BarPlot()
        btn.click(bar_plot_fn, None, outputs=plot)
    return demo


# A Jupyter kernel runs cells with __name__ == "__main__", so the app is still
# launched when this cell is executed; the guard only keeps an import of this
# code from starting a web server.
if __name__ == "__main__":
    bar_plot = build_bar_plot_demo()
    bar_plot.launch()


# %% Crawl a site and dump the page text into a Word document
def parse_data(link, num=None, duration=5, output_path='docx_file.docx', request_timeout=10):
    """Crawl outward from ``link`` and save the text of the pages found.

    Links are discovered depth-first, starting at ``link``, until ``duration``
    seconds have elapsed.  The text of the discovered pages is then loaded,
    joined, and written as a single paragraph to ``output_path``.

    Args:
        link: URL the crawl starts from; it is always the first page loaded.
        num: Maximum number of pages to load.  ``None`` (or a negative value)
            loads every discovered page.
        duration: Time budget in seconds for discovering links.
        output_path: Where the ``.docx`` file is written.
        request_timeout: Seconds to wait for each page while discovering
            links, so a stalled server cannot outlast ``duration`` indefinitely.

    Returns:
        None.  The result is the file written to ``output_path``.
    """
    from bs4 import BeautifulSoup
    import requests
    from docx import Document
    from langchain_community.document_loaders import WebBaseLoader

    start_time = time.time()
    # A dict is used as an insertion-ordered set: discovery order is kept, so
    # ``num`` selects the first pages found and not an arbitrary subset.
    found = {link: None}

    def get_links(url):
        """Return the absolute http(s) links on ``url``; [] if it cannot be fetched."""
        try:
            response = requests.get(url, timeout=request_timeout)
        except requests.RequestException as exc:
            # One unreachable page must not abort the whole crawl.
            print("Could not fetch", url, "-", exc)
            return []
        soup = BeautifulSoup(response.text, 'lxml')
        links = []
        for anchor in soup.find_all('a'):
            href = anchor.get('href')
            if href is not None and href.startswith('http'):
                found.setdefault(href, None)
                links.append(href)
        return links

    def get_all_links(start_url):
        """Depth-first link discovery that stops once the time budget is spent."""
        crawled = set()
        pending = [start_url]
        while pending:
            url = pending.pop()
            if url in crawled:
                # Pages link back to each other; fetching them again only burns time.
                continue
            crawled.add(url)
            children = get_links(url)
            if (time.time() - start_time) >= duration:
                return
            # Reversed so the first link on the page is the next one visited.
            pending.extend(reversed(children))

    def data_ret2(url):
        """Load ``url`` and return the text content of the first document."""
        loader = WebBaseLoader(f"{url}")
        data = loader.load()
        return data[0].page_content

    get_all_links(link)

    urls = list(found)
    limit = len(urls) if num is None or num < 0 else num
    all_data = []
    for url in urls[:limit]:
        print("Link: ", url)
        try:
            all_data.append(data_ret2(url))
        except Exception as exc:
            # Best effort: report the page that failed and keep going, but let
            # KeyboardInterrupt through (the old bare ``except`` swallowed it).
            print("Skipped", url, "-", exc)

    # Collapse every run of newlines into exactly one blank line.
    all_data2 = re.sub(r'\n+', '\n\n', "\n".join(all_data))
    document = Document()
    document.add_paragraph(all_data2)
    document.save(output_path)


# %% Run the crawl (network access; guarded for the same reason as the UI launch)
if __name__ == "__main__":
    parse_data("https://kuet.ac.bd")


# %% Show the working directory the relative output paths resolve against
os.getcwd()
# %% Load every saved question/answer spreadsheet into one frame
import os

import pandas as pd

QA_DIR = "save_ques_ans"

df_all = []
for x in sorted(os.listdir(QA_DIR)):
    if x.startswith("~$"):
        # Lock file Excel leaves next to an open workbook; not a spreadsheet.
        continue
    df_all.append(pd.read_excel(os.path.join(QA_DIR, x)))

# pd.concat raises "No objects to concatenate" on an empty list, so an empty
# folder yields an empty frame instead of an error.
df = pd.concat(df_all, axis=0) if df_all else pd.DataFrame()

# %%
df

# %% Collect the text of every crawled page
# NOTE(review): `s` and `data_ret2` are not defined at notebook level in the
# visible cells (they only exist as locals inside parse_data), so this cell
# presumably depends on an earlier version of the notebook - confirm or remove.
# NOTE(review): if data_ret2 returns a string, `extend` adds it to `doc` one
# character at a time; `append` may be what is intended - confirm.
doc = []
for x in s:
    print(x)
    doc.extend(data_ret2(x))

# %%
doc

# %% Load a fine-tuned model in 8-bit and push it to the Hugging Face Hub
import torch  # torch.float16 is used below but torch was never imported in this cell
from transformers import AutoTokenizer, AutoModelForCausalLM

# Keys and directory names are kept exactly as the notebook spelled them.
MODEL_PATHS = {
    "Mistral": "models/full_KUET_LLM_mistral",
    "Zepyhr": "models/full_KUET_LLM_zepyhr",
    "Llama2": "models/full_KUET_LLM_llama",
}

# NOTE(review): `model_name` and `hf` (presumably the Hub access token) must be
# defined before this cell runs; no visible cell sets them.
if model_name not in MODEL_PATHS:
    # The old if/elif chain had no else branch, so an unknown name silently
    # reused whatever `path` an earlier cell had left behind.
    raise ValueError(
        f"Unknown model_name {model_name!r}; expected one of {sorted(MODEL_PATHS)}"
    )
path = MODEL_PATHS[model_name]

tokenizer = AutoTokenizer.from_pretrained(path)
model = AutoModelForCausalLM.from_pretrained(path,
                                             device_map='auto',
                                             torch_dtype=torch.float16,
                                             use_auth_token=True,
                                             load_in_8bit=True,
                                             # load_in_4bit=True
                                             )
model.push_to_hub(repo_id=f"My_model_{model_name}", token=hf)
tokenizer.push_to_hub(repo_id=f"My_model_{model_name}", token=hf)
# %% Evaluation-loss curve over every logged evaluation

# Merge all the evaluation loss lists
# (presumably one inner list per training run, in chronological order - confirm)
eval_loss_lists = [
    [
        0.07517127692699432,
        0.07137121260166168,
        0.06598775833845139,
        0.0005441228277049959,
        0.0002996980620082468,
        0.00021371280308812857,
        0.00028233605553396046,
        9.069988300325349e-05,
        7.004399230936542e-05,
        9.137028973782435e-05,
        5.340397547115572e-05,
        5.0301870942348614e-05
    ],
    [
        1.597152731847018e-05,
        1.162805529020261e-05,
        9.043936188390944e-06,
        1.379685454594437e-05,
        5.367660833144328e-06,
        4.6886875679774676e-06,
        4.490133960644016e-06,
        6.136821866675746e-06,
        3.3243470625166083e-06,
        2.348009729757905e-06,
        2.1804094103572425e-06,
        1.958705070137512e-06
    ],
    [
        3.93469099435606e-06,
        1.65619246672577e-06,
        1.1269650030953926e-06,
        8.881219173417776e-07,
        1.3077693665763945e-06,
        7.212336186057655e-07,
        6.988730092416517e-07,
        5.00343162457284e-07,
        4.1343139400851214e-07,
        5.06081335061026e-07,
        7.039822662591178e-07,
        5.087575800644117e-07
    ],
    [
        5.1233128033345565e-06,
        1.3323343637239304e-06,
        1.1789074960688595e-06,
        1.0221098136753426e-06,
        1.4271246300268103e-06,
        1.0917949566646712e-06,
        1.8720394336924073e-06,
        0.00015229727432597429,
        0.00016713247168809175,
        7.280236604856327e-05,
        5.6143608162528835e-06,
        1.2813707144232467e-06
    ],
    [
        1.7742066802384215e-06,
        3.1642618978366954e-06,
        2.774180939013604e-05,
        7.504659606638597e-06,
        1.0794157105920021e-06,
        8.346623303623346e-07,
        1.572396286064759e-06,
        4.874376031693828e-07,
        6.269995651564386e-07,
        5.949763703938515e-07,
        5.836409968651424e-07,
        5.382337917581026e-07
    ],
    [
        1.3506955838238355e-05,
        2.3305697141040582e-06,
        2.193627324231784e-06,
        3.027681714229402e-07,
        4.6904440864636854e-07,
        4.6231170358623785e-07,
        2.520739883493661e-07,
        2.040175957063184e-07,
        1.8624521658239246e-07,
        4.635896289073571e-07,
        2.6239982275910734e-07,
        2.4372931761718064e-07
    ],
    [
        5.271021564112743e-06,
        3.550181190803414e-06,
        2.5201459266099846e-06,
        2.8312820177234244e-06,
        1.4717104477313114e-06,
        2.2729768716089893e-06,
        1.030095177156909e-06,
        1.0983015954479924e-06,
        8.350090752173855e-07,
        4.235817687003873e-05,
        0.00017692078836262226,
        5.840817902935669e-05
    ],
    [
        1.2606010386662092e-06,
        7.131714937713696e-06,
        8.305702976940665e-06,
        6.520267561427318e-07,
        1.0400606953453462e-07,
        1.2373440938517888e-07,
        1.2282114880690642e-07,
        1.4778217405364558e-07,
        1.125305075788674e-07,
        4.522570762333089e-08,
        2.48692485911306e-05,
        5.199101238417825e-08
    ],
    [
        1.329818132944638e-06,
        9.433363743482914e-07,
        8.183121735783061e-07,
        1.0200094493484357e-06,
        7.936826023069443e-07,
        7.760887115182413e-07,
        2.45380675778506e-07,
        0.0001625938602956012,
        1.0732967581361663e-07,
        1.0528655138841714e-06,
        9.632424280425766e-07,
        7.961476740092621e-07
    ],
    [
        4.5500939904741244e-07,
        7.533798793701862e-07,
        4.7130234293035755e-07,
        7.465733347089554e-07,
        9.549980859446805e-07,
        6.432795771615929e-07,
        6.765155831089942e-07,
        6.765155831089942e-07,
        5.451398692457587e-07,
        4.994994355911331e-07,
        5.466189918479358e-07,
        4.268927682460344e-07
    ],
    [
        2.63293713942403e-07,
        3.551216138930613e-07,
        2.3628319922863739e-07,
        9.180489541904535e-07,
        1.1080908279836876e-06,
        6.248191084523569e-07,
        8.346111712853599e-07,
        5.276984325064404e-07,
        3.681239491015731e-07,
        1.8970614235058747e-07,
        3.114948299298703e-07,
        2.9696289516323304e-07
    ],
    # NOTE(review): this list was already commented out in the notebook; the
    # reason for excluding it is not recorded - confirm before deleting it.
    # [
    #     2.38517332036281e-05,
    #     3.9089650272217114e-07,
    #     6.718229883517779e-08,
    #     1.4773820566915674e-07,
    #     5.8338137876035034e-08,
    #     3.57102081238736e-08,
    #     2.2298079329630127e-06,
    #     3.583775196602801e-07,
    #     9.418199908850511e-08,
    #     1.338206288892252e-06,
    #     3.194011810592201e-07,
    #     2.245769792352803e-07
    # ],
    [
        2.3522443370893598e-06,
        1.1711344996001571e-06,
        1.1321773172312533e-06,
        5.756968448622501e-07,
        4.4675923049908306e-07,
        4.365276993212319e-07,
        5.525398591998965e-07,
        4.404951710057503e-07,
        4.4630780848819995e-07,
        4.764913796861947e-07,
        4.10373701242861e-07,
        3.762708331578324e-07
    ],
    [
        2.1882451051169483e-07,
        5.146034354197582e-08,
        3.1587944704369875e-08,
        1.122993165125763e-08,
        8.033423704034703e-09,
        7.330823059703562e-09,
        2.0332389993882316e-08,
        1.718821529550496e-08,
        1.5028433608677005e-08,
        3.9828059072988253e-08,
        2.8266715190738978e-08,
        2.1497044144780375e-08
    ],
    [
        1.4871952558337398e-08,
        1.2490186662716951e-08,
        1.213749456496771e-08,
        1.159435214503901e-08,
        1.1296255486570317e-08,
        1.1153668211250078e-08,
        1.3103758966792611e-08,
        1.2461796927709656e-08,
        1.2030940688134706e-08,
        1.306745200935211e-08,
        1.029541429886649e-08,
        9.854548288501519e-09
    ]
]


def flatten_losses(nested):
    """Concatenate the inner loss lists into one flat list, keeping their order."""
    return [loss for run in nested for loss in run]


def tick_positions(n_points, n_ticks):
    """Return 1-based x positions spaced so that roughly ``n_ticks`` labels are drawn.

    The spacing is clamped to at least 1.  The previous inline expression
    ``n_points // n_ticks`` evaluated to 0 whenever there were fewer points
    than requested ticks, which makes ``range`` raise ``ValueError``.
    """
    step = max(1, n_points // max(1, n_ticks))
    return range(1, n_points + 1, step)


def plot_eval_loss(losses, epochs):
    """Draw the loss values against their 1-based position in ``losses``.

    Args:
        losses: Flat sequence of evaluation-loss values.
        epochs: Approximate number of x-axis tick labels to draw.
    """
    import matplotlib.pyplot as plt

    # Plotting the evaluation loss curve
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(losses) + 1), losses, marker='o')
    plt.title('Evaluation Loss Curve')
    # NOTE(review): x is the index of the logged evaluation, not an epoch
    # number - confirm how evaluations map to epochs before relying on the label.
    plt.xlabel('Epoch')
    plt.ylabel('Evaluation Loss')
    plt.xticks(list(tick_positions(len(losses), epochs)))
    plt.grid(True)
    plt.show()


# Flatten the nested list
merged_list = flatten_losses(eval_loss_lists)

# Number of epochs
epochs = 20

# A Jupyter kernel runs cells with __name__ == "__main__", so the figure is
# still drawn when the cell is executed; the guard only keeps an import of
# this code from opening a figure.
if __name__ == "__main__":
    plot_eval_loss(merged_list, epochs)
# %% Subsampled evaluation-loss curve

# Given numbers
numbers = [
    9.069988300325349e-05,
    7.004399230936542e-05,
    9.137028973782435e-05,
    5.340397547115572e-05,
    5.0301870942348614e-05,
    9.043936188390944e-06,
    4.6886875679774676e-06,
    4.490133960644016e-06,
    6.136821866675746e-06,
    3.3243470625166083e-06,
    2.348009729757905e-06,
    2.1804094103572425e-06,
    1.958705070137512e-06,
    6.988730092416517e-07,
    5.00343162457284e-07,
    4.1343139400851214e-07,
    5.06081335061026e-07,
    7.039822662591178e-07,
    5.087575800644117e-07,
    2.0332389993882316e-08,
    1.718821529550496e-08,
    1.5028433608677005e-08,
    3.9828059072988253e-08,
    2.8266715190738978e-08,
    2.1497044144780375e-08,
    9.854548288501519e-09
]

# Sorting the numbers in descending order and selecting every 3rd one for
# plotting.  sorted() leaves `numbers` in its recorded order; the previous
# in-place sort() destroyed that order for anything that reads it afterwards.
# NOTE(review): because the values are sorted before plotting, the curve is
# monotone by construction and is not the chronological loss - confirm that
# this is the intended presentation.
selected_numbers = sorted(numbers, reverse=True)[::3]

# Creating x-axis values (epochs)
epochs = list(range(1, len(selected_numbers) + 1))


def plot_selected_losses(x_values, losses):
    """Plot ``losses`` against ``x_values`` on a figure of its own."""
    import matplotlib.pyplot as plt

    # An explicit figure, so the curve never lands on a figure that another
    # cell left open.
    fig, ax = plt.subplots()
    ax.plot(x_values, losses, marker='o', linestyle='-')
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Evaluation Loss')
    ax.set_title('Evaluation Loss vs Epochs')
    ax.grid(True)
    plt.show()


# A Jupyter kernel runs cells with __name__ == "__main__", so the figure is
# still drawn when the cell is executed; the guard only keeps an import of
# this code from opening a figure.
if __name__ == "__main__":
    plot_selected_losses(epochs, selected_numbers)
# %% Gradio progress-bar demo
import gradio as gr
import time


def my_function(x, progress=gr.Progress()):
    """Echo ``x`` after a simulated long-running job that reports progress.

    Args:
        x: Value of the input textbox; returned unchanged.
        progress: Gradio progress tracker, supplied through the default argument.

    Returns:
        ``x``, after roughly eleven seconds of simulated work.
    """
    progress(0, desc="Starting...")
    time.sleep(1)
    # progress.tqdm advances the progress bar once per iteration.
    for i in progress.tqdm(range(100)):
        time.sleep(0.1)
    return x


with gr.Blocks() as demo:
    a = gr.Textbox()
    b = gr.TextArea("tt")
    btn = gr.Button("Test")
    btn.click(my_function, a, b)
demo.launch()

# %% Load the base model with 8-bit or 4-bit quantization
# NOTE(review): this fragment reads `self.quantization`, `base_model`, `torch`,
# `BitsAndBytesConfig` and `AutoModelForCausalLM`, none of which are defined or
# imported in this cell; it looks like it was copied out of a class method -
# confirm where it is meant to run.
if self.quantization == '8':
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
    )
elif self.quantization == '4':
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )
else:
    # Without this branch `bnb_config` was unbound (or left over from an
    # earlier run) and the load below failed later, or used the wrong setting.
    raise ValueError(
        f"Unsupported quantization {self.quantization!r}; expected '8' or '4'"
    )
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)

# %% LoRA adapter and trainer configuration
# NOTE(review): `LoraConfig`, `transformers`, `lora_r`, `lora_alpha`,
# `lora_dropout`, `epoch`, `lora_output`, `BATCH_SIZE`, `GRAD_ACC`, `OPTIMIZER`
# and `LR` are not defined in the visible cells - confirm where they come from.
# NOTE(review): the `value if value else default` fallbacks treat 0 like
# "not set", so lora_dropout=0 becomes 0.05 - confirm that this is intended.
config = LoraConfig(
    r=lora_r if lora_r else 16,
    lora_alpha=lora_alpha if lora_alpha else 32,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=lora_dropout if lora_dropout else 0.05,
    bias="none",
    task_type="CAUSAL_LM")

training_config = transformers.TrainingArguments(per_device_train_batch_size=BATCH_SIZE,
                                                 gradient_accumulation_steps=GRAD_ACC,
                                                 optim=OPTIMIZER,
                                                 learning_rate=LR,
                                                 fp16=True,
                                                 logging_steps=10,
                                                 num_train_epochs=epoch if epoch else 2,
                                                 output_dir=lora_output,
                                                 remove_unused_columns=True,
                                                 )