Spaces:
Runtime error
Runtime error
| # generate_scripts.py | |
| import os | |
| import re | |
| import json | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import gradio as gr | |
| from dotenv import load_dotenv | |
| import spaces | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
def generate_local(model, tokenizer, prompt: str, max_new_tokens: int = 350, temperature: float = 0.7) -> str:
    """Sample a completion from a local causal LM and return ONLY the new text.

    Args:
        model: a loaded causal LM (e.g. from ``AutoModelForCausalLM.from_pretrained``).
        tokenizer: the tokenizer matching ``model``.
        prompt: the text to condition generation on.
        max_new_tokens: upper bound on generated tokens.
        temperature: sampling temperature (``do_sample=True`` is always used).

    Returns:
        The decoded completion, with the echoed prompt removed.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    # Move every input tensor to wherever the model lives (CPU or GPU).
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    output_ids = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id,  # silences the missing-pad warning
    )
    # BUG FIX: for decoder-only models, generate() returns prompt + completion.
    # Decoding output_ids[0] in full leaked the prompt into every downstream
    # result (it broke one_word's JSON parse and polluted titles/descriptions/
    # tags). Slice off the prompt tokens before decoding.
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(output_ids[0][prompt_len:], skip_special_tokens=True)
def generate_script(model, tokenizer, prompt: str, word_count: int = 60) -> str:
    """Ask the model for a spoken-word-only YouTube script about *prompt*.

    Builds a strict instruction block (exact word count, no stage directions,
    no headers) and delegates the actual generation to ``generate_local``.
    """
    rule_lines = [
        f"- Exactly {word_count} words.",
        "- Only the spoken words. NO scene descriptions, instructions, or formatting.",
        "- Write in natural, clear, and simple English, as if it's being said by a voiceover artist.",
        "- Keep a steady rhythm (about 2 words per second).",
        "- Do NOT include any explanations, labels, or headers. Only output the final spoken script.",
    ]
    system_prompt = (
        "You are an expert YouTube scriptwriter. "
        "Your job is to write the EXACT words that will be spoken aloud in a video. "
        + f"Topic: {prompt.strip()}\n\n"
        + "🎯 Output rules:\n"
        + "\n".join(rule_lines)
        + "\n\nStart now:"
    )
    return generate_local(model, tokenizer, system_prompt)
def one_word(model, tokenizer, query: str) -> str:
    """Extract a single lowercase theme keyword for *query* from the model.

    The model is asked to answer with JSON like ``{"keyword": "impact"}``.

    Args:
        model: a loaded causal LM.
        tokenizer: the tokenizer matching ``model``.
        query: the text whose central theme is wanted.

    Returns:
        The extracted keyword lowercased, or ``""`` when nothing usable
        could be parsed out of the completion.
    """
    prompt_final = (
        "Extract only the unique central theme of the following text in English in JSON format like this: "
        '{"keyword": "impact"}. Text: ' + query
    )
    result = generate_local(model, tokenizer, prompt_final, max_new_tokens=30, temperature=0.4)
    keyword = ""
    # BUG FIX: json.loads(result) only succeeds when the completion is EXACTLY
    # one bare JSON object; any surrounding chatter made it raise, and the old
    # fallback then grabbed an arbitrary first word. Locate the JSON object
    # inside the completion first, and only then fall back to a word grab.
    match = re.search(r'\{[^{}]*"keyword"[^{}]*\}', result)
    if match:
        try:
            keyword = json.loads(match.group(0)).get("keyword", "")
        except json.JSONDecodeError:
            keyword = ""
    if not keyword:
        words = re.findall(r'\b[a-zA-Z]{3,}\b', result)
        keyword = words[0] if words else ""
    return keyword.lower()
def generate_title(model, tokenizer, text: str) -> str:
    """Generate one engaging YouTube Short title for *text*.

    The instruction asks for at most 100 characters, no emojis and no
    preamble; generation is delegated to ``generate_local`` with a fairly
    high temperature for variety.
    """
    instruction = (
        "Generate a unique title for a YouTube Short video that is engaging and informative, "
        "maximum 100 characters, without emojis, introduction, or explanation. Content:\n"
    )
    raw = generate_local(model, tokenizer, instruction + text, max_new_tokens=50, temperature=0.9)
    return raw.strip()
def generate_description(model, tokenizer, text: str) -> str:
    """Generate a YouTube description (hook, short summary, 3 hashtags) for *text*.

    Delegates to ``generate_local`` and returns the whitespace-trimmed result.
    """
    instruction = (
        "Write only the YouTube video description in English:\n"
        "1. A compelling opening line.\n"
        "2. A clear summary of the video (max 3 lines).\n"
        "3. End with 3 relevant hashtags.\n"
        "No emojis or introductions. Here is the text:\n"
    )
    response = generate_local(model, tokenizer, instruction + text, max_new_tokens=300, temperature=0.7)
    return response.strip()
def generate_tags(model, tokenizer, text: str) -> list:
    """Return up to 10 keyword tags for *text*.

    Args:
        model: a loaded causal LM.
        tokenizer: the tokenizer matching ``model``.
        text: the content to derive tags from.

    Returns:
        A list of at most 10 non-empty, whitespace-trimmed tag strings.
    """
    prompt_final = (
        "List only the important keywords for this YouTube video, separated by commas, "
        "maximum 10 keywords. Context: " + text
    )
    result = generate_local(model, tokenizer, prompt_final, max_new_tokens=100, temperature=0.5)
    tags = [tag.strip() for tag in result.split(",") if tag.strip()]
    # BUG FIX: the prompt merely *asks* for at most 10 keywords; the model is
    # free to ignore that, so enforce the documented cap here.
    return tags[:10]