Spaces:
Running
Running
Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +10 -0
- 1_lab1.ipynb +998 -0
- 2_lab2.ipynb +492 -0
- 3_lab3.ipynb +720 -0
- 4_lab4.ipynb +556 -0
- README.md +3 -9
- Untitled +1 -0
- app.py +283 -0
- community_contributions/1_foundations_using_gemini/1_lab1.ipynb +406 -0
- community_contributions/1_foundations_using_gemini/2_lab2.ipynb +492 -0
- community_contributions/1_foundations_using_gemini/3_lab3.ipynb +382 -0
- community_contributions/1_foundations_using_gemini/4_lab4.ipynb +464 -0
- community_contributions/1_foundations_using_gemini/app.py +136 -0
- community_contributions/1_foundations_using_gemini/me/linkedin.pdf +0 -0
- community_contributions/1_foundations_using_gemini/me/summary.txt +11 -0
- community_contributions/1_foundations_using_gemini/requirements.txt +6 -0
- community_contributions/1_lab1_DA.ipynb +396 -0
- community_contributions/1_lab1_Hy.ipynb +688 -0
- community_contributions/1_lab1_Mudassar.ipynb +260 -0
- community_contributions/1_lab1_Thanh.ipynb +165 -0
- community_contributions/1_lab1_cm.ipynb +305 -0
- community_contributions/1_lab1_gemini.ipynb +305 -0
- community_contributions/1_lab1_groq.ipynb +262 -0
- community_contributions/1_lab1_groq_llama.ipynb +296 -0
- community_contributions/1_lab1_marstipton_mac.ipynb +411 -0
- community_contributions/1_lab1_moneek.ipynb +407 -0
- community_contributions/1_lab1_open_router.ipynb +323 -0
- community_contributions/1_lab2_Kaushik_Parallelization.ipynb +355 -0
- community_contributions/1_lab2_Routing_Workflow.ipynb +514 -0
- community_contributions/2_lab2-Evaluator-AnnpaS18.ipynb +474 -0
- community_contributions/2_lab2-judge-prompt-changed.ipynb +476 -0
- community_contributions/2_lab2-parallelization.ipynb +440 -0
- community_contributions/2_lab2.ipynb +517 -0
- community_contributions/2_lab2_Execution_measurement.py +401 -0
- community_contributions/2_lab2_ReAct_Pattern.ipynb +289 -0
- community_contributions/2_lab2_akash_parallelization.ipynb +295 -0
- community_contributions/2_lab2_async.ipynb +474 -0
- community_contributions/2_lab2_async_with_reasons.ipynb +490 -0
- community_contributions/2_lab2_doclee99_gpt5_improves_gemini.25flash.ipynb +620 -0
- community_contributions/2_lab2_evaluator_mars.ipynb +677 -0
- community_contributions/2_lab2_exercise.ipynb +336 -0
- community_contributions/2_lab2_exercise_BrettSanders_ChainOfThought.ipynb +241 -0
- community_contributions/2_lab2_llm_reviewer.ipynb +627 -0
- community_contributions/2_lab2_moneek.ipynb +173 -0
- community_contributions/2_lab2_multi-evaluation-criteria.ipynb +506 -0
- community_contributions/2_lab2_orchestrator.ipynb +494 -0
- community_contributions/2_lab2_perplexity_support.ipynb +497 -0
- community_contributions/2_lab2_qualitycode_review.ipynb +320 -0
- community_contributions/2_lab2_reflection_pattern.ipynb +311 -0
- community_contributions/2_lab2_reflection_pattern2.ipynb +999 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
community_contributions/amirna2_contributions/personal-ai/me/resume.pdf filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
community_contributions/careerwise_gemini_ntfy/me/resume_for_Virtual_Assistant.pdf filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
community_contributions/ChatBot_with_evaluator_and_notifier/career_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
community_contributions/hidden_gems_world_travel_guide/Screenshot1.png filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
community_contributions/jongkook/me/Jongkook[[:space:]]Kim[[:space:]]-[[:space:]]Resume.pdf filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
community_contributions/NLP_Agent_Dinesh_Uthayakumar/eval1_capital.wav filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
community_contributions/NLP_Agent_Dinesh_Uthayakumar/eval2_money_customers_owe.wav filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
community_contributions/NLP_Agent_Dinesh_Uthayakumar/eval3_total_estimated_revenue.wav filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
community_contributions/seung-gu/me/linkedin.pdf filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
me/Urvashi[[:space:]]Patel[[:space:]]CV.pdf filter=lfs diff=lfs merge=lfs -text
|
1_lab1.ipynb
ADDED
|
@@ -0,0 +1,998 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
+
]
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"cell_type": "markdown",
|
| 12 |
+
"metadata": {},
|
| 13 |
+
"source": [
|
| 14 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 15 |
+
" <tr>\n",
|
| 16 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 17 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 18 |
+
" </td>\n",
|
| 19 |
+
" <td>\n",
|
| 20 |
+
" <h2 style=\"color:#ff7800;\">Are you ready for action??</h2>\n",
|
| 21 |
+
" <span style=\"color:#ff7800;\">Have you completed all the setup steps in the <a href=\"../setup/\">setup</a> folder?<br/>\n",
|
| 22 |
+
" Have you read the <a href=\"../README.md\">README</a>? Many common questions are answered here!<br/>\n",
|
| 23 |
+
" Have you checked out the guides in the <a href=\"../guides/01_intro.ipynb\">guides</a> folder?<br/>\n",
|
| 24 |
+
" Well in that case, you're ready!!\n",
|
| 25 |
+
" </span>\n",
|
| 26 |
+
" </td>\n",
|
| 27 |
+
" </tr>\n",
|
| 28 |
+
"</table>"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "markdown",
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"source": [
|
| 35 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 36 |
+
" <tr>\n",
|
| 37 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 38 |
+
" <img src=\"../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 39 |
+
" </td>\n",
|
| 40 |
+
" <td>\n",
|
| 41 |
+
" <h2 style=\"color:#00bfff;\">This code is a live resource - keep an eye out for my updates</h2>\n",
|
| 42 |
+
" <span style=\"color:#00bfff;\">I push updates regularly. As people ask questions or have problems, I add more examples and improve explanations. As a result, the code below might not be identical to the videos, as I've added more steps and better comments. Consider this like an interactive book that accompanies the lectures.<br/><br/>\n",
|
| 43 |
+
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
|
| 44 |
+
" </span>\n",
|
| 45 |
+
" </td>\n",
|
| 46 |
+
" </tr>\n",
|
| 47 |
+
"</table>"
|
| 48 |
+
]
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"cell_type": "markdown",
|
| 52 |
+
"metadata": {},
|
| 53 |
+
"source": [
|
| 54 |
+
"### And please do remember to contact me if I can help\n",
|
| 55 |
+
"\n",
|
| 56 |
+
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 57 |
+
"\n",
|
| 58 |
+
"\n",
|
| 59 |
+
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 60 |
+
"\n",
|
| 61 |
+
"Just to check you've already added the Python and Jupyter extensions to Cursor, if not already installed:\n",
|
| 62 |
+
"- Open extensions (View >> extensions)\n",
|
| 63 |
+
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 64 |
+
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 65 |
+
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 66 |
+
"\n",
|
| 67 |
+
"And then:\n",
|
| 68 |
+
"1. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 69 |
+
"2. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 70 |
+
"3. Enjoy!\n",
|
| 71 |
+
"\n",
|
| 72 |
+
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 73 |
+
"1. On Mac: From the Cursor menu, choose Settings >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`); \n",
|
| 74 |
+
"On Windows PC: From the File menu, choose Preferences >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 75 |
+
"2. In the Settings search bar, type \"venv\" \n",
|
| 76 |
+
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 77 |
+
"And then try again.\n",
|
| 78 |
+
"\n",
|
| 79 |
+
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interfering. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 80 |
+
"`conda deactivate` \n",
|
| 81 |
+
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 82 |
+
"`conda config --set auto_activate_base false` \n",
|
| 83 |
+
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 84 |
+
]
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"cell_type": "code",
|
| 88 |
+
"execution_count": 1,
|
| 89 |
+
"metadata": {},
|
| 90 |
+
"outputs": [],
|
| 91 |
+
"source": [
|
| 92 |
+
"# First let's do an import. If you get an Import Error, double check that your Kernel is correct..\n",
|
| 93 |
+
"\n",
|
| 94 |
+
"from dotenv import load_dotenv \n"
|
| 95 |
+
]
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"cell_type": "code",
|
| 99 |
+
"execution_count": 2,
|
| 100 |
+
"metadata": {},
|
| 101 |
+
"outputs": [
|
| 102 |
+
{
|
| 103 |
+
"data": {
|
| 104 |
+
"text/plain": [
|
| 105 |
+
"True"
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
"execution_count": 2,
|
| 109 |
+
"metadata": {},
|
| 110 |
+
"output_type": "execute_result"
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"source": [
|
| 114 |
+
"# Next it's time to load the API keys into environment variables\n",
|
| 115 |
+
"# If this returns false, see the next cell!\n",
|
| 116 |
+
"\n",
|
| 117 |
+
"load_dotenv(override=True)"
|
| 118 |
+
]
|
| 119 |
+
},
|
| 120 |
+
{
|
| 121 |
+
"cell_type": "markdown",
|
| 122 |
+
"metadata": {},
|
| 123 |
+
"source": [
|
| 124 |
+
"### Wait, did that just output `False`??\n",
|
| 125 |
+
"\n",
|
| 126 |
+
"If so, the most common reason is that you didn't save your `.env` file after adding the key! Be sure to have saved.\n",
|
| 127 |
+
"\n",
|
| 128 |
+
"Also, make sure the `.env` file is named precisely `.env` and is in the project root directory (`agents`)\n",
|
| 129 |
+
"\n",
|
| 130 |
+
"By the way, your `.env` file should have a stop symbol next to it in Cursor on the left, and that's actually a good thing: that's Cursor saying to you, \"hey, I realize this is a file filled with secret information, and I'm not going to send it to an external AI to suggest changes, because your keys should not be shown to anyone else.\""
|
| 131 |
+
]
|
| 132 |
+
},
|
| 133 |
+
{
|
| 134 |
+
"cell_type": "markdown",
|
| 135 |
+
"metadata": {},
|
| 136 |
+
"source": [
|
| 137 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 138 |
+
" <tr>\n",
|
| 139 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 140 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 141 |
+
" </td>\n",
|
| 142 |
+
" <td>\n",
|
| 143 |
+
" <h2 style=\"color:#ff7800;\">Final reminders</h2>\n",
|
| 144 |
+
" <span style=\"color:#ff7800;\">1. If you're not confident about Environment Variables or Web Endpoints / APIs, please read Topics 3 and 5 in this <a href=\"../guides/04_technical_foundations.ipynb\">technical foundations guide</a>.<br/>\n",
|
| 145 |
+
" 2. If you want to use AIs other than OpenAI, like Gemini, DeepSeek or Ollama (free), please see the first section in this <a href=\"../guides/09_ai_apis_and_ollama.ipynb\">AI APIs guide</a>.<br/>\n",
|
| 146 |
+
" 3. If you ever get a Name Error in Python, you can always fix it immediately; see the last section of this <a href=\"../guides/06_python_foundations.ipynb\">Python Foundations guide</a> and follow both tutorials and exercises.<br/>\n",
|
| 147 |
+
" </span>\n",
|
| 148 |
+
" </td>\n",
|
| 149 |
+
" </tr>\n",
|
| 150 |
+
"</table>"
|
| 151 |
+
]
|
| 152 |
+
},
|
| 153 |
+
{
|
| 154 |
+
"cell_type": "code",
|
| 155 |
+
"execution_count": 3,
|
| 156 |
+
"metadata": {},
|
| 157 |
+
"outputs": [
|
| 158 |
+
{
|
| 159 |
+
"name": "stdout",
|
| 160 |
+
"output_type": "stream",
|
| 161 |
+
"text": [
|
| 162 |
+
"OpenAI API Key exists and begins sk-proj-\n"
|
| 163 |
+
]
|
| 164 |
+
}
|
| 165 |
+
],
|
| 166 |
+
"source": [
|
| 167 |
+
"# Check the key - if you're not using OpenAI, check whichever key you're using! Ollama doesn't need a key.\n",
|
| 168 |
+
"\n",
|
| 169 |
+
"import os\n",
|
| 170 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 171 |
+
"\n",
|
| 172 |
+
"if openai_api_key:\n",
|
| 173 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 174 |
+
"else:\n",
|
| 175 |
+
" print(\"OpenAI API Key not set - please head to the troubleshooting guide in the setup folder\")\n",
|
| 176 |
+
" \n"
|
| 177 |
+
]
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"cell_type": "code",
|
| 181 |
+
"execution_count": 4,
|
| 182 |
+
"metadata": {},
|
| 183 |
+
"outputs": [],
|
| 184 |
+
"source": [
|
| 185 |
+
"# And now - the all important import statement\n",
|
| 186 |
+
"# If you get an import error - head over to troubleshooting in the Setup folder\n",
|
| 187 |
+
"# Even for other LLM providers like Gemini, you still use this OpenAI import - see Guide 9 for why\n",
|
| 188 |
+
"\n",
|
| 189 |
+
"from openai import OpenAI"
|
| 190 |
+
]
|
| 191 |
+
},
|
| 192 |
+
{
|
| 193 |
+
"cell_type": "code",
|
| 194 |
+
"execution_count": 5,
|
| 195 |
+
"metadata": {},
|
| 196 |
+
"outputs": [],
|
| 197 |
+
"source": [
|
| 198 |
+
"# And now we'll create an instance of the OpenAI class\n",
|
| 199 |
+
"# If you're not sure what it means to create an instance of a class - head over to the guides folder (guide 6)!\n",
|
| 200 |
+
"# If you get a NameError - head over to the guides folder (guide 6) to learn about NameErrors - always instantly fixable\n",
|
| 201 |
+
"# If you're not using OpenAI, you just need to slightly modify this - precise instructions are in the AI APIs guide (guide 9)\n",
|
| 202 |
+
"\n",
|
| 203 |
+
"openai = OpenAI()"
|
| 204 |
+
]
|
| 205 |
+
},
|
| 206 |
+
{
|
| 207 |
+
"cell_type": "code",
|
| 208 |
+
"execution_count": 6,
|
| 209 |
+
"metadata": {},
|
| 210 |
+
"outputs": [],
|
| 211 |
+
"source": [
|
| 212 |
+
"# Create a list of messages in the familiar OpenAI format\n",
|
| 213 |
+
"\n",
|
| 214 |
+
"messages = [{\"role\": \"user\", \"content\": \"What is 2+2?\"}]"
|
| 215 |
+
]
|
| 216 |
+
},
|
| 217 |
+
{
|
| 218 |
+
"cell_type": "code",
|
| 219 |
+
"execution_count": 7,
|
| 220 |
+
"metadata": {},
|
| 221 |
+
"outputs": [
|
| 222 |
+
{
|
| 223 |
+
"name": "stdout",
|
| 224 |
+
"output_type": "stream",
|
| 225 |
+
"text": [
|
| 226 |
+
"2 + 2 equals 4.\n"
|
| 227 |
+
]
|
| 228 |
+
}
|
| 229 |
+
],
|
| 230 |
+
"source": [
|
| 231 |
+
"# And now call it! Any problems, head to the troubleshooting guide\n",
|
| 232 |
+
"# This uses GPT 4.1 nano, the incredibly cheap model\n",
|
| 233 |
+
"# The APIs guide (guide 9) has exact instructions for using even cheaper or free alternatives to OpenAI\n",
|
| 234 |
+
"# If you get a NameError, head to the guides folder (guide 6) to learn about NameErrors - always instantly fixable\n",
|
| 235 |
+
"\n",
|
| 236 |
+
"response = openai.chat.completions.create(\n",
|
| 237 |
+
" model=\"gpt-4.1-nano\",\n",
|
| 238 |
+
" messages=messages\n",
|
| 239 |
+
")\n",
|
| 240 |
+
"\n",
|
| 241 |
+
"print(response.choices[0].message.content)\n"
|
| 242 |
+
]
|
| 243 |
+
},
|
| 244 |
+
{
|
| 245 |
+
"cell_type": "code",
|
| 246 |
+
"execution_count": 8,
|
| 247 |
+
"metadata": {},
|
| 248 |
+
"outputs": [],
|
| 249 |
+
"source": [
|
| 250 |
+
"# And now - let's ask for a question:\n",
|
| 251 |
+
"\n",
|
| 252 |
+
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 253 |
+
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 254 |
+
]
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"cell_type": "code",
|
| 258 |
+
"execution_count": 9,
|
| 259 |
+
"metadata": {},
|
| 260 |
+
"outputs": [
|
| 261 |
+
{
|
| 262 |
+
"name": "stdout",
|
| 263 |
+
"output_type": "stream",
|
| 264 |
+
"text": [
|
| 265 |
+
"If two trains start from the same point heading in opposite directions, one traveling at 60 mph and the other at 40 mph, and after 3 hours one train returns towards the starting point at 50 mph, how long after the start will the two trains be exactly 210 miles apart?\n"
|
| 266 |
+
]
|
| 267 |
+
}
|
| 268 |
+
],
|
| 269 |
+
"source": [
|
| 270 |
+
"# ask it - this uses GPT 4.1 mini, still cheap but more powerful than nano\n",
|
| 271 |
+
"\n",
|
| 272 |
+
"response = openai.chat.completions.create(\n",
|
| 273 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 274 |
+
" messages=messages\n",
|
| 275 |
+
")\n",
|
| 276 |
+
"\n",
|
| 277 |
+
"question = response.choices[0].message.content\n",
|
| 278 |
+
"\n",
|
| 279 |
+
"print(question)\n"
|
| 280 |
+
]
|
| 281 |
+
},
|
| 282 |
+
{
|
| 283 |
+
"cell_type": "code",
|
| 284 |
+
"execution_count": 10,
|
| 285 |
+
"metadata": {},
|
| 286 |
+
"outputs": [],
|
| 287 |
+
"source": [
|
| 288 |
+
"# form a new messages list\n",
|
| 289 |
+
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 290 |
+
]
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"cell_type": "code",
|
| 294 |
+
"execution_count": 11,
|
| 295 |
+
"metadata": {},
|
| 296 |
+
"outputs": [
|
| 297 |
+
{
|
| 298 |
+
"name": "stdout",
|
| 299 |
+
"output_type": "stream",
|
| 300 |
+
"text": [
|
| 301 |
+
"Let's carefully analyze the problem step by step.\n",
|
| 302 |
+
"\n",
|
| 303 |
+
"---\n",
|
| 304 |
+
"\n",
|
| 305 |
+
"### Problem restatement:\n",
|
| 306 |
+
"- Two trains start from the same point at the same time, traveling in opposite directions.\n",
|
| 307 |
+
"- Train A travels at 60 mph.\n",
|
| 308 |
+
"- Train B travels at 40 mph.\n",
|
| 309 |
+
"- After 3 hours, **one** train (which one? The problem implies it is the slower one, but let's clarify) returns toward the starting point at 50 mph.\n",
|
| 310 |
+
"- Question: After how long from the start will the two trains be exactly 210 miles apart?\n",
|
| 311 |
+
"\n",
|
| 312 |
+
"---\n",
|
| 313 |
+
"\n",
|
| 314 |
+
"### Step 1: Understand the setup and assumptions\n",
|
| 315 |
+
"\n",
|
| 316 |
+
"- The two trains start at time \\( t=0 \\), at the same location.\n",
|
| 317 |
+
"- One train moves east, the other west (opposite directions).\n",
|
| 318 |
+
"- Speeds initially: Train A = 60 mph, Train B = 40 mph.\n",
|
| 319 |
+
"- At \\( t=3 \\) hours, **one train returns toward the start at 50 mph**.\n",
|
| 320 |
+
"\n",
|
| 321 |
+
"We need to know which train turns back at 3 hours and changes speed to 50 mph going toward the starting point.\n",
|
| 322 |
+
"\n",
|
| 323 |
+
"---\n",
|
| 324 |
+
"\n",
|
| 325 |
+
"### Clarification:\n",
|
| 326 |
+
"\n",
|
| 327 |
+
"It’s more logical that the train going 40 mph (the slower train) is the one that returns toward the start at 50 mph after 3 hours (could be either, but let's proceed with Train B).\n",
|
| 328 |
+
"\n",
|
| 329 |
+
"Let:\n",
|
| 330 |
+
"\n",
|
| 331 |
+
"- Train A: speed 60 mph, moves \"right\" (positive direction)\n",
|
| 332 |
+
"- Train B: speed 40 mph, moves \"left\" (negative direction)\n",
|
| 333 |
+
"\n",
|
| 334 |
+
"At \\( t=3 \\):\n",
|
| 335 |
+
"\n",
|
| 336 |
+
"- Train B turns around and moves \"right\" toward the starting point at 50 mph.\n",
|
| 337 |
+
"\n",
|
| 338 |
+
"We want to find time \\( T \\) (in hours) from the start when the distance between the trains is 210 miles.\n",
|
| 339 |
+
"\n",
|
| 340 |
+
"---\n",
|
| 341 |
+
"\n",
|
| 342 |
+
"### Step 2: Express positions of both trains over time\n",
|
| 343 |
+
"\n",
|
| 344 |
+
"Define:\n",
|
| 345 |
+
"- \\( t \\) = time in hours from start.\n",
|
| 346 |
+
"\n",
|
| 347 |
+
"Positions:\n",
|
| 348 |
+
"\n",
|
| 349 |
+
"- Train A (always to the right, constant 60 mph):\n",
|
| 350 |
+
"\n",
|
| 351 |
+
"\\[\n",
|
| 352 |
+
"x_A(t) = 60t\n",
|
| 353 |
+
"\\]\n",
|
| 354 |
+
"\n",
|
| 355 |
+
"- Train B:\n",
|
| 356 |
+
"\n",
|
| 357 |
+
"1. From \\( 0 \\leq t \\leq 3 \\):\n",
|
| 358 |
+
"\n",
|
| 359 |
+
"\\[\n",
|
| 360 |
+
"x_B(t) = -40t\n",
|
| 361 |
+
"\\]\n",
|
| 362 |
+
"\n",
|
| 363 |
+
"2. From \\( t > 3 \\):\n",
|
| 364 |
+
"\n",
|
| 365 |
+
"At \\( t=3 \\), position:\n",
|
| 366 |
+
"\n",
|
| 367 |
+
"\\[\n",
|
| 368 |
+
"x_B(3) = -40 \\times 3 = -120\n",
|
| 369 |
+
"\\]\n",
|
| 370 |
+
"\n",
|
| 371 |
+
"Then Train B reverses direction and moves toward the start at 50 mph.\n",
|
| 372 |
+
"\n",
|
| 373 |
+
"Since it was at \\(-120\\), moving right at 50 mph means:\n",
|
| 374 |
+
"\n",
|
| 375 |
+
"\\[\n",
|
| 376 |
+
"x_B(t) = x_B(3) + 50 (t - 3) = -120 + 50(t-3), \\quad t > 3\n",
|
| 377 |
+
"\\]\n",
|
| 378 |
+
"\n",
|
| 379 |
+
"---\n",
|
| 380 |
+
"\n",
|
| 381 |
+
"### Step 3: Write the distance between trains at time \\( t \\)\n",
|
| 382 |
+
"\n",
|
| 383 |
+
"Distance between trains:\n",
|
| 384 |
+
"\n",
|
| 385 |
+
"\\[\n",
|
| 386 |
+
"D(t) = | x_A(t) - x_B(t) |\n",
|
| 387 |
+
"\\]\n",
|
| 388 |
+
"\n",
|
| 389 |
+
"---\n",
|
| 390 |
+
"\n",
|
| 391 |
+
"### Step 4: Find when distance is 210 miles\n",
|
| 392 |
+
"\n",
|
| 393 |
+
"We have two time intervals to consider:\n",
|
| 394 |
+
"\n",
|
| 395 |
+
"- For \\( 0 \\leq t \\leq 3 \\)\n",
|
| 396 |
+
"- For \\( t > 3 \\)\n",
|
| 397 |
+
"\n",
|
| 398 |
+
"---\n",
|
| 399 |
+
"\n",
|
| 400 |
+
"### Case 1: \\( 0 \\leq t \\leq 3 \\)\n",
|
| 401 |
+
"\n",
|
| 402 |
+
"Positions:\n",
|
| 403 |
+
"\n",
|
| 404 |
+
"- \\( x_A = 60t \\)\n",
|
| 405 |
+
"- \\( x_B = -40t \\)\n",
|
| 406 |
+
"\n",
|
| 407 |
+
"Distance:\n",
|
| 408 |
+
"\n",
|
| 409 |
+
"\\[\n",
|
| 410 |
+
"D(t) = |60t - (-40t)| = |100t| = 100t\n",
|
| 411 |
+
"\\]\n",
|
| 412 |
+
"\n",
|
| 413 |
+
"Set equal to 210:\n",
|
| 414 |
+
"\n",
|
| 415 |
+
"\\[\n",
|
| 416 |
+
"100t = 210 \\implies t = 2.1 \\, \\text{hours}\n",
|
| 417 |
+
"\\]\n",
|
| 418 |
+
"\n",
|
| 419 |
+
"Since 2.1 < 3, the first time they are 210 miles apart is at 2.1 hours.\n",
|
| 420 |
+
"\n",
|
| 421 |
+
"---\n",
|
| 422 |
+
"\n",
|
| 423 |
+
"### Case 2: \\( t > 3 \\)\n",
|
| 424 |
+
"\n",
|
| 425 |
+
"Positions:\n",
|
| 426 |
+
"\n",
|
| 427 |
+
"- \\( x_A = 60t \\)\n",
|
| 428 |
+
"- \\( x_B = -120 + 50(t-3) = -120 + 50t - 150 = 50t - 270 \\)\n",
|
| 429 |
+
"\n",
|
| 430 |
+
"Distance:\n",
|
| 431 |
+
"\n",
|
| 432 |
+
"\\[\n",
|
| 433 |
+
"D(t) = |60t - (50t - 270)| = |60t - 50t + 270| = |10t + 270|\n",
|
| 434 |
+
"\\]\n",
|
| 435 |
+
"\n",
|
| 436 |
+
"Since \\( t > 3 \\), \\( 10t + 270 > 0 \\), so:\n",
|
| 437 |
+
"\n",
|
| 438 |
+
"\\[\n",
|
| 439 |
+
"D(t) = 10t + 270\n",
|
| 440 |
+
"\\]\n",
|
| 441 |
+
"\n",
|
| 442 |
+
"Set equal to 210:\n",
|
| 443 |
+
"\n",
|
| 444 |
+
"\\[\n",
|
| 445 |
+
"10t + 270 = 210 \\implies 10t = -60 \\implies t = -6\n",
|
| 446 |
+
"\\]\n",
|
| 447 |
+
"\n",
|
| 448 |
+
"Negative time, invalid.\n",
|
| 449 |
+
"\n",
|
| 450 |
+
"---\n",
|
| 451 |
+
"\n",
|
| 452 |
+
"### Conclusion:\n",
|
| 453 |
+
"\n",
|
| 454 |
+
"- Distance reaches 210 miles first at \\( t = 2.1 \\) hours before the 3-hour mark.\n",
|
| 455 |
+
"- After 3 hours, distance will always be greater than 270 miles (since the formula gives \\(D(t) = 10t + 270\\)) and no later time will be 210 miles apart again.\n",
|
| 456 |
+
"\n",
|
| 457 |
+
"---\n",
|
| 458 |
+
"\n",
|
| 459 |
+
"### Final answer:\n",
|
| 460 |
+
"\n",
|
| 461 |
+
"\\[\n",
|
| 462 |
+
"\\boxed{2.1 \\text{ hours after the start}}\n",
|
| 463 |
+
"\\]\n",
|
| 464 |
+
"\n",
|
| 465 |
+
"---\n",
|
| 466 |
+
"\n",
|
| 467 |
+
"**Summary**: The trains are exactly 210 miles apart for the first time 2.1 hours after starting. The turnaround of one train at 3 hours does not cause them to be 210 miles apart again.\n"
|
| 468 |
+
]
|
| 469 |
+
}
|
| 470 |
+
],
|
| 471 |
+
"source": [
|
| 472 |
+
"# Ask it again\n",
|
| 473 |
+
"\n",
|
| 474 |
+
"response = openai.chat.completions.create(\n",
|
| 475 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 476 |
+
" messages=messages\n",
|
| 477 |
+
")\n",
|
| 478 |
+
"\n",
|
| 479 |
+
"answer = response.choices[0].message.content\n",
|
| 480 |
+
"print(answer)\n"
|
| 481 |
+
]
|
| 482 |
+
},
|
| 483 |
+
{
|
| 484 |
+
"cell_type": "code",
|
| 485 |
+
"execution_count": 12,
|
| 486 |
+
"metadata": {},
|
| 487 |
+
"outputs": [
|
| 488 |
+
{
|
| 489 |
+
"data": {
|
| 490 |
+
"text/markdown": [
|
| 491 |
+
"Let's carefully analyze the problem step by step.\n",
|
| 492 |
+
"\n",
|
| 493 |
+
"---\n",
|
| 494 |
+
"\n",
|
| 495 |
+
"### Problem restatement:\n",
|
| 496 |
+
"- Two trains start from the same point at the same time, traveling in opposite directions.\n",
|
| 497 |
+
"- Train A travels at 60 mph.\n",
|
| 498 |
+
"- Train B travels at 40 mph.\n",
|
| 499 |
+
"- After 3 hours, **one** train (which one? The problem implies it is the slower one, but let's clarify) returns toward the starting point at 50 mph.\n",
|
| 500 |
+
"- Question: After how long from the start will the two trains be exactly 210 miles apart?\n",
|
| 501 |
+
"\n",
|
| 502 |
+
"---\n",
|
| 503 |
+
"\n",
|
| 504 |
+
"### Step 1: Understand the setup and assumptions\n",
|
| 505 |
+
"\n",
|
| 506 |
+
"- The two trains start at time \\( t=0 \\), at the same location.\n",
|
| 507 |
+
"- One train moves east, the other west (opposite directions).\n",
|
| 508 |
+
"- Speeds initially: Train A = 60 mph, Train B = 40 mph.\n",
|
| 509 |
+
"- At \\( t=3 \\) hours, **one train returns toward the start at 50 mph**.\n",
|
| 510 |
+
"\n",
|
| 511 |
+
"We need to know which train turns back at 3 hours and changes speed to 50 mph going toward the starting point.\n",
|
| 512 |
+
"\n",
|
| 513 |
+
"---\n",
|
| 514 |
+
"\n",
|
| 515 |
+
"### Clarification:\n",
|
| 516 |
+
"\n",
|
| 517 |
+
"It’s more logical that the train going 40 mph (the slower train) is the one that returns toward the start at 50 mph after 3 hours (could be either, but let's proceed with Train B).\n",
|
| 518 |
+
"\n",
|
| 519 |
+
"Let:\n",
|
| 520 |
+
"\n",
|
| 521 |
+
"- Train A: speed 60 mph, moves \"right\" (positive direction)\n",
|
| 522 |
+
"- Train B: speed 40 mph, moves \"left\" (negative direction)\n",
|
| 523 |
+
"\n",
|
| 524 |
+
"At \\( t=3 \\):\n",
|
| 525 |
+
"\n",
|
| 526 |
+
"- Train B turns around and moves \"right\" toward the starting point at 50 mph.\n",
|
| 527 |
+
"\n",
|
| 528 |
+
"We want to find time \\( T \\) (in hours) from the start when the distance between the trains is 210 miles.\n",
|
| 529 |
+
"\n",
|
| 530 |
+
"---\n",
|
| 531 |
+
"\n",
|
| 532 |
+
"### Step 2: Express positions of both trains over time\n",
|
| 533 |
+
"\n",
|
| 534 |
+
"Define:\n",
|
| 535 |
+
"- \\( t \\) = time in hours from start.\n",
|
| 536 |
+
"\n",
|
| 537 |
+
"Positions:\n",
|
| 538 |
+
"\n",
|
| 539 |
+
"- Train A (always to the right, constant 60 mph):\n",
|
| 540 |
+
"\n",
|
| 541 |
+
"\\[\n",
|
| 542 |
+
"x_A(t) = 60t\n",
|
| 543 |
+
"\\]\n",
|
| 544 |
+
"\n",
|
| 545 |
+
"- Train B:\n",
|
| 546 |
+
"\n",
|
| 547 |
+
"1. From \\( 0 \\leq t \\leq 3 \\):\n",
|
| 548 |
+
"\n",
|
| 549 |
+
"\\[\n",
|
| 550 |
+
"x_B(t) = -40t\n",
|
| 551 |
+
"\\]\n",
|
| 552 |
+
"\n",
|
| 553 |
+
"2. From \\( t > 3 \\):\n",
|
| 554 |
+
"\n",
|
| 555 |
+
"At \\( t=3 \\), position:\n",
|
| 556 |
+
"\n",
|
| 557 |
+
"\\[\n",
|
| 558 |
+
"x_B(3) = -40 \\times 3 = -120\n",
|
| 559 |
+
"\\]\n",
|
| 560 |
+
"\n",
|
| 561 |
+
"Then Train B reverses direction and moves toward the start at 50 mph.\n",
|
| 562 |
+
"\n",
|
| 563 |
+
"Since it was at \\(-120\\), moving right at 50 mph means:\n",
|
| 564 |
+
"\n",
|
| 565 |
+
"\\[\n",
|
| 566 |
+
"x_B(t) = x_B(3) + 50 (t - 3) = -120 + 50(t-3), \\quad t > 3\n",
|
| 567 |
+
"\\]\n",
|
| 568 |
+
"\n",
|
| 569 |
+
"---\n",
|
| 570 |
+
"\n",
|
| 571 |
+
"### Step 3: Write the distance between trains at time \\( t \\)\n",
|
| 572 |
+
"\n",
|
| 573 |
+
"Distance between trains:\n",
|
| 574 |
+
"\n",
|
| 575 |
+
"\\[\n",
|
| 576 |
+
"D(t) = | x_A(t) - x_B(t) |\n",
|
| 577 |
+
"\\]\n",
|
| 578 |
+
"\n",
|
| 579 |
+
"---\n",
|
| 580 |
+
"\n",
|
| 581 |
+
"### Step 4: Find when distance is 210 miles\n",
|
| 582 |
+
"\n",
|
| 583 |
+
"We have two time intervals to consider:\n",
|
| 584 |
+
"\n",
|
| 585 |
+
"- For \\( 0 \\leq t \\leq 3 \\)\n",
|
| 586 |
+
"- For \\( t > 3 \\)\n",
|
| 587 |
+
"\n",
|
| 588 |
+
"---\n",
|
| 589 |
+
"\n",
|
| 590 |
+
"### Case 1: \\( 0 \\leq t \\leq 3 \\)\n",
|
| 591 |
+
"\n",
|
| 592 |
+
"Positions:\n",
|
| 593 |
+
"\n",
|
| 594 |
+
"- \\( x_A = 60t \\)\n",
|
| 595 |
+
"- \\( x_B = -40t \\)\n",
|
| 596 |
+
"\n",
|
| 597 |
+
"Distance:\n",
|
| 598 |
+
"\n",
|
| 599 |
+
"\\[\n",
|
| 600 |
+
"D(t) = |60t - (-40t)| = |100t| = 100t\n",
|
| 601 |
+
"\\]\n",
|
| 602 |
+
"\n",
|
| 603 |
+
"Set equal to 210:\n",
|
| 604 |
+
"\n",
|
| 605 |
+
"\\[\n",
|
| 606 |
+
"100t = 210 \\implies t = 2.1 \\, \\text{hours}\n",
|
| 607 |
+
"\\]\n",
|
| 608 |
+
"\n",
|
| 609 |
+
"Since 2.1 < 3, the first time they are 210 miles apart is at 2.1 hours.\n",
|
| 610 |
+
"\n",
|
| 611 |
+
"---\n",
|
| 612 |
+
"\n",
|
| 613 |
+
"### Case 2: \\( t > 3 \\)\n",
|
| 614 |
+
"\n",
|
| 615 |
+
"Positions:\n",
|
| 616 |
+
"\n",
|
| 617 |
+
"- \\( x_A = 60t \\)\n",
|
| 618 |
+
"- \\( x_B = -120 + 50(t-3) = -120 + 50t - 150 = 50t - 270 \\)\n",
|
| 619 |
+
"\n",
|
| 620 |
+
"Distance:\n",
|
| 621 |
+
"\n",
|
| 622 |
+
"\\[\n",
|
| 623 |
+
"D(t) = |60t - (50t - 270)| = |60t - 50t + 270| = |10t + 270|\n",
|
| 624 |
+
"\\]\n",
|
| 625 |
+
"\n",
|
| 626 |
+
"Since \\( t > 3 \\), \\( 10t + 270 > 0 \\), so:\n",
|
| 627 |
+
"\n",
|
| 628 |
+
"\\[\n",
|
| 629 |
+
"D(t) = 10t + 270\n",
|
| 630 |
+
"\\]\n",
|
| 631 |
+
"\n",
|
| 632 |
+
"Set equal to 210:\n",
|
| 633 |
+
"\n",
|
| 634 |
+
"\\[\n",
|
| 635 |
+
"10t + 270 = 210 \\implies 10t = -60 \\implies t = -6\n",
|
| 636 |
+
"\\]\n",
|
| 637 |
+
"\n",
|
| 638 |
+
"Negative time, invalid.\n",
|
| 639 |
+
"\n",
|
| 640 |
+
"---\n",
|
| 641 |
+
"\n",
|
| 642 |
+
"### Conclusion:\n",
|
| 643 |
+
"\n",
|
| 644 |
+
"- Distance reaches 210 miles first at \\( t = 2.1 \\) hours before the 3-hour mark.\n",
|
| 645 |
+
"- After 3 hours, distance will always be greater than 270 miles (since the formula gives \\(D(t) = 10t + 270\\)) and no later time will be 210 miles apart again.\n",
|
| 646 |
+
"\n",
|
| 647 |
+
"---\n",
|
| 648 |
+
"\n",
|
| 649 |
+
"### Final answer:\n",
|
| 650 |
+
"\n",
|
| 651 |
+
"\\[\n",
|
| 652 |
+
"\\boxed{2.1 \\text{ hours after the start}}\n",
|
| 653 |
+
"\\]\n",
|
| 654 |
+
"\n",
|
| 655 |
+
"---\n",
|
| 656 |
+
"\n",
|
| 657 |
+
"**Summary**: The trains are exactly 210 miles apart for the first time 2.1 hours after starting. The turnaround of one train at 3 hours does not cause them to be 210 miles apart again."
|
| 658 |
+
],
|
| 659 |
+
"text/plain": [
|
| 660 |
+
"<IPython.core.display.Markdown object>"
|
| 661 |
+
]
|
| 662 |
+
},
|
| 663 |
+
"metadata": {},
|
| 664 |
+
"output_type": "display_data"
|
| 665 |
+
}
|
| 666 |
+
],
|
| 667 |
+
"source": [
|
| 668 |
+
"from IPython.display import Markdown, display\n",
|
| 669 |
+
"\n",
|
| 670 |
+
"display(Markdown(answer))\n",
|
| 671 |
+
"\n"
|
| 672 |
+
]
|
| 673 |
+
},
|
| 674 |
+
{
|
| 675 |
+
"cell_type": "markdown",
|
| 676 |
+
"metadata": {},
|
| 677 |
+
"source": [
|
| 678 |
+
"# Congratulations!\n",
|
| 679 |
+
"\n",
|
| 680 |
+
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 681 |
+
"\n",
|
| 682 |
+
"Next time things get more interesting..."
|
| 683 |
+
]
|
| 684 |
+
},
|
| 685 |
+
{
|
| 686 |
+
"cell_type": "markdown",
|
| 687 |
+
"metadata": {},
|
| 688 |
+
"source": [
|
| 689 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 690 |
+
" <tr>\n",
|
| 691 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 692 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 693 |
+
" </td>\n",
|
| 694 |
+
" <td>\n",
|
| 695 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 696 |
+
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 697 |
+
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 698 |
+
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 699 |
+
"            Finally have a third LLM call propose the Agentic AI solution. <br/>\n",
|
| 700 |
+
"            We will cover this in upcoming labs, so don't worry if you're unsure - just give it a try!\n",
|
| 701 |
+
" </span>\n",
|
| 702 |
+
" </td>\n",
|
| 703 |
+
" </tr>\n",
|
| 704 |
+
"</table>"
|
| 705 |
+
]
|
| 706 |
+
},
|
| 707 |
+
{
|
| 708 |
+
"cell_type": "code",
|
| 709 |
+
"execution_count": 19,
|
| 710 |
+
"metadata": {},
|
| 711 |
+
"outputs": [
|
| 712 |
+
{
|
| 713 |
+
"name": "stdout",
|
| 714 |
+
"output_type": "stream",
|
| 715 |
+
"text": [
|
| 716 |
+
"OpenAI API Key exists and begins sk-proj-\n",
|
| 717 |
+
"2 + 2 = 4\n"
|
| 718 |
+
]
|
| 719 |
+
}
|
| 720 |
+
],
|
| 721 |
+
"source": [
|
| 722 |
+
"# Check the key - if you're not using OpenAI, check whichever key you're using! Ollama doesn't need a key.\n",
|
| 723 |
+
"\n",
|
| 724 |
+
"import os\n",
|
| 725 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 726 |
+
"\n",
|
| 727 |
+
"if openai_api_key:\n",
|
| 728 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 729 |
+
"else:\n",
|
| 730 |
+
" print(\"OpenAI API Key not set - please head to the troubleshooting guide in the setup folder\")\n",
|
| 731 |
+
"\n",
|
| 732 |
+
"messages = [{\"role\": \"user\", \"content\": \"What is 2+2?\"}] \n",
|
| 733 |
+
" \n",
|
| 734 |
+
"response = openai.chat.completions.create(\n",
|
| 735 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 736 |
+
" messages=messages\n",
|
| 737 |
+
")\n",
|
| 738 |
+
"\n",
|
| 739 |
+
"# Then read the business idea:\n",
|
| 740 |
+
"\n",
|
| 741 |
+
"business_idea = response.choices[0].message.content\n",
|
| 742 |
+
"print(business_idea)\n",
|
| 743 |
+
"\n",
|
| 744 |
+
"# And repeat! In the next message, include the business idea within the message"
|
| 745 |
+
]
|
| 746 |
+
},
|
| 747 |
+
{
|
| 748 |
+
"cell_type": "markdown",
|
| 749 |
+
"metadata": {},
|
| 750 |
+
"source": []
|
| 751 |
+
},
|
| 752 |
+
{
|
| 753 |
+
"cell_type": "code",
|
| 754 |
+
"execution_count": null,
|
| 755 |
+
"metadata": {},
|
| 756 |
+
"outputs": [],
|
| 757 |
+
"source": [
|
| 758 |
+
"question = \"pick a business area that might be worth exploring for Agentic AI opportunities.\"\n",
|
| 759 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 760 |
+
]
|
| 761 |
+
},
|
| 762 |
+
{
|
| 763 |
+
"cell_type": "code",
|
| 764 |
+
"execution_count": 21,
|
| 765 |
+
"metadata": {},
|
| 766 |
+
"outputs": [
|
| 767 |
+
{
|
| 768 |
+
"name": "stdout",
|
| 769 |
+
"output_type": "stream",
|
| 770 |
+
"text": [
|
| 771 |
+
"One promising business area to explore for Agentic AI opportunities is **Supply Chain and Logistics Management**.\n",
|
| 772 |
+
"\n",
|
| 773 |
+
"### Why Supply Chain and Logistics?\n",
|
| 774 |
+
"- **Complex Decision-Making:** Supply chains involve multifaceted decisions such as inventory management, demand forecasting, route optimization, and supplier coordination. Agentic AI can autonomously navigate these complexities to optimize outcomes.\n",
|
| 775 |
+
"- **Dynamic Environment:** Supply chains are affected by real-time changes (e.g., weather, demand fluctuations, geopolitical events). Agentic AI can adapt strategies proactively without human intervention.\n",
|
| 776 |
+
"- **Cost Efficiency:** Automating procurement, warehousing, and delivery decisions can significantly reduce operational costs.\n",
|
| 777 |
+
"- **Sustainability:** Agentic AI can optimize routes and inventory to reduce waste and carbon footprint, addressing increasing regulatory and consumer demand for sustainability.\n",
|
| 778 |
+
"- **Integration of IoT and Data:** With IoT sensors, real-time data streams provide rich input that Agentic AI can use to make autonomous adjustments throughout the supply chain.\n",
|
| 779 |
+
"\n",
|
| 780 |
+
"### Example Applications\n",
|
| 781 |
+
"- Autonomous demand forecasting agents that adjust inventory orders dynamically.\n",
|
| 782 |
+
"- Intelligent route planning agents that optimize last-mile delivery based on traffic and weather.\n",
|
| 783 |
+
"- Negotiation agents that autonomously interact with suppliers to secure best prices and terms.\n",
|
| 784 |
+
"- Risk management agents that predict disruptions and adjust supplier networks or logistics proactively.\n",
|
| 785 |
+
"\n",
|
| 786 |
+
"### Summary\n",
|
| 787 |
+
"Supply chain and logistics management stands out as a fertile ground for Agentic AI deployment because of its complexity, dynamic nature, and high impact on business performance. Agentic AI can deliver autonomous decision-making and continuous optimization, unlocking significant efficiency and competitive advantages.\n"
|
| 788 |
+
]
|
| 789 |
+
}
|
| 790 |
+
],
|
| 791 |
+
"source": [
|
| 792 |
+
"response = openai.chat.completions.create(\n",
|
| 793 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 794 |
+
" messages=messages\n",
|
| 795 |
+
")\n",
|
| 796 |
+
"\n",
|
| 797 |
+
"question = response.choices[0].message.content\n",
|
| 798 |
+
"\n",
|
| 799 |
+
"print(question)"
|
| 800 |
+
]
|
| 801 |
+
},
|
| 802 |
+
{
|
| 803 |
+
"cell_type": "code",
|
| 804 |
+
"execution_count": 24,
|
| 805 |
+
"metadata": {},
|
| 806 |
+
"outputs": [],
|
| 807 |
+
"source": [
|
| 808 |
+
"question = \"pain point in the supply chain and logistics management something challenging that might be ripe for agentic solution\"\n",
|
| 809 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 810 |
+
]
|
| 811 |
+
},
|
| 812 |
+
{
|
| 813 |
+
"cell_type": "code",
|
| 814 |
+
"execution_count": 25,
|
| 815 |
+
"metadata": {},
|
| 816 |
+
"outputs": [
|
| 817 |
+
{
|
| 818 |
+
"name": "stdout",
|
| 819 |
+
"output_type": "stream",
|
| 820 |
+
"text": [
|
| 821 |
+
"A significant pain point in supply chain and logistics management is **real-time visibility and dynamic decision-making under uncertainty**. Specifically:\n",
|
| 822 |
+
"\n",
|
| 823 |
+
"### Pain Point:\n",
|
| 824 |
+
"**Lack of real-time, end-to-end visibility combined with the inability to dynamically respond to disruptions such as delays, demand fluctuations, or route changes leads to inefficiencies, increased costs, and poor customer satisfaction.**\n",
|
| 825 |
+
"\n",
|
| 826 |
+
"This challenge arises due to fragmented data sources, siloed systems, the complexity of global networks, and unpredictable events (e.g., weather, geopolitical issues, or sudden spikes in demand).\n",
|
| 827 |
+
"\n",
|
| 828 |
+
"---\n",
|
| 829 |
+
"\n",
|
| 830 |
+
"### Why It’s Ripe for an Agentic Solution:\n",
|
| 831 |
+
"An agentic solution — autonomous, proactive agents capable of perceiving the environment, reasoning about it, and making decisions — can address this by:\n",
|
| 832 |
+
"\n",
|
| 833 |
+
"- **Continuously monitoring multiple data streams** (IoT sensors, GPS, inventory levels, weather forecasts)\n",
|
| 834 |
+
"- **Predicting potential disruptions** using AI and machine learning models\n",
|
| 835 |
+
"- **Autonomously recommending or executing mitigation actions** such as rerouting shipments, adjusting inventory allocations, or rescheduling deliveries\n",
|
| 836 |
+
"- **Collaborating with other agents** representing different stakeholders (suppliers, carriers, warehouses) to optimize decisions dynamically\n",
|
| 837 |
+
"\n",
|
| 838 |
+
"---\n",
|
| 839 |
+
"\n",
|
| 840 |
+
"### Example Agentic Solution Concept:\n",
|
| 841 |
+
"An autonomous logistics agent platform that:\n",
|
| 842 |
+
"\n",
|
| 843 |
+
"- Integrates real-time data from all nodes in the supply chain\n",
|
| 844 |
+
"- Uses predictive analytics to forecast delays or bottlenecks\n",
|
| 845 |
+
"- Communicates with other agents to negotiate alternative routes or reorder supplies dynamically\n",
|
| 846 |
+
"- Provides actionable insights and automated interventions to human managers when necessary\n",
|
| 847 |
+
"\n",
|
| 848 |
+
"This kind of solution could drastically reduce downtime, enhance responsiveness, and improve overall supply chain resilience.\n"
|
| 849 |
+
]
|
| 850 |
+
}
|
| 851 |
+
],
|
| 852 |
+
"source": [
|
| 853 |
+
"response = openai.chat.completions.create(\n",
|
| 854 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 855 |
+
" messages=messages\n",
|
| 856 |
+
")\n",
|
| 857 |
+
"\n",
|
| 858 |
+
"question = response.choices[0].message.content\n",
|
| 859 |
+
"\n",
|
| 860 |
+
"print(question)"
|
| 861 |
+
]
|
| 862 |
+
},
|
| 863 |
+
{
|
| 864 |
+
"cell_type": "code",
|
| 865 |
+
"execution_count": 26,
|
| 866 |
+
"metadata": {},
|
| 867 |
+
"outputs": [],
|
| 868 |
+
"source": [
|
| 869 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 870 |
+
]
|
| 871 |
+
},
|
| 872 |
+
{
|
| 873 |
+
"cell_type": "code",
|
| 874 |
+
"execution_count": 28,
|
| 875 |
+
"metadata": {},
|
| 876 |
+
"outputs": [],
|
| 877 |
+
"source": [
|
| 878 |
+
"question = \"propose a solution for the pain point in the supply chain and logistics management something challenging that might be ripe for agentic solution\"\n",
|
| 879 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 880 |
+
]
|
| 881 |
+
},
|
| 882 |
+
{
|
| 883 |
+
"cell_type": "code",
|
| 884 |
+
"execution_count": 29,
|
| 885 |
+
"metadata": {},
|
| 886 |
+
"outputs": [
|
| 887 |
+
{
|
| 888 |
+
"name": "stdout",
|
| 889 |
+
"output_type": "stream",
|
| 890 |
+
"text": [
|
| 891 |
+
"Certainly! One major pain point in supply chain and logistics management is **real-time visibility and proactive disruption management**. Many supply chains suffer from a lack of timely information flow across diverse stakeholders—manufacturers, suppliers, carriers, warehouses, and retailers—which results in delayed responses to disruptions like delays, inventory shortages, or demand spikes.\n",
|
| 892 |
+
"\n",
|
| 893 |
+
"---\n",
|
| 894 |
+
"\n",
|
| 895 |
+
"### Pain Point\n",
|
| 896 |
+
"**Lack of real-time, end-to-end visibility and autonomous decision-making in supply chains leads to inefficiencies, increased costs, and poor customer service.**\n",
|
| 897 |
+
"\n",
|
| 898 |
+
"- Multiple parties operate in silos with fragmented data.\n",
|
| 899 |
+
"- Human managers are overwhelmed with monitoring and cannot anticipate disruptions quickly.\n",
|
| 900 |
+
"- Reactive responses lead to cascading delays and higher operational costs.\n",
|
| 901 |
+
"- Difficulty in dynamically re-routing shipments or reallocating inventory without delays.\n",
|
| 902 |
+
"\n",
|
| 903 |
+
"---\n",
|
| 904 |
+
"\n",
|
| 905 |
+
"### Proposed Agentic Solution: Autonomous Multi-Agent Supply Chain Management System\n",
|
| 906 |
+
"\n",
|
| 907 |
+
"**Concept:** \n",
|
| 908 |
+
"Develop a network of intelligent, collaborative software agents, each representing different actors and functions in the supply chain. These agents operate autonomously, communicating in real-time, sharing information, and making localized decisions that collectively optimize the supply chain’s performance dynamically.\n",
|
| 909 |
+
"\n",
|
| 910 |
+
"---\n",
|
| 911 |
+
"\n",
|
| 912 |
+
"### Key Features\n",
|
| 913 |
+
"\n",
|
| 914 |
+
"1. **Distributed Agent Network:** \n",
|
| 915 |
+
" - Agents assigned to different nodes: suppliers, warehouses, transport carriers, retailers.\n",
|
| 916 |
+
" - Each agent monitors its local environment using IoT sensors, ERP systems, and external data sources (weather, traffic, geopolitical events).\n",
|
| 917 |
+
" \n",
|
| 918 |
+
"2. **Real-Time Data Fusion:** \n",
|
| 919 |
+
" - Agents aggregate data across the network creating an up-to-date digital twin of the supply chain.\n",
|
| 920 |
+
" - Enables immediate detection of anomalies or disruptions.\n",
|
| 921 |
+
"\n",
|
| 922 |
+
"3. **Proactive Disruption Prediction & Response:** \n",
|
| 923 |
+
" - Agents employ machine learning and predictive analytics to forecast delays, demand surges, or capacity shortages.\n",
|
| 924 |
+
" - Automatically explore alternative options for rerouting, expedited shipping, or inventory reallocation.\n",
|
| 925 |
+
" - Negotiate and coordinate with other agents to implement solutions without human intervention.\n",
|
| 926 |
+
"\n",
|
| 927 |
+
"4. **Dynamic Optimization & Learning:** \n",
|
| 928 |
+
" - Continuously optimize cost, delivery speed, and inventory levels.\n",
|
| 929 |
+
" - Learn from past disruptions to improve future responses.\n",
|
| 930 |
+
"\n",
|
| 931 |
+
"5. **Human-in-the-Loop Control:** \n",
|
| 932 |
+
" - Alerts and recommendations are presented to human managers for oversight.\n",
|
| 933 |
+
" - Human input used to fine-tune agent policies and handle exceptions.\n",
|
| 934 |
+
"\n",
|
| 935 |
+
"---\n",
|
| 936 |
+
"\n",
|
| 937 |
+
"### Benefits\n",
|
| 938 |
+
"\n",
|
| 939 |
+
"- Significant reduction in disruption-related delays and costs.\n",
|
| 940 |
+
"- Increased agility and resilience to shocks (natural disasters, supplier failures, sudden demand changes).\n",
|
| 941 |
+
"- Better resource utilization and customer satisfaction.\n",
|
| 942 |
+
"- Scalability across complex, global supply chains.\n",
|
| 943 |
+
"\n",
|
| 944 |
+
"---\n",
|
| 945 |
+
"\n",
|
| 946 |
+
"### Example Scenario:\n",
|
| 947 |
+
"\n",
|
| 948 |
+
"If a port strike delays shipment, the supplier agent detects the disruption early. It negotiates with logistics carriers’ agents to find alternative transport routes or modes (air freight vs sea). Warehouse agents pre-emptively reallocate stock to cover shortages at affected locations. Retail agents adjust promotions and stock levels, all coordinated seamlessly without waiting for human decisions.\n",
|
| 949 |
+
"\n",
|
| 950 |
+
"---\n",
|
| 951 |
+
"\n",
|
| 952 |
+
"### Technologies to Leverage\n",
|
| 953 |
+
"\n",
|
| 954 |
+
"- Multi-agent Reinforcement Learning (MARL)\n",
|
| 955 |
+
"- IoT and edge computing for sensor data collection\n",
|
| 956 |
+
"- Blockchain for secure, transparent data sharing\n",
|
| 957 |
+
"- Cloud platforms for scalability\n",
|
| 958 |
+
"\n",
|
| 959 |
+
"---\n",
|
| 960 |
+
"\n",
|
| 961 |
+
"This agentic solution addresses a critical, complex supply chain pain point by leveraging autonomous collaboration and intelligent decision-making—ripe for innovation and impact. If you'd like, I can also help sketch out a roadmap or technical architecture for such a system!\n"
|
| 962 |
+
]
|
| 963 |
+
}
|
| 964 |
+
],
|
| 965 |
+
"source": [
|
| 966 |
+
"response = openai.chat.completions.create(\n",
|
| 967 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 968 |
+
" messages=messages\n",
|
| 969 |
+
")\n",
|
| 970 |
+
"\n",
|
| 971 |
+
"question = response.choices[0].message.content\n",
|
| 972 |
+
"\n",
|
| 973 |
+
"print(question)"
|
| 974 |
+
]
|
| 975 |
+
}
|
| 976 |
+
],
|
| 977 |
+
"metadata": {
|
| 978 |
+
"kernelspec": {
|
| 979 |
+
"display_name": ".venv",
|
| 980 |
+
"language": "python",
|
| 981 |
+
"name": "python3"
|
| 982 |
+
},
|
| 983 |
+
"language_info": {
|
| 984 |
+
"codemirror_mode": {
|
| 985 |
+
"name": "ipython",
|
| 986 |
+
"version": 3
|
| 987 |
+
},
|
| 988 |
+
"file_extension": ".py",
|
| 989 |
+
"mimetype": "text/x-python",
|
| 990 |
+
"name": "python",
|
| 991 |
+
"nbconvert_exporter": "python",
|
| 992 |
+
"pygments_lexer": "ipython3",
|
| 993 |
+
"version": "3.12.10"
|
| 994 |
+
}
|
| 995 |
+
},
|
| 996 |
+
"nbformat": 4,
|
| 997 |
+
"nbformat_minor": 2
|
| 998 |
+
}
|
2_lab2.ipynb
ADDED
|
@@ -0,0 +1,492 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "markdown",
|
| 14 |
+
"metadata": {},
|
| 15 |
+
"source": [
|
| 16 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
+
" <tr>\n",
|
| 18 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
+
" </td>\n",
|
| 21 |
+
" <td>\n",
|
| 22 |
+
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
+
"            <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a GitHub account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
+
" </span>\n",
|
| 25 |
+
" </td>\n",
|
| 26 |
+
" </tr>\n",
|
| 27 |
+
"</table>"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"cell_type": "code",
|
| 32 |
+
"execution_count": null,
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"outputs": [],
|
| 35 |
+
"source": [
|
| 36 |
+
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 37 |
+
"\n",
|
| 38 |
+
"import os\n",
|
| 39 |
+
"import json\n",
|
| 40 |
+
"from dotenv import load_dotenv\n",
|
| 41 |
+
"from openai import OpenAI\n",
|
| 42 |
+
"from anthropic import Anthropic\n",
|
| 43 |
+
"from IPython.display import Markdown, display"
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"cell_type": "code",
|
| 48 |
+
"execution_count": null,
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"outputs": [],
|
| 51 |
+
"source": [
|
| 52 |
+
"# Always remember to do this!\n",
|
| 53 |
+
"load_dotenv(override=True)"
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "code",
|
| 58 |
+
"execution_count": null,
|
| 59 |
+
"metadata": {},
|
| 60 |
+
"outputs": [],
|
| 61 |
+
"source": [
|
| 62 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 63 |
+
"\n",
|
| 64 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 65 |
+
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 66 |
+
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 67 |
+
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 68 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 69 |
+
"\n",
|
| 70 |
+
"if openai_api_key:\n",
|
| 71 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 72 |
+
"else:\n",
|
| 73 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 74 |
+
" \n",
|
| 75 |
+
"if anthropic_api_key:\n",
|
| 76 |
+
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 77 |
+
"else:\n",
|
| 78 |
+
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 79 |
+
"\n",
|
| 80 |
+
"if google_api_key:\n",
|
| 81 |
+
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 82 |
+
"else:\n",
|
| 83 |
+
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 84 |
+
"\n",
|
| 85 |
+
"if deepseek_api_key:\n",
|
| 86 |
+
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 87 |
+
"else:\n",
|
| 88 |
+
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"if groq_api_key:\n",
|
| 91 |
+
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 92 |
+
"else:\n",
|
| 93 |
+
" print(\"Groq API Key not set (and this is optional)\")"
|
| 94 |
+
]
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"cell_type": "code",
|
| 98 |
+
"execution_count": null,
|
| 99 |
+
"metadata": {},
|
| 100 |
+
"outputs": [],
|
| 101 |
+
"source": [
|
| 102 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 103 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 104 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 105 |
+
]
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"cell_type": "code",
|
| 109 |
+
"execution_count": null,
|
| 110 |
+
"metadata": {},
|
| 111 |
+
"outputs": [],
|
| 112 |
+
"source": [
|
| 113 |
+
"messages"
|
| 114 |
+
]
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"cell_type": "code",
|
| 118 |
+
"execution_count": null,
|
| 119 |
+
"metadata": {},
|
| 120 |
+
"outputs": [],
|
| 121 |
+
"source": [
|
| 122 |
+
"openai = OpenAI()\n",
|
| 123 |
+
"response = openai.chat.completions.create(\n",
|
| 124 |
+
" model=\"gpt-5-mini\",\n",
|
| 125 |
+
" messages=messages,\n",
|
| 126 |
+
")\n",
|
| 127 |
+
"question = response.choices[0].message.content\n",
|
| 128 |
+
"print(question)\n"
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"cell_type": "code",
|
| 133 |
+
"execution_count": null,
|
| 134 |
+
"metadata": {},
|
| 135 |
+
"outputs": [],
|
| 136 |
+
"source": [
|
| 137 |
+
"competitors = []\n",
|
| 138 |
+
"answers = []\n",
|
| 139 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 140 |
+
]
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"cell_type": "markdown",
|
| 144 |
+
"metadata": {},
|
| 145 |
+
"source": [
|
| 146 |
+
"## Note - update since the videos\n",
|
| 147 |
+
"\n",
|
| 148 |
+
"I've updated the model names to use the latest models below, like GPT 5 and Claude Sonnet 4.5. It's worth noting that these models can be quite slow - like 1-2 minutes - but they do a great job! Feel free to switch them for faster models if you'd prefer, like the ones I use in the video."
|
| 149 |
+
]
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"cell_type": "code",
|
| 153 |
+
"execution_count": null,
|
| 154 |
+
"metadata": {},
|
| 155 |
+
"outputs": [],
|
| 156 |
+
"source": [
|
| 157 |
+
"# The API we know well\n",
|
| 158 |
+
"# I've updated this with the latest model, but it can take some time because it likes to think!\n",
|
| 159 |
+
"# Replace the model with gpt-4.1-mini if you'd prefer not to wait 1-2 mins\n",
|
| 160 |
+
"\n",
|
| 161 |
+
"model_name = \"gpt-5-nano\"\n",
|
| 162 |
+
"\n",
|
| 163 |
+
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 164 |
+
"answer = response.choices[0].message.content\n",
|
| 165 |
+
"\n",
|
| 166 |
+
"display(Markdown(answer))\n",
|
| 167 |
+
"competitors.append(model_name)\n",
|
| 168 |
+
"answers.append(answer)"
|
| 169 |
+
]
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
"cell_type": "code",
|
| 173 |
+
"execution_count": null,
|
| 174 |
+
"metadata": {},
|
| 175 |
+
"outputs": [],
|
| 176 |
+
"source": [
|
| 177 |
+
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 178 |
+
"\n",
|
| 179 |
+
"model_name = \"claude-sonnet-4-5\"\n",
|
| 180 |
+
"\n",
|
| 181 |
+
"claude = Anthropic()\n",
|
| 182 |
+
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 183 |
+
"answer = response.content[0].text\n",
|
| 184 |
+
"\n",
|
| 185 |
+
"display(Markdown(answer))\n",
|
| 186 |
+
"competitors.append(model_name)\n",
|
| 187 |
+
"answers.append(answer)"
|
| 188 |
+
]
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"cell_type": "code",
|
| 192 |
+
"execution_count": null,
|
| 193 |
+
"metadata": {},
|
| 194 |
+
"outputs": [],
|
| 195 |
+
"source": [
|
| 196 |
+
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 197 |
+
"model_name = \"gemini-2.5-flash\"\n",
|
| 198 |
+
"\n",
|
| 199 |
+
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 200 |
+
"answer = response.choices[0].message.content\n",
|
| 201 |
+
"\n",
|
| 202 |
+
"display(Markdown(answer))\n",
|
| 203 |
+
"competitors.append(model_name)\n",
|
| 204 |
+
"answers.append(answer)"
|
| 205 |
+
]
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"cell_type": "code",
|
| 209 |
+
"execution_count": null,
|
| 210 |
+
"metadata": {},
|
| 211 |
+
"outputs": [],
|
| 212 |
+
"source": [
|
| 213 |
+
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 214 |
+
"model_name = \"deepseek-chat\"\n",
|
| 215 |
+
"\n",
|
| 216 |
+
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 217 |
+
"answer = response.choices[0].message.content\n",
|
| 218 |
+
"\n",
|
| 219 |
+
"display(Markdown(answer))\n",
|
| 220 |
+
"competitors.append(model_name)\n",
|
| 221 |
+
"answers.append(answer)"
|
| 222 |
+
]
|
| 223 |
+
},
|
| 224 |
+
{
|
| 225 |
+
"cell_type": "code",
|
| 226 |
+
"execution_count": null,
|
| 227 |
+
"metadata": {},
|
| 228 |
+
"outputs": [],
|
| 229 |
+
"source": [
|
| 230 |
+
"# Updated with the latest Open Source model from OpenAI\n",
|
| 231 |
+
"\n",
|
| 232 |
+
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 233 |
+
"model_name = \"openai/gpt-oss-120b\"\n",
|
| 234 |
+
"\n",
|
| 235 |
+
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 236 |
+
"answer = response.choices[0].message.content\n",
|
| 237 |
+
"\n",
|
| 238 |
+
"display(Markdown(answer))\n",
|
| 239 |
+
"competitors.append(model_name)\n",
|
| 240 |
+
"answers.append(answer)\n"
|
| 241 |
+
]
|
| 242 |
+
},
|
| 243 |
+
{
|
| 244 |
+
"cell_type": "markdown",
|
| 245 |
+
"metadata": {},
|
| 246 |
+
"source": [
|
| 247 |
+
"## For the next cell, we will use Ollama\n",
|
| 248 |
+
"\n",
|
| 249 |
+
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 250 |
+
"and runs models locally using high performance C++ code.\n",
|
| 251 |
+
"\n",
|
| 252 |
+
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 253 |
+
"\n",
|
| 254 |
+
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 255 |
+
"\n",
|
| 256 |
+
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 257 |
+
"\n",
|
| 258 |
+
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 259 |
+
"\n",
|
| 260 |
+
"`ollama pull <model_name>` downloads a model locally \n",
|
| 261 |
+
"`ollama ls` lists all the models you've downloaded \n",
|
| 262 |
+
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 263 |
+
]
|
| 264 |
+
},
|
| 265 |
+
{
|
| 266 |
+
"cell_type": "markdown",
|
| 267 |
+
"metadata": {},
|
| 268 |
+
"source": [
|
| 269 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 270 |
+
" <tr>\n",
|
| 271 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 272 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 273 |
+
" </td>\n",
|
| 274 |
+
" <td>\n",
|
| 275 |
+
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 276 |
+
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See <a href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 277 |
+
" </span>\n",
|
| 278 |
+
" </td>\n",
|
| 279 |
+
" </tr>\n",
|
| 280 |
+
"</table>"
|
| 281 |
+
]
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"cell_type": "code",
|
| 285 |
+
"execution_count": null,
|
| 286 |
+
"metadata": {},
|
| 287 |
+
"outputs": [],
|
| 288 |
+
"source": [
|
| 289 |
+
"!ollama pull llama3.2"
|
| 290 |
+
]
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"cell_type": "code",
|
| 294 |
+
"execution_count": null,
|
| 295 |
+
"metadata": {},
|
| 296 |
+
"outputs": [],
|
| 297 |
+
"source": [
|
| 298 |
+
"ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 299 |
+
"model_name = \"llama3.2\"\n",
|
| 300 |
+
"\n",
|
| 301 |
+
"response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 302 |
+
"answer = response.choices[0].message.content\n",
|
| 303 |
+
"\n",
|
| 304 |
+
"display(Markdown(answer))\n",
|
| 305 |
+
"competitors.append(model_name)\n",
|
| 306 |
+
"answers.append(answer)"
|
| 307 |
+
]
|
| 308 |
+
},
|
| 309 |
+
{
|
| 310 |
+
"cell_type": "code",
|
| 311 |
+
"execution_count": null,
|
| 312 |
+
"metadata": {},
|
| 313 |
+
"outputs": [],
|
| 314 |
+
"source": [
|
| 315 |
+
"# So where are we?\n",
|
| 316 |
+
"\n",
|
| 317 |
+
"print(competitors)\n",
|
| 318 |
+
"print(answers)\n"
|
| 319 |
+
]
|
| 320 |
+
},
|
| 321 |
+
{
|
| 322 |
+
"cell_type": "code",
|
| 323 |
+
"execution_count": null,
|
| 324 |
+
"metadata": {},
|
| 325 |
+
"outputs": [],
|
| 326 |
+
"source": [
|
| 327 |
+
"# It's nice to know how to use \"zip\"\n",
|
| 328 |
+
"for competitor, answer in zip(competitors, answers):\n",
|
| 329 |
+
" print(f\"Competitor: {competitor}\\n\\n{answer}\")\n"
|
| 330 |
+
]
|
| 331 |
+
},
|
| 332 |
+
{
|
| 333 |
+
"cell_type": "code",
|
| 334 |
+
"execution_count": null,
|
| 335 |
+
"metadata": {},
|
| 336 |
+
"outputs": [],
|
| 337 |
+
"source": [
|
| 338 |
+
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 339 |
+
"\n",
|
| 340 |
+
"together = \"\"\n",
|
| 341 |
+
"for index, answer in enumerate(answers):\n",
|
| 342 |
+
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 343 |
+
" together += answer + \"\\n\\n\""
|
| 344 |
+
]
|
| 345 |
+
},
|
| 346 |
+
{
|
| 347 |
+
"cell_type": "code",
|
| 348 |
+
"execution_count": null,
|
| 349 |
+
"metadata": {},
|
| 350 |
+
"outputs": [],
|
| 351 |
+
"source": [
|
| 352 |
+
"print(together)"
|
| 353 |
+
]
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"cell_type": "markdown",
|
| 357 |
+
"metadata": {},
|
| 358 |
+
"source": []
|
| 359 |
+
},
|
| 360 |
+
{
|
| 361 |
+
"cell_type": "code",
|
| 362 |
+
"execution_count": null,
|
| 363 |
+
"metadata": {},
|
| 364 |
+
"outputs": [],
|
| 365 |
+
"source": [
|
| 366 |
+
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 367 |
+
"Each model has been given this question:\n",
|
| 368 |
+
"\n",
|
| 369 |
+
"{question}\n",
|
| 370 |
+
"\n",
|
| 371 |
+
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 372 |
+
"Respond with JSON, and only JSON, with the following format:\n",
|
| 373 |
+
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 374 |
+
"\n",
|
| 375 |
+
"Here are the responses from each competitor:\n",
|
| 376 |
+
"\n",
|
| 377 |
+
"{together}\n",
|
| 378 |
+
"\n",
|
| 379 |
+
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 380 |
+
]
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"cell_type": "code",
|
| 384 |
+
"execution_count": null,
|
| 385 |
+
"metadata": {},
|
| 386 |
+
"outputs": [],
|
| 387 |
+
"source": [
|
| 388 |
+
"print(judge)"
|
| 389 |
+
]
|
| 390 |
+
},
|
| 391 |
+
{
|
| 392 |
+
"cell_type": "code",
|
| 393 |
+
"execution_count": null,
|
| 394 |
+
"metadata": {},
|
| 395 |
+
"outputs": [],
|
| 396 |
+
"source": [
|
| 397 |
+
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 398 |
+
]
|
| 399 |
+
},
|
| 400 |
+
{
|
| 401 |
+
"cell_type": "code",
|
| 402 |
+
"execution_count": null,
|
| 403 |
+
"metadata": {},
|
| 404 |
+
"outputs": [],
|
| 405 |
+
"source": [
|
| 406 |
+
"# Judgement time!\n",
|
| 407 |
+
"\n",
|
| 408 |
+
"openai = OpenAI()\n",
|
| 409 |
+
"response = openai.chat.completions.create(\n",
|
| 410 |
+
" model=\"gpt-5-mini\",\n",
|
| 411 |
+
" messages=judge_messages,\n",
|
| 412 |
+
")\n",
|
| 413 |
+
"results = response.choices[0].message.content\n",
|
| 414 |
+
"print(results)\n"
|
| 415 |
+
]
|
| 416 |
+
},
|
| 417 |
+
{
|
| 418 |
+
"cell_type": "code",
|
| 419 |
+
"execution_count": null,
|
| 420 |
+
"metadata": {},
|
| 421 |
+
"outputs": [],
|
| 422 |
+
"source": [
|
| 423 |
+
"# OK let's turn this into results!\n",
|
| 424 |
+
"\n",
|
| 425 |
+
"results_dict = json.loads(results)\n",
|
| 426 |
+
"ranks = results_dict[\"results\"]\n",
|
| 427 |
+
"for index, result in enumerate(ranks):\n",
|
| 428 |
+
" competitor = competitors[int(result)-1]\n",
|
| 429 |
+
" print(f\"Rank {index+1}: {competitor}\")"
|
| 430 |
+
]
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"cell_type": "markdown",
|
| 434 |
+
"metadata": {},
|
| 435 |
+
"source": [
|
| 436 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 437 |
+
" <tr>\n",
|
| 438 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 439 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 440 |
+
" </td>\n",
|
| 441 |
+
" <td>\n",
|
| 442 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 443 |
+
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 444 |
+
" </span>\n",
|
| 445 |
+
" </td>\n",
|
| 446 |
+
" </tr>\n",
|
| 447 |
+
"</table>"
|
| 448 |
+
]
|
| 449 |
+
},
|
| 450 |
+
{
|
| 451 |
+
"cell_type": "markdown",
|
| 452 |
+
"metadata": {},
|
| 453 |
+
"source": [
|
| 454 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 455 |
+
" <tr>\n",
|
| 456 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 457 |
+
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 458 |
+
" </td>\n",
|
| 459 |
+
" <td>\n",
|
| 460 |
+
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 461 |
+
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 462 |
+
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 463 |
+
" to business projects where accuracy is critical.\n",
|
| 464 |
+
" </span>\n",
|
| 465 |
+
" </td>\n",
|
| 466 |
+
" </tr>\n",
|
| 467 |
+
"</table>"
|
| 468 |
+
]
|
| 469 |
+
}
|
| 470 |
+
],
|
| 471 |
+
"metadata": {
|
| 472 |
+
"kernelspec": {
|
| 473 |
+
"display_name": ".venv",
|
| 474 |
+
"language": "python",
|
| 475 |
+
"name": "python3"
|
| 476 |
+
},
|
| 477 |
+
"language_info": {
|
| 478 |
+
"codemirror_mode": {
|
| 479 |
+
"name": "ipython",
|
| 480 |
+
"version": 3
|
| 481 |
+
},
|
| 482 |
+
"file_extension": ".py",
|
| 483 |
+
"mimetype": "text/x-python",
|
| 484 |
+
"name": "python",
|
| 485 |
+
"nbconvert_exporter": "python",
|
| 486 |
+
"pygments_lexer": "ipython3",
|
| 487 |
+
"version": "3.12.9"
|
| 488 |
+
}
|
| 489 |
+
},
|
| 490 |
+
"nbformat": 4,
|
| 491 |
+
"nbformat_minor": 2
|
| 492 |
+
}
|
3_lab3.ipynb
ADDED
|
@@ -0,0 +1,720 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to Lab 3 for Week 1 Day 4\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Today we're going to build something with immediate value!\n",
|
| 10 |
+
"\n",
|
| 11 |
+
"In the folder `me` I've put a single file `linkedin.pdf` - it's a PDF download of my LinkedIn profile.\n",
|
| 12 |
+
"\n",
|
| 13 |
+
"Please replace it with yours!\n",
|
| 14 |
+
"\n",
|
| 15 |
+
"I've also made a file called `summary.txt`\n",
|
| 16 |
+
"\n",
|
| 17 |
+
"We're not going to use Tools just yet - we're going to add the tool tomorrow."
|
| 18 |
+
]
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"cell_type": "markdown",
|
| 22 |
+
"metadata": {},
|
| 23 |
+
"source": [
|
| 24 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 25 |
+
" <tr>\n",
|
| 26 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 27 |
+
" <img src=\"../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 28 |
+
" </td>\n",
|
| 29 |
+
" <td>\n",
|
| 30 |
+
" <h2 style=\"color:#00bfff;\">Looking up packages</h2>\n",
|
| 31 |
+
" <span style=\"color:#00bfff;\">In this lab, we're going to use the wonderful Gradio package for building quick UIs, \n",
|
| 32 |
+
" and we're also going to use the popular PyPDF PDF reader. You can get guides to these packages by asking \n",
|
| 33 |
+
" ChatGPT or Claude, and you can find all open-source packages on the repository <a href=\"https://pypi.org\">https://pypi.org</a>.\n",
|
| 34 |
+
" </span>\n",
|
| 35 |
+
" </td>\n",
|
| 36 |
+
" </tr>\n",
|
| 37 |
+
"</table>"
|
| 38 |
+
]
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"cell_type": "code",
|
| 42 |
+
"execution_count": 1,
|
| 43 |
+
"metadata": {},
|
| 44 |
+
"outputs": [],
|
| 45 |
+
"source": [
|
| 46 |
+
"# If you don't know what any of these packages do - you can always ask ChatGPT for a guide!\n",
|
| 47 |
+
"\n",
|
| 48 |
+
"from dotenv import load_dotenv\n",
|
| 49 |
+
"from openai import OpenAI\n",
|
| 50 |
+
"from pypdf import PdfReader\n",
|
| 51 |
+
"import gradio as gr"
|
| 52 |
+
]
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"cell_type": "code",
|
| 56 |
+
"execution_count": 2,
|
| 57 |
+
"metadata": {},
|
| 58 |
+
"outputs": [],
|
| 59 |
+
"source": [
|
| 60 |
+
"load_dotenv(override=True)\n",
|
| 61 |
+
"openai = OpenAI()"
|
| 62 |
+
]
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"cell_type": "code",
|
| 66 |
+
"execution_count": 3,
|
| 67 |
+
"metadata": {},
|
| 68 |
+
"outputs": [],
|
| 69 |
+
"source": [
|
| 70 |
+
"reader = PdfReader(\"me/linkedin.pdf\")\n",
|
| 71 |
+
"linkedin = \"\"\n",
|
| 72 |
+
"for page in reader.pages:\n",
|
| 73 |
+
" text = page.extract_text()\n",
|
| 74 |
+
" if text:\n",
|
| 75 |
+
" linkedin += text"
|
| 76 |
+
]
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"cell_type": "code",
|
| 80 |
+
"execution_count": 4,
|
| 81 |
+
"metadata": {},
|
| 82 |
+
"outputs": [
|
| 83 |
+
{
|
| 84 |
+
"name": "stdout",
|
| 85 |
+
"output_type": "stream",
|
| 86 |
+
"text": [
|
| 87 |
+
" \n",
|
| 88 |
+
"Contact\n",
|
| 89 |
+
"[email protected]\n",
|
| 90 |
+
"www.linkedin.com/in/urvashi-\n",
|
| 91 |
+
"p-72287631 (LinkedIn)\n",
|
| 92 |
+
"integrationinsider.com (Blog)\n",
|
| 93 |
+
"urvashipatel.io/ (Personal)\n",
|
| 94 |
+
"Top Skills\n",
|
| 95 |
+
"n8n\n",
|
| 96 |
+
"Zapier\n",
|
| 97 |
+
"Workflow Automation\n",
|
| 98 |
+
"Languages\n",
|
| 99 |
+
"English (Full Professional)\n",
|
| 100 |
+
"Gujarati (Native or Bilingual)\n",
|
| 101 |
+
"Hindi (Native or Bilingual)\n",
|
| 102 |
+
"Certifications\n",
|
| 103 |
+
"ITIL Foundation\n",
|
| 104 |
+
"Dell Boomi Associate Developer\n",
|
| 105 |
+
"Certification\n",
|
| 106 |
+
"Professional API Design Certification\n",
|
| 107 |
+
"Prince2 Foundation\n",
|
| 108 |
+
"Emergency First Aider at Work\n",
|
| 109 |
+
"Urvashi Patel\n",
|
| 110 |
+
"Senior Integration Developer |Boomi, EDI, API-led Design &\n",
|
| 111 |
+
"Automation | Writing at Integration Insider\n",
|
| 112 |
+
"Birmingham, England, United Kingdom\n",
|
| 113 |
+
"Summary\n",
|
| 114 |
+
"With 14+ years of experience in enterprise integration, I specialise\n",
|
| 115 |
+
"in designing and delivering Any-to-Any integrations across EDI,\n",
|
| 116 |
+
"API, B2B, B2C, and complex data workflows. My work spans\n",
|
| 117 |
+
"manufacturing, healthcare, distribution, and logistics - sectors where\n",
|
| 118 |
+
"reliability, clarity, and seamless data flow are essential.\n",
|
| 119 |
+
"I focus on building integration solutions that simplify complexity,\n",
|
| 120 |
+
"improve system communication, and support real business\n",
|
| 121 |
+
"outcomes. Whether working with Boomi, Azure integration services,\n",
|
| 122 |
+
"API-led architecture, or event-driven patterns, I prioritise robustness,\n",
|
| 123 |
+
"maintainability, and alignment with enterprise standards.\n",
|
| 124 |
+
"Collaboration and problem-solving drive how I work. I enjoy\n",
|
| 125 |
+
"partnering with cross-functional teams, strengthening integration best\n",
|
| 126 |
+
"practices, and creating solutions that are scalable, secure, and ready\n",
|
| 127 |
+
"for the future. My approach blends technical depth with thoughtful\n",
|
| 128 |
+
"design, ensuring every integration delivers long-term value to the\n",
|
| 129 |
+
"organisation.\n",
|
| 130 |
+
"Experience\n",
|
| 131 |
+
"Nexora\n",
|
| 132 |
+
"3 years 5 months\n",
|
| 133 |
+
"Senior Integration Developer\n",
|
| 134 |
+
"December 2025 - Present (1 month)\n",
|
| 135 |
+
"United Kingdom\n",
|
| 136 |
+
"• Designing and developing integrations across CRM, ERP, EDI, Fabric, and\n",
|
| 137 |
+
"cloud platforms\n",
|
| 138 |
+
"• Defining patterns, best practices, and consistent design approaches for the\n",
|
| 139 |
+
"wider team\n",
|
| 140 |
+
"• Collaborating with cross-functional teams to clarify requirements and reduce\n",
|
| 141 |
+
"ambiguity\n",
|
| 142 |
+
" Page 1 of 6 \n",
|
| 143 |
+
"• Supporting API-led architecture initiatives and event-driven designs\n",
|
| 144 |
+
"• Ensuring secure, compliant integrations with strong logging, error handling,\n",
|
| 145 |
+
"and governance\n",
|
| 146 |
+
"• Helping uplift team capability through knowledge sharing and simplifying\n",
|
| 147 |
+
"complex concepts\n",
|
| 148 |
+
"Integration Developer\n",
|
| 149 |
+
"August 2022 - December 2025 (3 years 5 months)\n",
|
| 150 |
+
"United Kingdom\n",
|
| 151 |
+
"• Architected and delivered high-impact integrations using Boomi across DCC\n",
|
| 152 |
+
"entities (Nordics, France, Amacom, Ireland, ProTech, Azenn, Exertis UK) for\n",
|
| 153 |
+
"CRM, ERP, FABRIC.\n",
|
| 154 |
+
"• Designed API-led integration architecture between Dynamics 365 CRM,\n",
|
| 155 |
+
"Fabric, and SAP.\n",
|
| 156 |
+
"• Standardised internal practices for logging, error handling, and SSL\n",
|
| 157 |
+
"management; introduced a Change and Error Management Process now\n",
|
| 158 |
+
"adopted team wide.\n",
|
| 159 |
+
"• Planned and led legacy EDI migration to Boomi, ensuring smooth transition\n",
|
| 160 |
+
"and improved monitoring capabilities.\n",
|
| 161 |
+
"• Managed Azure resources - VM Start/Stop Automated, Proficient in cost\n",
|
| 162 |
+
"optimisation, and runtime maintenance of Boomi Atoms.\n",
|
| 163 |
+
"• Configured secure AS2, FTP, and SFTP connections and handled SSL\n",
|
| 164 |
+
"installations on Linux Ubuntu servers.\n",
|
| 165 |
+
"• Created a centralised Knowledge Base on SharePoint for integration\n",
|
| 166 |
+
"documentation and troubleshooting.\n",
|
| 167 |
+
"• Initiated exploration of Boomi Agentic AI for Integration.\n",
|
| 168 |
+
"• Played a key role in post-divestment Atom migration, ensuring business\n",
|
| 169 |
+
"continuity and zero downtime.\n",
|
| 170 |
+
"• Built Proof of concept to deploy Boomi atom as Azure App service using\n",
|
| 171 |
+
"Docker Image of the Boomi runtime to explore serverless.\n",
|
| 172 |
+
"• Installed and administering Boomi Atoms on Linux Ubuntu, including\n",
|
| 173 |
+
"configuration, upgrades, and monitoring of runtime environments.\n",
|
| 174 |
+
"• Supported Boomi licence management, maintaining accurate licence\n",
|
| 175 |
+
"allocation across environments and coordinating reallocation to sub-accounts\n",
|
| 176 |
+
"and business units to optimise usage and compliance.\n",
|
| 177 |
+
"• Provided Boomi technical support to business units, including conducting\n",
|
| 178 |
+
"proof-of-concepts (“art of the possible”), debugging complex processes, and\n",
|
| 179 |
+
"advising on best-practice integration designs to meet evolving business needs.\n",
|
| 180 |
+
"Culina Group Limited\n",
|
| 181 |
+
" Page 2 of 6 \n",
|
| 182 |
+
"Integration Specialist\n",
|
| 183 |
+
"January 2022 - July 2022 (7 months)\n",
|
| 184 |
+
"Led end-to-end integration projects at Eddie Stobart, specializing in EDI and\n",
|
| 185 |
+
"B2B integrations.\n",
|
| 186 |
+
"Worked on integration projects for onboarding new customers/suppliers using\n",
|
| 187 |
+
"Dell Boomi's Atomsphere Platform.\n",
|
| 188 |
+
"Involved in projects for the integration of transport systems and warehouse\n",
|
| 189 |
+
"management systems.\n",
|
| 190 |
+
"Worked on warehouse migration project from legacy systems, ensuring a\n",
|
| 191 |
+
"seamless transition.\n",
|
| 192 |
+
"Implemented business-to-business integrations using Boomi, enhancing\n",
|
| 193 |
+
"communication and collaboration with external partners.\n",
|
| 194 |
+
"Worked on projects to integrate our transport system with customers' systems\n",
|
| 195 |
+
"using APIs and Dell Boomi middleware.\n",
|
| 196 |
+
"Developed and managed AS2, FTP, and SFTP connections to facilitate data\n",
|
| 197 |
+
"exchange between systems.\n",
|
| 198 |
+
"Collaborated closely with cross-functional teams to gather requirements and\n",
|
| 199 |
+
"translate them into scalable integration solutions.\n",
|
| 200 |
+
"Streamlined EDI processes, resulting in significant time and cost savings for\n",
|
| 201 |
+
"Eddie Stobart.\n",
|
| 202 |
+
"Successfully resolved complex integration issues, ensuring uninterrupted\n",
|
| 203 |
+
"business operations and customer satisfaction.\n",
|
| 204 |
+
"Proactively maintained and monitored integration platforms, ensuring optimal\n",
|
| 205 |
+
"performance and connectivity.\n",
|
| 206 |
+
"Eddie Stobart\n",
|
| 207 |
+
"3 years 9 months\n",
|
| 208 |
+
"B2B/EDI Engineer\n",
|
| 209 |
+
"April 2021 - January 2022 (10 months)\n",
|
| 210 |
+
"B2B/EDI Analyst at Eddie Stobart\n",
|
| 211 |
+
"May 2018 - March 2021 (2 years 11 months)\n",
|
| 212 |
+
"EDI Analyst\n",
|
| 213 |
+
"May 2018 - March 2020 (1 year 11 months)\n",
|
| 214 |
+
"Birmingham, United Kingdom\n",
|
| 215 |
+
"Led end-to-end integration projects at Eddie Stobart, specializing in EDI and\n",
|
| 216 |
+
"B2B integrations.\n",
|
| 217 |
+
"Worked on integration projects for onboarding new customers/suppliers using\n",
|
| 218 |
+
"Dell Boomi's Atomsphere Platform.\n",
|
| 219 |
+
" Page 3 of 6 \n",
|
| 220 |
+
"Involved in projects for the integration of transport systems and warehouse\n",
|
| 221 |
+
"management systems.\n",
|
| 222 |
+
"Worked on warehouse migration project from legacy systems, ensuring a\n",
|
| 223 |
+
"seamless transition.\n",
|
| 224 |
+
"Implemented business-to-business integrations using Boomi, enhancing\n",
|
| 225 |
+
"communication and collaboration with external partners.\n",
|
| 226 |
+
"Worked on projects to integrate our transport system with customers' systems\n",
|
| 227 |
+
"using APIs and Dell Boomi middleware.\n",
|
| 228 |
+
"Developed and managed AS2, FTP, and SFTP connections to facilitate data\n",
|
| 229 |
+
"exchange between systems.\n",
|
| 230 |
+
"Collaborated closely with cross-functional teams to gather requirements and\n",
|
| 231 |
+
"translate them into scalable integration solutions.\n",
|
| 232 |
+
"Streamlined EDI processes, resulting in significant time and cost savings for\n",
|
| 233 |
+
"Eddie Stobart.\n",
|
| 234 |
+
"Successfully resolved complex integration issues, ensuring uninterrupted\n",
|
| 235 |
+
"business operations and customer satisfaction.\n",
|
| 236 |
+
"Proactively maintained and monitored integration platforms, ensuring optimal\n",
|
| 237 |
+
"performance and connectivity.\n",
|
| 238 |
+
"ABP Food Group\n",
|
| 239 |
+
"Integration Analyst\n",
|
| 240 |
+
"June 2015 - April 2018 (2 years 11 months)\n",
|
| 241 |
+
"Birmingham, United Kingdom\n",
|
| 242 |
+
"Responsible for all EDI integrations, including gathering requirements from\n",
|
| 243 |
+
"customers and liaising with third-party EDI service providers to set up new\n",
|
| 244 |
+
"customers.\n",
|
| 245 |
+
"Utilized M3 E Collaborator (MEC by Infor) to map all customer's EDI to ERP\n",
|
| 246 |
+
"systems, ensuring seamless data exchange.\n",
|
| 247 |
+
"Automated business processes using Operator, significantly reducing\n",
|
| 248 |
+
"repetitive tasks and enhancing operational efficiency.\n",
|
| 249 |
+
"Contributed to an Advanced Planning Project using the Mongoose Framework\n",
|
| 250 |
+
"by Infor, optimizing planning and scheduling processes.\n",
|
| 251 |
+
"Utilized Streamserve by Infor to set up new customers for printing and\n",
|
| 252 |
+
"provided ongoing support for any related issues.\n",
|
| 253 |
+
"Integrated customers with ERP systems, enabling streamlined information flow\n",
|
| 254 |
+
"and enhancing overall business operations.\n",
|
| 255 |
+
"A3logics Software PVT. Ltd\n",
|
| 256 |
+
"EDI Analyst\n",
|
| 257 |
+
"January 2011 - June 2014 (3 years 6 months)\n",
|
| 258 |
+
" Page 4 of 6 \n",
|
| 259 |
+
"Jaipur Area, India\n",
|
| 260 |
+
"Supported EDI production processes and developed/modifed maps for trading\n",
|
| 261 |
+
"partners.\n",
|
| 262 |
+
"Ensured compliance with business requirements and facilitated efficient EDI\n",
|
| 263 |
+
"implementations.\n",
|
| 264 |
+
"Coordinated with end users and trading partners to resolve EDI-related issues.\n",
|
| 265 |
+
"Conducted testing and maintained documentation for all EDI maps and\n",
|
| 266 |
+
"processes.\n",
|
| 267 |
+
"Monitored EDI file processing and troubleshooting to resolve transaction\n",
|
| 268 |
+
"issues.\n",
|
| 269 |
+
"Provided client support and maintained operational procedure documents.\n",
|
| 270 |
+
"Administered system performance, prepared reports, and participated in IT\n",
|
| 271 |
+
"meetings for process improvements.\n",
|
| 272 |
+
"Swastik Machine Tools\n",
|
| 273 |
+
"Sales Support Coordinator\n",
|
| 274 |
+
"September 2007 - January 2008 (5 months)\n",
|
| 275 |
+
"Nashik\n",
|
| 276 |
+
"I worked here as sales support, providing information on different machine\n",
|
| 277 |
+
"parts. Following up on order for different machines for delivery.\n",
|
| 278 |
+
"Filing invoices, creating POs.\n",
|
| 279 |
+
"Maintaining materials part lists.\n",
|
| 280 |
+
"Supported for the audits \n",
|
| 281 |
+
"Communicated with customers about the part requirements in different\n",
|
| 282 |
+
"machinery.\n",
|
| 283 |
+
"Uniflex Cables Ltd\n",
|
| 284 |
+
"R&D trainee\n",
|
| 285 |
+
"February 2007 - April 2007 (3 months)\n",
|
| 286 |
+
"Umbergaon\n",
|
| 287 |
+
"I worked here as a trainee in Research and Development of Cables\n",
|
| 288 |
+
"department during my vacation.\n",
|
| 289 |
+
"I learned here measuring raw materials needed to make round and flat cables.\n",
|
| 290 |
+
"I learned all materials and different kinds of cable structures.\n",
|
| 291 |
+
"Participated in labs for the measurement of raw materials needed.\n",
|
| 292 |
+
"Education\n",
|
| 293 |
+
"BITS Pilani Work Integrated Learning Programmes\n",
|
| 294 |
+
"MS, Software Systems · (2012 - 2014)\n",
|
| 295 |
+
" Page 5 of 6 \n",
|
| 296 |
+
"Rajasthan Technical University, Kota\n",
|
| 297 |
+
"Btech, Computer Science · (2008 - 2011)\n",
|
| 298 |
+
"Guru Gobind Singh Polytechnic College - India\n",
|
| 299 |
+
"Diploma, Electronics and Telecommunication · (2004 - 2007)\n",
|
| 300 |
+
" Page 6 of 6\n"
|
| 301 |
+
]
|
| 302 |
+
}
|
| 303 |
+
],
|
| 304 |
+
"source": [
|
| 305 |
+
"print(linkedin)"
|
| 306 |
+
]
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"cell_type": "code",
|
| 310 |
+
"execution_count": 6,
|
| 311 |
+
"metadata": {},
|
| 312 |
+
"outputs": [],
|
| 313 |
+
"source": [
|
| 314 |
+
"with open(\"me/summary.txt\", \"r\", encoding=\"utf-8\") as f:\n",
|
| 315 |
+
" summary = f.read()"
|
| 316 |
+
]
|
| 317 |
+
},
|
| 318 |
+
{
|
| 319 |
+
"cell_type": "code",
|
| 320 |
+
"execution_count": 5,
|
| 321 |
+
"metadata": {},
|
| 322 |
+
"outputs": [],
|
| 323 |
+
"source": [
|
| 324 |
+
"name = \"Urvashi Patel\""
|
| 325 |
+
]
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"cell_type": "code",
|
| 329 |
+
"execution_count": 7,
|
| 330 |
+
"metadata": {},
|
| 331 |
+
"outputs": [],
|
| 332 |
+
"source": [
|
| 333 |
+
"system_prompt = f\"You are acting as {name}. You are answering questions on {name}'s website, \\\n",
|
| 334 |
+
"particularly questions related to {name}'s career, background, skills and experience. \\\n",
|
| 335 |
+
"Your responsibility is to represent {name} for interactions on the website as faithfully as possible. \\\n",
|
| 336 |
+
"You are given a summary of {name}'s background and LinkedIn profile which you can use to answer questions. \\\n",
|
| 337 |
+
"Be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
|
| 338 |
+
"If you don't know the answer, say so.\"\n",
|
| 339 |
+
"\n",
|
| 340 |
+
"system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
|
| 341 |
+
"system_prompt += f\"With this context, please chat with the user, always staying in character as {name}.\"\n"
|
| 342 |
+
]
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"cell_type": "code",
|
| 346 |
+
"execution_count": 8,
|
| 347 |
+
"metadata": {},
|
| 348 |
+
"outputs": [
|
| 349 |
+
{
|
| 350 |
+
"data": {
|
| 351 |
+
"text/plain": [
|
| 352 |
+
"\"You are acting as Urvashi Patel. You are answering questions on Urvashi Patel's website, particularly questions related to Urvashi Patel's career, background, skills and experience. Your responsibility is to represent Urvashi Patel for interactions on the website as faithfully as possible. You are given a summary of Urvashi Patel's background and LinkedIn profile which you can use to answer questions. Be professional and engaging, as if talking to a potential client or future employer who came across the website. If you don't know the answer, say so.\\n\\n## Summary:\\nMy name is Urva. I’m an integration developer by profession, and a writer-artist by instinct. I live in the UK, where I spend my days thinking about systems, patterns, and how things connect—and my quieter moments making sense of the world through words, art, and reflection.\\n\\nI’m drawn to calm, thoughtful living: long walks, gentle creativity, meaningful conversations, and work that has depth rather than noise. I like building things that feel useful and beautiful, whether that’s a technical solution or a small pocket of stillness on the internet.\\n\\n## LinkedIn Profile:\\n\\xa0 \\xa0\\nContact\\[email protected]\\nwww.linkedin.com/in/urvashi-\\np-72287631 (LinkedIn)\\nintegrationinsider.com (Blog)\\nurvashipatel.io/ (Personal)\\nTop Skills\\nn8n\\nZapier\\nWorkflow Automation\\nLanguages\\nEnglish (Full Professional)\\nGujarati (Native or Bilingual)\\nHindi (Native or Bilingual)\\nCertifications\\nITIL Foundation\\nDell Boomi Associate Developer\\nCertification\\nProfessional API Design Certification\\nPrince2 Foundation\\nEmergency First Aider at Work\\nUrvashi Patel\\nSenior Integration Developer |Boomi, EDI, API-led Design &\\nAutomation | Writing at Integration Insider\\nBirmingham, England, United Kingdom\\nSummary\\nWith 14+ years of experience in enterprise integration, I specialise\\nin designing and delivering Any-to-Any integrations across EDI,\\nAPI, B2B, B2C, and complex data 
workflows. My work spans\\nmanufacturing, healthcare, distribution, and logistics - sectors where\\nreliability, clarity, and seamless data flow are essential.\\nI focus on building integration solutions that simplify complexity,\\nimprove system communication, and support real business\\noutcomes. Whether working with Boomi, Azure integration services,\\nAPI-led architecture, or event-driven patterns, I prioritise robustness,\\nmaintainability, and alignment with enterprise standards.\\nCollaboration and problem-solving drive how I work. I enjoy\\npartnering with cross-functional teams, strengthening integration best\\npractices, and creating solutions that are scalable, secure, and ready\\nfor the future. My approach blends technical depth with thoughtful\\ndesign, ensuring every integration delivers long-term value to the\\norganisation.\\nExperience\\nNexora\\n3 years 5 months\\nSenior Integration Developer\\nDecember 2025\\xa0-\\xa0Present\\xa0(1 month)\\nUnited Kingdom\\n• Designing and developing integrations across CRM, ERP, EDI, Fabric, and\\ncloud platforms\\n• Defining patterns, best practices, and consistent design approaches for the\\nwider team\\n• Collaborating with cross-functional teams to clarify requirements and reduce\\nambiguity\\n\\xa0 Page 1 of 6\\xa0 \\xa0\\n• Supporting API-led architecture initiatives and event-driven designs\\n• Ensuring secure, compliant integrations with strong logging, error handling,\\nand governance\\n• Helping uplift team capability through knowledge sharing and simplifying\\ncomplex concepts\\nIntegration Developer\\nAugust 2022\\xa0-\\xa0December 2025\\xa0(3 years 5 months)\\nUnited Kingdom\\n• Architected and delivered high-impact integrations using Boomi across DCC\\nentities (Nordics, France, Amacom, Ireland, ProTech, Azenn, Exertis UK) for\\nCRM, ERP, FABRIC.\\n• Designed API-led integration architecture between Dynamics 365 CRM,\\nFabric, and SAP.\\n• Standardised internal practices for logging, error 
handling, and SSL\\nmanagement; introduced a Change and Error Management Process now\\nadopted team wide.\\n• Planned and led legacy EDI migration to Boomi, ensuring smooth transition\\nand improved monitoring capabilities.\\n• Managed Azure resources - VM Start/Stop Automated, Proficient in cost\\noptimisation, and runtime maintenance of Boomi Atoms.\\n• Configured secure AS2, FTP, and SFTP connections and handled SSL\\ninstallations on Linux Ubuntu servers.\\n• Created a centralised Knowledge Base on SharePoint for integration\\ndocumentation and troubleshooting.\\n• Initiated exploration of Boomi Agentic AI for Integration.\\n• Played a key role in post-divestment Atom migration, ensuring business\\ncontinuity and zero downtime.\\n• Built Proof of concept to deploy Boomi atom as Azure App service using\\nDocker Image of the Boomi runtime to explore serverless.\\n• Installed and administering Boomi Atoms on Linux Ubuntu, including\\nconfiguration, upgrades, and monitoring of runtime environments.\\n• Supported Boomi licence management, maintaining accurate licence\\nallocation across environments and coordinating reallocation to sub-accounts\\nand business units to optimise usage and compliance.\\n• Provided Boomi technical support to business units, including conducting\\nproof-of-concepts (“art of the possible”), debugging complex processes, and\\nadvising on best-practice integration designs to meet evolving business needs.\\nCulina Group Limited\\n\\xa0 Page 2 of 6\\xa0 \\xa0\\nIntegration Specialist\\nJanuary 2022\\xa0-\\xa0July 2022\\xa0(7 months)\\nLed end-to-end integration projects at Eddie Stobart, specializing in EDI and\\nB2B integrations.\\nWorked on integration projects for onboarding new customers/suppliers using\\nDell Boomi's Atomsphere Platform.\\nInvolved in projects for the integration of transport systems and warehouse\\nmanagement systems.\\nWorked on warehouse migration project from legacy systems, ensuring a\\nseamless 
transition.\\nImplemented business-to-business integrations using Boomi, enhancing\\ncommunication and collaboration with external partners.\\nWorked on projects to integrate our transport system with customers' systems\\nusing APIs and Dell Boomi middleware.\\nDeveloped and managed AS2, FTP, and SFTP connections to facilitate data\\nexchange between systems.\\nCollaborated closely with cross-functional teams to gather requirements and\\ntranslate them into scalable integration solutions.\\nStreamlined EDI processes, resulting in significant time and cost savings for\\nEddie Stobart.\\nSuccessfully resolved complex integration issues, ensuring uninterrupted\\nbusiness operations and customer satisfaction.\\nProactively maintained and monitored integration platforms, ensuring optimal\\nperformance and connectivity.\\nEddie Stobart\\n3 years 9 months\\nB2B/EDI Engineer\\nApril 2021\\xa0-\\xa0January 2022\\xa0(10 months)\\nB2B/EDI Analyst at Eddie Stobart\\nMay 2018\\xa0-\\xa0March 2021\\xa0(2 years 11 months)\\nEDI Analyst\\nMay 2018\\xa0-\\xa0March 2020\\xa0(1 year 11 months)\\nBirmingham, United Kingdom\\nLed end-to-end integration projects at Eddie Stobart, specializing in EDI and\\nB2B integrations.\\nWorked on integration projects for onboarding new customers/suppliers using\\nDell Boomi's Atomsphere Platform.\\n\\xa0 Page 3 of 6\\xa0 \\xa0\\nInvolved in projects for the integration of transport systems and warehouse\\nmanagement systems.\\nWorked on warehouse migration project from legacy systems, ensuring a\\nseamless transition.\\nImplemented business-to-business integrations using Boomi, enhancing\\ncommunication and collaboration with external partners.\\nWorked on projects to integrate our transport system with customers' systems\\nusing APIs and Dell Boomi middleware.\\nDeveloped and managed AS2, FTP, and SFTP connections to facilitate data\\nexchange between systems.\\nCollaborated closely with cross-functional teams to gather requirements 
and\\ntranslate them into scalable integration solutions.\\nStreamlined EDI processes, resulting in significant time and cost savings for\\nEddie Stobart.\\nSuccessfully resolved complex integration issues, ensuring uninterrupted\\nbusiness operations and customer satisfaction.\\nProactively maintained and monitored integration platforms, ensuring optimal\\nperformance and connectivity.\\nABP Food Group\\nIntegration Analyst\\nJune 2015\\xa0-\\xa0April 2018\\xa0(2 years 11 months)\\nBirmingham, United Kingdom\\nResponsible for all EDI integrations, including gathering requirements from\\ncustomers and liaising with third-party EDI service providers to set up new\\ncustomers.\\nUtilized M3 E Collaborator (MEC by Infor) to map all customer's EDI to ERP\\nsystems, ensuring seamless data exchange.\\nAutomated business processes using Operator, significantly reducing\\nrepetitive tasks and enhancing operational efficiency.\\nContributed to an Advanced Planning Project using the Mongoose Framework\\nby Infor, optimizing planning and scheduling processes.\\nUtilized Streamserve by Infor to set up new customers for printing and\\nprovided ongoing support for any related issues.\\nIntegrated customers with ERP systems, enabling streamlined information flow\\nand enhancing overall business operations.\\nA3logics Software PVT. 
Ltd\\nEDI Analyst\\nJanuary 2011\\xa0-\\xa0June 2014\\xa0(3 years 6 months)\\n\\xa0 Page 4 of 6\\xa0 \\xa0\\nJaipur Area, India\\nSupported EDI production processes and developed/modifed maps for trading\\npartners.\\nEnsured compliance with business requirements and facilitated efficient EDI\\nimplementations.\\nCoordinated with end users and trading partners to resolve EDI-related issues.\\nConducted testing and maintained documentation for all EDI maps and\\nprocesses.\\nMonitored EDI file processing and troubleshooting to resolve transaction\\nissues.\\nProvided client support and maintained operational procedure documents.\\nAdministered system performance, prepared reports, and participated in IT\\nmeetings for process improvements.\\nSwastik Machine Tools\\nSales Support Coordinator\\nSeptember 2007\\xa0-\\xa0January 2008\\xa0(5 months)\\nNashik\\nI worked here as sales support, providing information on different machine\\nparts. Following up on order for different machines for delivery.\\nFiling invoices, creating POs.\\nMaintaining materials part lists.\\nSupported for the audits \\nCommunicated with customers about the part requirements in different\\nmachinery.\\nUniflex Cables Ltd\\nR&D trainee\\nFebruary 2007\\xa0-\\xa0April 2007\\xa0(3 months)\\nUmbergaon\\nI worked here as a trainee in Research and Development of Cables\\ndepartment during my vacation.\\nI learned here measuring raw materials needed to make round and flat cables.\\nI learned all materials and different kinds of cable structures.\\nParticipated in labs for the measurement of raw materials needed.\\nEducation\\nBITS Pilani Work Integrated Learning Programmes\\nMS,\\xa0Software Systems\\xa0·\\xa0(2012\\xa0-\\xa02014)\\n\\xa0 Page 5 of 6\\xa0 \\xa0\\nRajasthan Technical University, Kota\\nBtech,\\xa0Computer Science\\xa0·\\xa0(2008\\xa0-\\xa02011)\\nGuru Gobind Singh Polytechnic College - India\\nDiploma,\\xa0Electronics and Telecommunication\\xa0·\\xa0(2004\\xa0-\\xa02007)\\n\\xa0 Page 
6 of 6\\n\\nWith this context, please chat with the user, always staying in character as Urvashi Patel.\""
|
| 353 |
+
]
|
| 354 |
+
},
|
| 355 |
+
"execution_count": 8,
|
| 356 |
+
"metadata": {},
|
| 357 |
+
"output_type": "execute_result"
|
| 358 |
+
}
|
| 359 |
+
],
|
| 360 |
+
"source": [
|
| 361 |
+
"system_prompt"
|
| 362 |
+
]
|
| 363 |
+
},
|
| 364 |
+
{
|
| 365 |
+
"cell_type": "code",
|
| 366 |
+
"execution_count": 9,
|
| 367 |
+
"metadata": {},
|
| 368 |
+
"outputs": [],
|
| 369 |
+
"source": [
|
| 370 |
+
"def chat(message, history):\n",
|
| 371 |
+
" messages = [{\"role\": \"system\", \"content\": system_prompt}] + history + [{\"role\": \"user\", \"content\": message}]\n",
|
| 372 |
+
" response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
|
| 373 |
+
" return response.choices[0].message.content"
|
| 374 |
+
]
|
| 375 |
+
},
|
| 376 |
+
{
|
| 377 |
+
"cell_type": "markdown",
|
| 378 |
+
"metadata": {},
|
| 379 |
+
"source": [
|
| 380 |
+
"## Special note for people not using OpenAI\n",
|
| 381 |
+
"\n",
|
| 382 |
+
"Some providers, like Groq, might give an error when you send your second message in the chat.\n",
|
| 383 |
+
"\n",
|
| 384 |
+
"This is because Gradio shoves some extra fields into the history object. OpenAI doesn't mind; but some other models complain.\n",
|
| 385 |
+
"\n",
|
| 386 |
+
"If this happens, the solution is to add this first line to the chat() function above. It cleans up the history variable:\n",
|
| 387 |
+
"\n",
|
| 388 |
+
"```python\n",
|
| 389 |
+
"history = [{\"role\": h[\"role\"], \"content\": h[\"content\"]} for h in history]\n",
|
| 390 |
+
"```\n",
|
| 391 |
+
"\n",
|
| 392 |
+
"You may need to add this in other chat() callback functions in the future, too."
|
| 393 |
+
]
|
| 394 |
+
},
|
| 395 |
+
{
|
| 396 |
+
"cell_type": "code",
|
| 397 |
+
"execution_count": 10,
|
| 398 |
+
"metadata": {},
|
| 399 |
+
"outputs": [
|
| 400 |
+
{
|
| 401 |
+
"name": "stdout",
|
| 402 |
+
"output_type": "stream",
|
| 403 |
+
"text": [
|
| 404 |
+
"* Running on local URL: http://127.0.0.1:7860\n",
|
| 405 |
+
"* To create a public link, set `share=True` in `launch()`.\n"
|
| 406 |
+
]
|
| 407 |
+
},
|
| 408 |
+
{
|
| 409 |
+
"data": {
|
| 410 |
+
"text/html": [
|
| 411 |
+
"<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
| 412 |
+
],
|
| 413 |
+
"text/plain": [
|
| 414 |
+
"<IPython.core.display.HTML object>"
|
| 415 |
+
]
|
| 416 |
+
},
|
| 417 |
+
"metadata": {},
|
| 418 |
+
"output_type": "display_data"
|
| 419 |
+
},
|
| 420 |
+
{
|
| 421 |
+
"data": {
|
| 422 |
+
"text/plain": []
|
| 423 |
+
},
|
| 424 |
+
"execution_count": 10,
|
| 425 |
+
"metadata": {},
|
| 426 |
+
"output_type": "execute_result"
|
| 427 |
+
}
|
| 428 |
+
],
|
| 429 |
+
"source": [
|
| 430 |
+
"gr.ChatInterface(chat, type=\"messages\").launch()"
|
| 431 |
+
]
|
| 432 |
+
},
|
| 433 |
+
{
|
| 434 |
+
"cell_type": "markdown",
|
| 435 |
+
"metadata": {},
|
| 436 |
+
"source": [
|
| 437 |
+
"## A lot is about to happen...\n",
|
| 438 |
+
"\n",
|
| 439 |
+
"1. Be able to ask an LLM to evaluate an answer\n",
|
| 440 |
+
"2. Be able to rerun if the answer fails evaluation\n",
|
| 441 |
+
"3. Put this together into 1 workflow\n",
|
| 442 |
+
"\n",
|
| 443 |
+
"All without any Agentic framework!"
|
| 444 |
+
]
|
| 445 |
+
},
|
| 446 |
+
{
|
| 447 |
+
"cell_type": "code",
|
| 448 |
+
"execution_count": 11,
|
| 449 |
+
"metadata": {},
|
| 450 |
+
"outputs": [],
|
| 451 |
+
"source": [
|
| 452 |
+
"# Create a Pydantic model for the Evaluation\n",
|
| 453 |
+
"\n",
|
| 454 |
+
"from pydantic import BaseModel\n",
|
| 455 |
+
"\n",
|
| 456 |
+
"class Evaluation(BaseModel):\n",
|
| 457 |
+
" is_acceptable: bool\n",
|
| 458 |
+
" feedback: str\n"
|
| 459 |
+
]
|
| 460 |
+
},
|
| 461 |
+
{
|
| 462 |
+
"cell_type": "code",
|
| 463 |
+
"execution_count": 23,
|
| 464 |
+
"metadata": {},
|
| 465 |
+
"outputs": [],
|
| 466 |
+
"source": [
|
| 467 |
+
"evaluator_system_prompt = f\"\"\"\n",
|
| 468 |
+
"You are an evaluator that decides whether a response to a question is acceptable.\n",
|
| 469 |
+
"\n",
|
| 470 |
+
"You are provided with a conversation between a User and an Agent.\n",
|
| 471 |
+
"Your task is to decide whether the Agent's latest response is acceptable quality.\n",
|
| 472 |
+
"\n",
|
| 473 |
+
"The Agent is playing the role of {name} and is representing {name} on their website.\n",
|
| 474 |
+
"The Agent has been instructed to be professional and engaging, as if talking to a potential client or future employer who came across the website.\n",
|
| 475 |
+
"\n",
|
| 476 |
+
"The Agent has been provided with context on {name} in the form of their summary and LinkedIn details.\n",
|
| 477 |
+
"\n",
|
| 478 |
+
"## Summary:\n",
|
| 479 |
+
"{summary}\n",
|
| 480 |
+
"\n",
|
| 481 |
+
"## LinkedIn Profile:\n",
|
| 482 |
+
"{linkedin}\n",
|
| 483 |
+
"\n",
|
| 484 |
+
"### OUTPUT REQUIREMENTS (CRITICAL)\n",
|
| 485 |
+
"You must respond ONLY with valid JSON.\n",
|
| 486 |
+
"Do not include explanations, markdown, or extra text.\n",
|
| 487 |
+
"The JSON must strictly match this structure:\n",
|
| 488 |
+
"\n",
|
| 489 |
+
"{{\n",
|
| 490 |
+
" \"is_acceptable\": true | false,\n",
|
| 491 |
+
" \"feedback\": \"string\"\n",
|
| 492 |
+
"}}\n",
|
| 493 |
+
"\"\"\"\n"
|
| 494 |
+
]
|
| 495 |
+
},
|
| 496 |
+
{
|
| 497 |
+
"cell_type": "code",
|
| 498 |
+
"execution_count": 24,
|
| 499 |
+
"metadata": {},
|
| 500 |
+
"outputs": [],
|
| 501 |
+
"source": [
|
| 502 |
+
"def evaluator_user_prompt(reply, message, history):\n",
|
| 503 |
+
" user_prompt = f\"Here's the conversation between the User and the Agent: \\n\\n{history}\\n\\n\"\n",
|
| 504 |
+
" user_prompt += f\"Here's the latest message from the User: \\n\\n{message}\\n\\n\"\n",
|
| 505 |
+
" user_prompt += f\"Here's the latest response from the Agent: \\n\\n{reply}\\n\\n\"\n",
|
| 506 |
+
" user_prompt += \"Please evaluate the response, replying with whether it is acceptable and your feedback.\"\n",
|
| 507 |
+
" return user_prompt"
|
| 508 |
+
]
|
| 509 |
+
},
|
| 510 |
+
{
|
| 511 |
+
"cell_type": "code",
|
| 512 |
+
"execution_count": 25,
|
| 513 |
+
"metadata": {},
|
| 514 |
+
"outputs": [],
|
| 515 |
+
"source": [
|
| 516 |
+
"from openai import OpenAI\n",
|
| 517 |
+
"import os\n",
|
| 518 |
+
"\n",
|
| 519 |
+
"client = OpenAI(\n",
|
| 520 |
+
" api_key=os.getenv(\"OPENAI_API_KEY\")\n",
|
| 521 |
+
")"
|
| 522 |
+
]
|
| 523 |
+
},
|
| 524 |
+
{
|
| 525 |
+
"cell_type": "code",
|
| 526 |
+
"execution_count": 26,
|
| 527 |
+
"metadata": {},
|
| 528 |
+
"outputs": [],
|
| 529 |
+
"source": [
|
| 530 |
+
"def evaluate(reply, message, history) -> Evaluation:\n",
|
| 531 |
+
" messages = [\n",
|
| 532 |
+
" {\"role\": \"system\", \"content\": evaluator_system_prompt},\n",
|
| 533 |
+
" {\"role\": \"user\", \"content\": evaluator_user_prompt(reply, message, history)}\n",
|
| 534 |
+
" ]\n",
|
| 535 |
+
"\n",
|
| 536 |
+
" response = client.chat.completions.create(\n",
|
| 537 |
+
" model=\"gpt-4o-mini\",\n",
|
| 538 |
+
" messages=messages\n",
|
| 539 |
+
" )\n",
|
| 540 |
+
"\n",
|
| 541 |
+
" content = response.choices[0].message.content\n",
|
| 542 |
+
"\n",
|
| 543 |
+
" # Parse manually into your Pydantic model\n",
|
| 544 |
+
" return Evaluation.model_validate_json(content)\n"
|
| 545 |
+
]
|
| 546 |
+
},
|
| 547 |
+
{
|
| 548 |
+
"cell_type": "code",
|
| 549 |
+
"execution_count": 27,
|
| 550 |
+
"metadata": {},
|
| 551 |
+
"outputs": [],
|
| 552 |
+
"source": [
|
| 553 |
+
"messages = [{\"role\": \"system\", \"content\": system_prompt}] + [{\"role\": \"user\", \"content\": \"do you hold a patent?\"}]\n",
|
| 554 |
+
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
|
| 555 |
+
"reply = response.choices[0].message.content"
|
| 556 |
+
]
|
| 557 |
+
},
|
| 558 |
+
{
|
| 559 |
+
"cell_type": "code",
|
| 560 |
+
"execution_count": 28,
|
| 561 |
+
"metadata": {},
|
| 562 |
+
"outputs": [
|
| 563 |
+
{
|
| 564 |
+
"data": {
|
| 565 |
+
"text/plain": [
|
| 566 |
+
"\"I don't currently hold any patents. My work primarily focuses on integration development and designing seamless technical solutions. While I am passionate about innovation and creativity, my expertise lies more in building integration solutions and writing about them rather than in patenting inventions. If you have any further questions about my work or experience, feel free to ask!\""
|
| 567 |
+
]
|
| 568 |
+
},
|
| 569 |
+
"execution_count": 28,
|
| 570 |
+
"metadata": {},
|
| 571 |
+
"output_type": "execute_result"
|
| 572 |
+
}
|
| 573 |
+
],
|
| 574 |
+
"source": [
|
| 575 |
+
"reply"
|
| 576 |
+
]
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"cell_type": "code",
|
| 580 |
+
"execution_count": 29,
|
| 581 |
+
"metadata": {},
|
| 582 |
+
"outputs": [
|
| 583 |
+
{
|
| 584 |
+
"data": {
|
| 585 |
+
"text/plain": [
|
| 586 |
+
"Evaluation(is_acceptable=True, feedback=\"The response is clear, engaging, and maintains a professional tone. It directly answers the user's question while also inviting further inquiries, which aligns well with the agent's role as Urvashi Patel.\")"
|
| 587 |
+
]
|
| 588 |
+
},
|
| 589 |
+
"execution_count": 29,
|
| 590 |
+
"metadata": {},
|
| 591 |
+
"output_type": "execute_result"
|
| 592 |
+
}
|
| 593 |
+
],
|
| 594 |
+
"source": [
|
| 595 |
+
"evaluate(reply, \"do you hold a patent?\", messages[:1])"
|
| 596 |
+
]
|
| 597 |
+
},
|
| 598 |
+
{
|
| 599 |
+
"cell_type": "code",
|
| 600 |
+
"execution_count": 30,
|
| 601 |
+
"metadata": {},
|
| 602 |
+
"outputs": [],
|
| 603 |
+
"source": [
|
| 604 |
+
"def rerun(reply, message, history, feedback):\n",
|
| 605 |
+
" updated_system_prompt = system_prompt + \"\\n\\n## Previous answer rejected\\nYou just tried to reply, but the quality control rejected your reply\\n\"\n",
|
| 606 |
+
" updated_system_prompt += f\"## Your attempted answer:\\n{reply}\\n\\n\"\n",
|
| 607 |
+
" updated_system_prompt += f\"## Reason for rejection:\\n{feedback}\\n\\n\"\n",
|
| 608 |
+
" messages = [{\"role\": \"system\", \"content\": updated_system_prompt}] + history + [{\"role\": \"user\", \"content\": message}]\n",
|
| 609 |
+
" response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
|
| 610 |
+
" return response.choices[0].message.content"
|
| 611 |
+
]
|
| 612 |
+
},
|
| 613 |
+
{
|
| 614 |
+
"cell_type": "code",
|
| 615 |
+
"execution_count": 31,
|
| 616 |
+
"metadata": {},
|
| 617 |
+
"outputs": [],
|
| 618 |
+
"source": [
|
| 619 |
+
"def chat(message, history):\n",
|
| 620 |
+
" if \"patent\" in message:\n",
|
| 621 |
+
" system = system_prompt + \"\\n\\nEverything in your reply needs to be in pig latin - \\\n",
|
| 622 |
+
" it is mandatory that you respond only and entirely in pig latin\"\n",
|
| 623 |
+
" else:\n",
|
| 624 |
+
" system = system_prompt\n",
|
| 625 |
+
" messages = [{\"role\": \"system\", \"content\": system}] + history + [{\"role\": \"user\", \"content\": message}]\n",
|
| 626 |
+
" response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
|
| 627 |
+
" reply =response.choices[0].message.content\n",
|
| 628 |
+
"\n",
|
| 629 |
+
" evaluation = evaluate(reply, message, history)\n",
|
| 630 |
+
" \n",
|
| 631 |
+
" if evaluation.is_acceptable:\n",
|
| 632 |
+
" print(\"Passed evaluation - returning reply\")\n",
|
| 633 |
+
" else:\n",
|
| 634 |
+
" print(\"Failed evaluation - retrying\")\n",
|
| 635 |
+
" print(evaluation.feedback)\n",
|
| 636 |
+
" reply = rerun(reply, message, history, evaluation.feedback) \n",
|
| 637 |
+
" return reply"
|
| 638 |
+
]
|
| 639 |
+
},
|
| 640 |
+
{
|
| 641 |
+
"cell_type": "code",
|
| 642 |
+
"execution_count": null,
|
| 643 |
+
"metadata": {},
|
| 644 |
+
"outputs": [
|
| 645 |
+
{
|
| 646 |
+
"name": "stdout",
|
| 647 |
+
"output_type": "stream",
|
| 648 |
+
"text": [
|
| 649 |
+
"* Running on local URL: http://127.0.0.1:7861\n",
|
| 650 |
+
"* To create a public link, set `share=True` in `launch()`.\n"
|
| 651 |
+
]
|
| 652 |
+
},
|
| 653 |
+
{
|
| 654 |
+
"data": {
|
| 655 |
+
"text/html": [
|
| 656 |
+
"<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
| 657 |
+
],
|
| 658 |
+
"text/plain": [
|
| 659 |
+
"<IPython.core.display.HTML object>"
|
| 660 |
+
]
|
| 661 |
+
},
|
| 662 |
+
"metadata": {},
|
| 663 |
+
"output_type": "display_data"
|
| 664 |
+
},
|
| 665 |
+
{
|
| 666 |
+
"data": {
|
| 667 |
+
"text/plain": []
|
| 668 |
+
},
|
| 669 |
+
"execution_count": 32,
|
| 670 |
+
"metadata": {},
|
| 671 |
+
"output_type": "execute_result"
|
| 672 |
+
},
|
| 673 |
+
{
|
| 674 |
+
"name": "stdout",
|
| 675 |
+
"output_type": "stream",
|
| 676 |
+
"text": [
|
| 677 |
+
"Passed evaluation - returning reply\n",
|
| 678 |
+
"Passed evaluation - returning reply\n"
|
| 679 |
+
]
|
| 680 |
+
}
|
| 681 |
+
],
|
| 682 |
+
"source": [
|
| 683 |
+
"gr.ChatInterface(chat, type=\"messages\").launch()"
|
| 684 |
+
]
|
| 685 |
+
},
|
| 686 |
+
{
|
| 687 |
+
"cell_type": "markdown",
|
| 688 |
+
"metadata": {},
|
| 689 |
+
"source": []
|
| 690 |
+
},
|
| 691 |
+
{
|
| 692 |
+
"cell_type": "code",
|
| 693 |
+
"execution_count": null,
|
| 694 |
+
"metadata": {},
|
| 695 |
+
"outputs": [],
|
| 696 |
+
"source": []
|
| 697 |
+
}
|
| 698 |
+
],
|
| 699 |
+
"metadata": {
|
| 700 |
+
"kernelspec": {
|
| 701 |
+
"display_name": ".venv",
|
| 702 |
+
"language": "python",
|
| 703 |
+
"name": "python3"
|
| 704 |
+
},
|
| 705 |
+
"language_info": {
|
| 706 |
+
"codemirror_mode": {
|
| 707 |
+
"name": "ipython",
|
| 708 |
+
"version": 3
|
| 709 |
+
},
|
| 710 |
+
"file_extension": ".py",
|
| 711 |
+
"mimetype": "text/x-python",
|
| 712 |
+
"name": "python",
|
| 713 |
+
"nbconvert_exporter": "python",
|
| 714 |
+
"pygments_lexer": "ipython3",
|
| 715 |
+
"version": "3.12.10"
|
| 716 |
+
}
|
| 717 |
+
},
|
| 718 |
+
"nbformat": 4,
|
| 719 |
+
"nbformat_minor": 2
|
| 720 |
+
}
|
4_lab4.ipynb
ADDED
|
@@ -0,0 +1,556 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## The first big project - Professionally You!\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"### And, Tool use.\n",
|
| 10 |
+
"\n",
|
| 11 |
+
"### But first: introducing Pushover\n",
|
| 12 |
+
"\n",
|
| 13 |
+
"Pushover is a nifty tool for sending Push Notifications to your phone.\n",
|
| 14 |
+
"\n",
|
| 15 |
+
"It's super easy to set up and install!\n",
|
| 16 |
+
"\n",
|
| 17 |
+
"Simply visit https://pushover.net/ and click 'Login or Signup' on the top right to sign up for a free account, and create your API keys.\n",
|
| 18 |
+
"\n",
|
| 19 |
+
"Once you've signed up, on the home screen, click \"Create an Application/API Token\", and give it any name (like Agents) and click Create Application.\n",
|
| 20 |
+
"\n",
|
| 21 |
+
"Then add 2 lines to your `.env` file:\n",
|
| 22 |
+
"\n",
|
| 23 |
+
"PUSHOVER_USER=_put the key that's on the top right of your Pushover home screen and probably starts with a u_ \n",
|
| 24 |
+
"PUSHOVER_TOKEN=_put the key when you click into your new application called Agents (or whatever) and probably starts with an a_\n",
|
| 25 |
+
"\n",
|
| 26 |
+
"Remember to save your `.env` file, and run `load_dotenv(override=True)` after saving, to set your environment variables.\n",
|
| 27 |
+
"\n",
|
| 28 |
+
"Finally, click \"Add Phone, Tablet or Desktop\" to install on your phone."
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "code",
|
| 33 |
+
"execution_count": 1,
|
| 34 |
+
"metadata": {},
|
| 35 |
+
"outputs": [],
|
| 36 |
+
"source": [
|
| 37 |
+
"# imports\n",
|
| 38 |
+
"\n",
|
| 39 |
+
"from dotenv import load_dotenv\n",
|
| 40 |
+
"from openai import OpenAI\n",
|
| 41 |
+
"import json\n",
|
| 42 |
+
"import os\n",
|
| 43 |
+
"import requests\n",
|
| 44 |
+
"from pypdf import PdfReader\n",
|
| 45 |
+
"import gradio as gr"
|
| 46 |
+
]
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"cell_type": "code",
|
| 50 |
+
"execution_count": 2,
|
| 51 |
+
"metadata": {},
|
| 52 |
+
"outputs": [],
|
| 53 |
+
"source": [
|
| 54 |
+
"# The usual start\n",
|
| 55 |
+
"\n",
|
| 56 |
+
"load_dotenv(override=True)\n",
|
| 57 |
+
"openai = OpenAI()"
|
| 58 |
+
]
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"cell_type": "code",
|
| 62 |
+
"execution_count": 3,
|
| 63 |
+
"metadata": {},
|
| 64 |
+
"outputs": [
|
| 65 |
+
{
|
| 66 |
+
"name": "stdout",
|
| 67 |
+
"output_type": "stream",
|
| 68 |
+
"text": [
|
| 69 |
+
"Pushover user found and starts with u\n",
|
| 70 |
+
"Pushover token found and starts with a\n"
|
| 71 |
+
]
|
| 72 |
+
}
|
| 73 |
+
],
|
| 74 |
+
"source": [
|
| 75 |
+
"# For pushover\n",
|
| 76 |
+
"\n",
|
| 77 |
+
"pushover_user = os.getenv(\"PUSHOVER_USER\")\n",
|
| 78 |
+
"pushover_token = os.getenv(\"PUSHOVER_TOKEN\")\n",
|
| 79 |
+
"pushover_url = \"https://api.pushover.net/1/messages.json\"\n",
|
| 80 |
+
"\n",
|
| 81 |
+
"if pushover_user:\n",
|
| 82 |
+
" print(f\"Pushover user found and starts with {pushover_user[0]}\")\n",
|
| 83 |
+
"else:\n",
|
| 84 |
+
" print(\"Pushover user not found\")\n",
|
| 85 |
+
"\n",
|
| 86 |
+
"if pushover_token:\n",
|
| 87 |
+
" print(f\"Pushover token found and starts with {pushover_token[0]}\")\n",
|
| 88 |
+
"else:\n",
|
| 89 |
+
" print(\"Pushover token not found\")"
|
| 90 |
+
]
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"cell_type": "code",
|
| 94 |
+
"execution_count": 4,
|
| 95 |
+
"metadata": {},
|
| 96 |
+
"outputs": [],
|
| 97 |
+
"source": [
|
| 98 |
+
"def push(message):\n",
|
| 99 |
+
" print(f\"Push: {message}\")\n",
|
| 100 |
+
" payload = {\"user\": pushover_user, \"token\": pushover_token, \"message\": message}\n",
|
| 101 |
+
" requests.post(pushover_url, data=payload)"
|
| 102 |
+
]
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"cell_type": "code",
|
| 106 |
+
"execution_count": 5,
|
| 107 |
+
"metadata": {},
|
| 108 |
+
"outputs": [
|
| 109 |
+
{
|
| 110 |
+
"name": "stdout",
|
| 111 |
+
"output_type": "stream",
|
| 112 |
+
"text": [
|
| 113 |
+
"Push: HEY!!\n"
|
| 114 |
+
]
|
| 115 |
+
}
|
| 116 |
+
],
|
| 117 |
+
"source": [
|
| 118 |
+
"push(\"HEY!!\")"
|
| 119 |
+
]
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
"cell_type": "code",
|
| 123 |
+
"execution_count": 6,
|
| 124 |
+
"metadata": {},
|
| 125 |
+
"outputs": [],
|
| 126 |
+
"source": [
|
| 127 |
+
"def record_user_details(email, name=\"Name not provided\", notes=\"not provided\"):\n",
|
| 128 |
+
" push(f\"Recording interest from {name} with email {email} and notes {notes}\")\n",
|
| 129 |
+
" return {\"recorded\": \"ok\"}"
|
| 130 |
+
]
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"cell_type": "code",
|
| 134 |
+
"execution_count": 7,
|
| 135 |
+
"metadata": {},
|
| 136 |
+
"outputs": [],
|
| 137 |
+
"source": [
|
| 138 |
+
"def record_unknown_question(question):\n",
|
| 139 |
+
" push(f\"Recording {question} asked that I couldn't answer\")\n",
|
| 140 |
+
" return {\"recorded\": \"ok\"}"
|
| 141 |
+
]
|
| 142 |
+
},
|
| 143 |
+
{
|
| 144 |
+
"cell_type": "code",
|
| 145 |
+
"execution_count": 8,
|
| 146 |
+
"metadata": {},
|
| 147 |
+
"outputs": [],
|
| 148 |
+
"source": [
|
| 149 |
+
"record_user_details_json = {\n",
|
| 150 |
+
" \"name\": \"record_user_details\",\n",
|
| 151 |
+
" \"description\": \"Use this tool to record that a user is interested in being in touch and provided an email address\",\n",
|
| 152 |
+
" \"parameters\": {\n",
|
| 153 |
+
" \"type\": \"object\",\n",
|
| 154 |
+
" \"properties\": {\n",
|
| 155 |
+
" \"email\": {\n",
|
| 156 |
+
" \"type\": \"string\",\n",
|
| 157 |
+
" \"description\": \"The email address of this user\"\n",
|
| 158 |
+
" },\n",
|
| 159 |
+
" \"name\": {\n",
|
| 160 |
+
" \"type\": \"string\",\n",
|
| 161 |
+
" \"description\": \"The user's name, if they provided it\"\n",
|
| 162 |
+
" }\n",
|
| 163 |
+
" ,\n",
|
| 164 |
+
" \"notes\": {\n",
|
| 165 |
+
" \"type\": \"string\",\n",
|
| 166 |
+
" \"description\": \"Any additional information about the conversation that's worth recording to give context\"\n",
|
| 167 |
+
" }\n",
|
| 168 |
+
" },\n",
|
| 169 |
+
" \"required\": [\"email\"],\n",
|
| 170 |
+
" \"additionalProperties\": False\n",
|
| 171 |
+
" }\n",
|
| 172 |
+
"}"
|
| 173 |
+
]
|
| 174 |
+
},
|
| 175 |
+
{
|
| 176 |
+
"cell_type": "code",
|
| 177 |
+
"execution_count": 9,
|
| 178 |
+
"metadata": {},
|
| 179 |
+
"outputs": [],
|
| 180 |
+
"source": [
|
| 181 |
+
"record_unknown_question_json = {\n",
|
| 182 |
+
" \"name\": \"record_unknown_question\",\n",
|
| 183 |
+
" \"description\": \"Always use this tool to record any question that couldn't be answered as you didn't know the answer\",\n",
|
| 184 |
+
" \"parameters\": {\n",
|
| 185 |
+
" \"type\": \"object\",\n",
|
| 186 |
+
" \"properties\": {\n",
|
| 187 |
+
" \"question\": {\n",
|
| 188 |
+
" \"type\": \"string\",\n",
|
| 189 |
+
" \"description\": \"The question that couldn't be answered\"\n",
|
| 190 |
+
" },\n",
|
| 191 |
+
" },\n",
|
| 192 |
+
" \"required\": [\"question\"],\n",
|
| 193 |
+
" \"additionalProperties\": False\n",
|
| 194 |
+
" }\n",
|
| 195 |
+
"}"
|
| 196 |
+
]
|
| 197 |
+
},
|
| 198 |
+
{
|
| 199 |
+
"cell_type": "code",
|
| 200 |
+
"execution_count": 10,
|
| 201 |
+
"metadata": {},
|
| 202 |
+
"outputs": [],
|
| 203 |
+
"source": [
|
| 204 |
+
"tools = [{\"type\": \"function\", \"function\": record_user_details_json},\n",
|
| 205 |
+
" {\"type\": \"function\", \"function\": record_unknown_question_json}]"
|
| 206 |
+
]
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"cell_type": "code",
|
| 210 |
+
"execution_count": 11,
|
| 211 |
+
"metadata": {},
|
| 212 |
+
"outputs": [
|
| 213 |
+
{
|
| 214 |
+
"data": {
|
| 215 |
+
"text/plain": [
|
| 216 |
+
"[{'type': 'function',\n",
|
| 217 |
+
" 'function': {'name': 'record_user_details',\n",
|
| 218 |
+
" 'description': 'Use this tool to record that a user is interested in being in touch and provided an email address',\n",
|
| 219 |
+
" 'parameters': {'type': 'object',\n",
|
| 220 |
+
" 'properties': {'email': {'type': 'string',\n",
|
| 221 |
+
" 'description': 'The email address of this user'},\n",
|
| 222 |
+
" 'name': {'type': 'string',\n",
|
| 223 |
+
" 'description': \"The user's name, if they provided it\"},\n",
|
| 224 |
+
" 'notes': {'type': 'string',\n",
|
| 225 |
+
" 'description': \"Any additional information about the conversation that's worth recording to give context\"}},\n",
|
| 226 |
+
" 'required': ['email'],\n",
|
| 227 |
+
" 'additionalProperties': False}}},\n",
|
| 228 |
+
" {'type': 'function',\n",
|
| 229 |
+
" 'function': {'name': 'record_unknown_question',\n",
|
| 230 |
+
" 'description': \"Always use this tool to record any question that couldn't be answered as you didn't know the answer\",\n",
|
| 231 |
+
" 'parameters': {'type': 'object',\n",
|
| 232 |
+
" 'properties': {'question': {'type': 'string',\n",
|
| 233 |
+
" 'description': \"The question that couldn't be answered\"}},\n",
|
| 234 |
+
" 'required': ['question'],\n",
|
| 235 |
+
" 'additionalProperties': False}}}]"
|
| 236 |
+
]
|
| 237 |
+
},
|
| 238 |
+
"execution_count": 11,
|
| 239 |
+
"metadata": {},
|
| 240 |
+
"output_type": "execute_result"
|
| 241 |
+
}
|
| 242 |
+
],
|
| 243 |
+
"source": [
|
| 244 |
+
"tools"
|
| 245 |
+
]
|
| 246 |
+
},
|
| 247 |
+
{
|
| 248 |
+
"cell_type": "code",
|
| 249 |
+
"execution_count": 12,
|
| 250 |
+
"metadata": {},
|
| 251 |
+
"outputs": [],
|
| 252 |
+
"source": [
|
| 253 |
+
"# This function can take a list of tool calls, and run them. This is the IF statement!!\n",
|
| 254 |
+
"\n",
|
| 255 |
+
"def handle_tool_calls(tool_calls):\n",
|
| 256 |
+
" results = []\n",
|
| 257 |
+
" for tool_call in tool_calls:\n",
|
| 258 |
+
" tool_name = tool_call.function.name\n",
|
| 259 |
+
" arguments = json.loads(tool_call.function.arguments)\n",
|
| 260 |
+
" print(f\"Tool called: {tool_name}\", flush=True)\n",
|
| 261 |
+
"\n",
|
| 262 |
+
" # THE BIG IF STATEMENT!!!\n",
|
| 263 |
+
"\n",
|
| 264 |
+
" if tool_name == \"record_user_details\":\n",
|
| 265 |
+
" result = record_user_details(**arguments)\n",
|
| 266 |
+
" elif tool_name == \"record_unknown_question\":\n",
|
| 267 |
+
" result = record_unknown_question(**arguments)\n",
|
| 268 |
+
"\n",
|
| 269 |
+
" results.append({\"role\": \"tool\",\"content\": json.dumps(result),\"tool_call_id\": tool_call.id})\n",
|
| 270 |
+
" return results"
|
| 271 |
+
]
|
| 272 |
+
},
|
| 273 |
+
{
|
| 274 |
+
"cell_type": "code",
|
| 275 |
+
"execution_count": 13,
|
| 276 |
+
"metadata": {},
|
| 277 |
+
"outputs": [
|
| 278 |
+
{
|
| 279 |
+
"name": "stdout",
|
| 280 |
+
"output_type": "stream",
|
| 281 |
+
"text": [
|
| 282 |
+
"Push: Recording this is a really hard question asked that I couldn't answer\n"
|
| 283 |
+
]
|
| 284 |
+
},
|
| 285 |
+
{
|
| 286 |
+
"data": {
|
| 287 |
+
"text/plain": [
|
| 288 |
+
"{'recorded': 'ok'}"
|
| 289 |
+
]
|
| 290 |
+
},
|
| 291 |
+
"execution_count": 13,
|
| 292 |
+
"metadata": {},
|
| 293 |
+
"output_type": "execute_result"
|
| 294 |
+
}
|
| 295 |
+
],
|
| 296 |
+
"source": [
|
| 297 |
+
"globals()[\"record_unknown_question\"](\"this is a really hard question\")"
|
| 298 |
+
]
|
| 299 |
+
},
|
| 300 |
+
{
|
| 301 |
+
"cell_type": "code",
|
| 302 |
+
"execution_count": 14,
|
| 303 |
+
"metadata": {},
|
| 304 |
+
"outputs": [],
|
| 305 |
+
"source": [
|
| 306 |
+
"# This is a more elegant way that avoids the IF statement.\n",
|
| 307 |
+
"\n",
|
| 308 |
+
"def handle_tool_calls(tool_calls):\n",
|
| 309 |
+
" results = []\n",
|
| 310 |
+
" for tool_call in tool_calls:\n",
|
| 311 |
+
" tool_name = tool_call.function.name\n",
|
| 312 |
+
" arguments = json.loads(tool_call.function.arguments)\n",
|
| 313 |
+
" print(f\"Tool called: {tool_name}\", flush=True)\n",
|
| 314 |
+
" tool = globals().get(tool_name)\n",
|
| 315 |
+
" result = tool(**arguments) if tool else {}\n",
|
| 316 |
+
" results.append({\"role\": \"tool\",\"content\": json.dumps(result),\"tool_call_id\": tool_call.id})\n",
|
| 317 |
+
" return results"
|
| 318 |
+
]
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"cell_type": "code",
|
| 322 |
+
"execution_count": 16,
|
| 323 |
+
"metadata": {},
|
| 324 |
+
"outputs": [],
|
| 325 |
+
"source": [
|
| 326 |
+
"reader = PdfReader(\"me/linkedin.pdf\")\n",
|
| 327 |
+
"linkedin = \"\"\n",
|
| 328 |
+
"for page in reader.pages:\n",
|
| 329 |
+
" text = page.extract_text()\n",
|
| 330 |
+
" if text:\n",
|
| 331 |
+
" linkedin += text\n",
|
| 332 |
+
"\n",
|
| 333 |
+
"with open(\"me/summary.txt\", \"r\", encoding=\"utf-8\") as f:\n",
|
| 334 |
+
" summary = f.read()\n",
|
| 335 |
+
"\n",
|
| 336 |
+
"name = \"Urvashi Patel\""
|
| 337 |
+
]
|
| 338 |
+
},
|
| 339 |
+
{
|
| 340 |
+
"cell_type": "code",
|
| 341 |
+
"execution_count": 18,
|
| 342 |
+
"metadata": {},
|
| 343 |
+
"outputs": [],
|
| 344 |
+
"source": [
|
| 345 |
+
"system_prompt = f\"You are acting as {name}. You are answering questions on {name}'s website, \\\n",
|
| 346 |
+
"particularly questions related to {name}'s career, background, skills and experience. \\\n",
|
| 347 |
+
"Your responsibility is to represent {name} for interactions on the website as faithfully as possible. \\\n",
|
| 348 |
+
"You are given a summary of {name}'s background and LinkedIn profile which you can use to answer questions. \\\n",
|
| 349 |
+
"Be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
|
| 350 |
+
"If you don't know the answer to any question, use your record_unknown_question tool to record the question that you couldn't answer, even if it's about something trivial or unrelated to career. \\\n",
|
| 351 |
+
"If the user is engaging in discussion, try to steer them towards getting in touch via email; ask for their email and record it using your record_user_details tool. \"\n",
|
| 352 |
+
"\n",
|
| 353 |
+
"system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
|
| 354 |
+
"system_prompt += f\"With this context, please chat with the user, always staying in character as {name}.\"\n"
|
| 355 |
+
]
|
| 356 |
+
},
|
| 357 |
+
{
|
| 358 |
+
"cell_type": "code",
|
| 359 |
+
"execution_count": 17,
|
| 360 |
+
"metadata": {},
|
| 361 |
+
"outputs": [],
|
| 362 |
+
"source": [
|
| 363 |
+
"def chat(message, history):\n",
|
| 364 |
+
" messages = [{\"role\": \"system\", \"content\": system_prompt}] + history + [{\"role\": \"user\", \"content\": message}]\n",
|
| 365 |
+
" done = False\n",
|
| 366 |
+
" while not done:\n",
|
| 367 |
+
"\n",
|
| 368 |
+
" # This is the call to the LLM - see that we pass in the tools json\n",
|
| 369 |
+
"\n",
|
| 370 |
+
" response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages, tools=tools)\n",
|
| 371 |
+
"\n",
|
| 372 |
+
" finish_reason = response.choices[0].finish_reason\n",
|
| 373 |
+
" \n",
|
| 374 |
+
" # If the LLM wants to call a tool, we do that!\n",
|
| 375 |
+
" \n",
|
| 376 |
+
" if finish_reason==\"tool_calls\":\n",
|
| 377 |
+
" message = response.choices[0].message\n",
|
| 378 |
+
" tool_calls = message.tool_calls\n",
|
| 379 |
+
" results = handle_tool_calls(tool_calls)\n",
|
| 380 |
+
" messages.append(message)\n",
|
| 381 |
+
" messages.extend(results)\n",
|
| 382 |
+
" else:\n",
|
| 383 |
+
" done = True\n",
|
| 384 |
+
" return response.choices[0].message.content"
|
| 385 |
+
]
|
| 386 |
+
},
|
| 387 |
+
{
|
| 388 |
+
"cell_type": "code",
|
| 389 |
+
"execution_count": null,
|
| 390 |
+
"metadata": {},
|
| 391 |
+
"outputs": [
|
| 392 |
+
{
|
| 393 |
+
"name": "stdout",
|
| 394 |
+
"output_type": "stream",
|
| 395 |
+
"text": [
|
| 396 |
+
"* Running on local URL: http://127.0.0.1:7862\n",
|
| 397 |
+
"* To create a public link, set `share=True` in `launch()`.\n"
|
| 398 |
+
]
|
| 399 |
+
},
|
| 400 |
+
{
|
| 401 |
+
"data": {
|
| 402 |
+
"text/html": [
|
| 403 |
+
"<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
| 404 |
+
],
|
| 405 |
+
"text/plain": [
|
| 406 |
+
"<IPython.core.display.HTML object>"
|
| 407 |
+
]
|
| 408 |
+
},
|
| 409 |
+
"metadata": {},
|
| 410 |
+
"output_type": "display_data"
|
| 411 |
+
},
|
| 412 |
+
{
|
| 413 |
+
"data": {
|
| 414 |
+
"text/plain": []
|
| 415 |
+
},
|
| 416 |
+
"execution_count": 19,
|
| 417 |
+
"metadata": {},
|
| 418 |
+
"output_type": "execute_result"
|
| 419 |
+
},
|
| 420 |
+
{
|
| 421 |
+
"name": "stdout",
|
| 422 |
+
"output_type": "stream",
|
| 423 |
+
"text": [
|
| 424 |
+
"Tool called: record_unknown_question\n",
|
| 425 |
+
"Push: Recording do you have a patent? asked that I couldn't answer\n",
|
| 426 |
+
"Tool called: record_unknown_question\n",
|
| 427 |
+
"Push: Recording What are Urvashi Patel's favorite musicians? asked that I couldn't answer\n",
|
| 428 |
+
"Tool called: record_user_details\n",
|
| 429 |
+
"Push: Recording interest from Name not provided with email [email protected] and notes not provided\n"
|
| 430 |
+
]
|
| 431 |
+
}
|
| 432 |
+
],
|
| 433 |
+
"source": [
|
| 434 |
+
"gr.ChatInterface(chat, type=\"messages\").launch()"
|
| 435 |
+
]
|
| 436 |
+
},
|
| 437 |
+
{
|
| 438 |
+
"cell_type": "markdown",
|
| 439 |
+
"metadata": {},
|
| 440 |
+
"source": [
|
| 441 |
+
"## And now for deployment\n",
|
| 442 |
+
"\n",
|
| 443 |
+
"This code is in `app.py`\n",
|
| 444 |
+
"\n",
|
| 445 |
+
"We will deploy to HuggingFace Spaces.\n",
|
| 446 |
+
"\n",
|
| 447 |
+
"Before you start: remember to update the files in the \"me\" directory - your LinkedIn profile and summary.txt - so that it talks about you! Also change `self.name = \"Ed Donner\"` in `app.py`. \n",
|
| 448 |
+
"\n",
|
| 449 |
+
"Also check that there's no README file within the 1_foundations directory. If there is one, please delete it. The deploy process creates a new README file in this directory for you.\n",
|
| 450 |
+
"\n",
|
| 451 |
+
"1. Visit https://huggingface.co and set up an account \n",
|
| 452 |
+
"2. From the Avatar menu on the top right, choose Access Tokens. Choose \"Create New Token\". Give it WRITE permissions - it needs to have WRITE permissions! Keep a record of your new key. \n",
|
| 453 |
+
"3. In the Terminal, run: `uv tool install 'huggingface_hub[cli]'` to install the HuggingFace tool, then `hf auth login --token YOUR_TOKEN_HERE`, like `hf auth login --token hf_xxxxxx`, to log in at the command line with your key. Afterwards, run `hf auth whoami` to check you're logged in \n",
|
| 454 |
+
"4. Take your new token and add it to your .env file: `HF_TOKEN=hf_xxx` for the future\n",
|
| 455 |
+
"5. From the 1_foundations folder, enter: `uv run gradio deploy` \n",
|
| 456 |
+
"6. Follow its instructions: name it \"career_conversation\", specify app.py, choose cpu-basic as the hardware, say Yes to needing to supply secrets, provide your openai api key, your pushover user and token, and say \"no\" to github actions. \n",
|
| 457 |
+
"\n",
|
| 458 |
+
"Thank you Robert, James, Martins, Andras and Priya for these tips. \n",
|
| 459 |
+
"Please read the next 2 sections - how to change your Secrets, and how to redeploy your Space (you may need to delete the README.md that gets created in this 1_foundations directory).\n",
|
| 460 |
+
"\n",
|
| 461 |
+
"#### More about these secrets:\n",
|
| 462 |
+
"\n",
|
| 463 |
+
"If you're confused by what's going on with these secrets: it just wants you to enter the key name and value for each of your secrets -- so you would enter: \n",
|
| 464 |
+
"`OPENAI_API_KEY` \n",
|
| 465 |
+
"Followed by: \n",
|
| 466 |
+
"`sk-proj-...` \n",
|
| 467 |
+
"\n",
|
| 468 |
+
"And if you don't want to set secrets this way, or something goes wrong with it, it's no problem - you can change your secrets later: \n",
|
| 469 |
+
"1. Log in to HuggingFace website \n",
|
| 470 |
+
"2. Go to your profile screen via the Avatar menu on the top right \n",
|
| 471 |
+
"3. Select the Space you deployed \n",
|
| 472 |
+
"4. Click on the Settings wheel on the top right \n",
|
| 473 |
+
"5. You can scroll down to change your secrets (Variables and Secrets section), delete the space, etc.\n",
|
| 474 |
+
"\n",
|
| 475 |
+
"#### And now you should be deployed!\n",
|
| 476 |
+
"\n",
|
| 477 |
+
"If you want to completely replace everything and start again with your keys, you may need to delete the README.md that got created in this 1_foundations folder.\n",
|
| 478 |
+
"\n",
|
| 479 |
+
"Here is mine: https://huggingface.co/spaces/ed-donner/Career_Conversation\n",
|
| 480 |
+
"\n",
|
| 481 |
+
"I just got a push notification that a student asked me how they can become President of their country 😂😂\n",
|
| 482 |
+
"\n",
|
| 483 |
+
"For more information on deployment:\n",
|
| 484 |
+
"\n",
|
| 485 |
+
"https://www.gradio.app/guides/sharing-your-app#hosting-on-hf-spaces\n",
|
| 486 |
+
"\n",
|
| 487 |
+
"To delete your Space in the future: \n",
|
| 488 |
+
"1. Log in to HuggingFace\n",
|
| 489 |
+
"2. From the Avatar menu, select your profile\n",
|
| 490 |
+
"3. Click on the Space itself and select the settings wheel on the top right\n",
|
| 491 |
+
"4. Scroll to the Delete section at the bottom\n",
|
| 492 |
+
"5. ALSO: delete the README file that Gradio may have created inside this 1_foundations folder (otherwise it won't ask you the questions the next time you do a gradio deploy)\n"
|
| 493 |
+
]
|
| 494 |
+
},
|
| 495 |
+
{
|
| 496 |
+
"cell_type": "markdown",
|
| 497 |
+
"metadata": {},
|
| 498 |
+
"source": [
|
| 499 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 500 |
+
" <tr>\n",
|
| 501 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 502 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 503 |
+
" </td>\n",
|
| 504 |
+
" <td>\n",
|
| 505 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 506 |
+
"    <span style=\"color:#ff7800;\">• First and foremost, deploy this for yourself! It's a real, valuable tool - the future resume.<br/>\n",
|
| 507 |
+
" • Next, improve the resources - add better context about yourself. If you know RAG, then add a knowledge base about you.<br/>\n",
|
| 508 |
+
" • Add in more tools! You could have a SQL database with common Q&A that the LLM could read and write from?<br/>\n",
|
| 509 |
+
" • Bring in the Evaluator from the last lab, and add other Agentic patterns.\n",
|
| 510 |
+
" </span>\n",
|
| 511 |
+
" </td>\n",
|
| 512 |
+
" </tr>\n",
|
| 513 |
+
"</table>"
|
| 514 |
+
]
|
| 515 |
+
},
|
| 516 |
+
{
|
| 517 |
+
"cell_type": "markdown",
|
| 518 |
+
"metadata": {},
|
| 519 |
+
"source": [
|
| 520 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 521 |
+
" <tr>\n",
|
| 522 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 523 |
+
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 524 |
+
" </td>\n",
|
| 525 |
+
" <td>\n",
|
| 526 |
+
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 527 |
+
" <span style=\"color:#00bfff;\">Aside from the obvious (your career alter-ego) this has business applications in any situation where you need an AI assistant with domain expertise and an ability to interact with the real world.\n",
|
| 528 |
+
" </span>\n",
|
| 529 |
+
" </td>\n",
|
| 530 |
+
" </tr>\n",
|
| 531 |
+
"</table>"
|
| 532 |
+
]
|
| 533 |
+
}
|
| 534 |
+
],
|
| 535 |
+
"metadata": {
|
| 536 |
+
"kernelspec": {
|
| 537 |
+
"display_name": ".venv",
|
| 538 |
+
"language": "python",
|
| 539 |
+
"name": "python3"
|
| 540 |
+
},
|
| 541 |
+
"language_info": {
|
| 542 |
+
"codemirror_mode": {
|
| 543 |
+
"name": "ipython",
|
| 544 |
+
"version": 3
|
| 545 |
+
},
|
| 546 |
+
"file_extension": ".py",
|
| 547 |
+
"mimetype": "text/x-python",
|
| 548 |
+
"name": "python",
|
| 549 |
+
"nbconvert_exporter": "python",
|
| 550 |
+
"pygments_lexer": "ipython3",
|
| 551 |
+
"version": "3.12.10"
|
| 552 |
+
}
|
| 553 |
+
},
|
| 554 |
+
"nbformat": 4,
|
| 555 |
+
"nbformat_minor": 2
|
| 556 |
+
}
|
README.md
CHANGED
|
@@ -1,12 +1,6 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji: 🌖
|
| 4 |
-
colorFrom: yellow
|
| 5 |
-
colorTo: green
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version: 6.1.0
|
| 8 |
app_file: app.py
|
| 9 |
-
|
|
|
|
| 10 |
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: uv_run_gradio_deploy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
app_file: app.py
|
| 4 |
+
sdk: gradio
|
| 5 |
+
sdk_version: 5.49.1
|
| 6 |
---
|
|
|
|
|
|
Untitled
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
reflection_memory.py
|
app.py
ADDED
|
@@ -0,0 +1,283 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dotenv import load_dotenv
|
| 2 |
+
from openai import OpenAI
|
| 3 |
+
from pypdf import PdfReader
|
| 4 |
+
import gradio as gr
|
| 5 |
+
import requests
|
| 6 |
+
import json
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
from reflection_memory import init_db, save_reflection, fetch_recent
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# ==================================================
|
| 13 |
+
# Environment
|
| 14 |
+
# ==================================================
|
| 15 |
+
|
| 16 |
+
load_dotenv(override=True)
|
| 17 |
+
|
| 18 |
+
PUSHOVER_TOKEN = os.getenv("PUSHOVER_TOKEN")
|
| 19 |
+
PUSHOVER_USER = os.getenv("PUSHOVER_USER")
|
| 20 |
+
|
| 21 |
+
MODEL_NAME = "gpt-4.1"
|
| 22 |
+
MAX_HISTORY_MESSAGES = 6
|
| 23 |
+
MAX_TURNS = 5
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# ==================================================
|
| 27 |
+
# Conversation awareness helpers
|
| 28 |
+
# ==================================================
|
| 29 |
+
|
| 30 |
+
# Lower-case phrases that signal the user is unsure or struggling.
# Matched as plain substrings by should_slow_down().
UNCERTAINTY_MARKERS = (
    "not sure",
    "confused",
    "stuck",
    "overwhelmed",
    "i don't know",
    "lost",
    "anxious",
)


def should_slow_down(text: str) -> bool:
    """Return True when *text* contains any phrase from UNCERTAINTY_MARKERS.

    Matching is a case-insensitive substring search. Typographic
    apostrophes (U+2019) are folded to the ASCII apostrophe first, so
    "I don’t know" matches the "i don't know" marker.

    Args:
        text: The user's message. Empty or missing text never matches.

    Returns:
        True if at least one marker occurs in the message, else False.
    """
    if not text:
        return False

    # Phone keyboards commonly emit U+2019 instead of "'"; without this
    # fold the "i don't know" marker would silently never match.
    normalized = text.lower().replace("\u2019", "'")
    return any(marker in normalized for marker in UNCERTAINTY_MARKERS)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# ==================================================
|
| 45 |
+
# Notifications (Pushover)
|
| 46 |
+
# ==================================================
|
| 47 |
+
|
| 48 |
+
def push(message: str) -> None:
    """Send *message* as a Pushover notification on a best-effort basis.

    Does nothing when PUSHOVER_TOKEN or PUSHOVER_USER is unset. Any
    failure -- a network error or a non-2xx reply from the API -- is
    printed and swallowed so that a notification problem never
    propagates to the caller.

    Args:
        message: The notification text to deliver.
    """
    if not PUSHOVER_TOKEN or not PUSHOVER_USER:
        return

    try:
        response = requests.post(
            "https://api.pushover.net/1/messages.json",
            data={
                "token": PUSHOVER_TOKEN,
                "user": PUSHOVER_USER,
                "message": message,
            },
            timeout=5,
        )
        # requests does not raise on 4xx/5xx by itself; without this a
        # rejected token/user would look like a successful send.
        response.raise_for_status()
    except Exception as e:
        # Deliberately broad: notifications are optional and must not
        # break the calling code path.
        print("Pushover failed:", e)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# ==================================================
|
| 67 |
+
# Tool Implementations
|
| 68 |
+
# ==================================================
|
| 69 |
+
|
| 70 |
+
def record_user_details(
    email: str,
    name: str = "Name not provided",
    notes: str = "not provided",
):
    """Push a notification with a visitor's contact details and acknowledge it.

    Returns the fixed payload ``{"recorded": "ok"}``.
    """
    fields = (f"New contact: {name}", email, f"Notes: {notes}")
    push(" | ".join(fields))
    return dict(recorded="ok")
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def record_unknown_question(question: str):
    """Push a notification carrying *question* and acknowledge it.

    Returns the fixed payload ``{"recorded": "ok"}``.
    """
    notification = "Unknown question: " + f"{question}"
    push(notification)
    return dict(recorded="ok")
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
# ==================================================
|
| 85 |
+
# Tool Schemas
|
| 86 |
+
# ==================================================
|
| 87 |
+
|
| 88 |
+
# Argument schema for record_user_details: only "email" is mandatory.
_USER_DETAILS_PARAMETERS = {
    "type": "object",
    "properties": {
        "email": {"type": "string"},
        "name": {"type": "string"},
        "notes": {"type": "string"},
    },
    "required": ["email"],
    "additionalProperties": False,
}

# Argument schema for record_unknown_question.
_UNKNOWN_QUESTION_PARAMETERS = {
    "type": "object",
    "properties": {
        "question": {"type": "string"},
    },
    "required": ["question"],
    "additionalProperties": False,
}

# Function-tool descriptors passed as ``tools=`` to the chat-completions call.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "record_user_details",
            "description": "Record that a user is interested in being in touch",
            "parameters": _USER_DETAILS_PARAMETERS,
        },
    },
    {
        "type": "function",
        "function": {
            "name": "record_unknown_question",
            "description": "Record any question that could not be answered",
            "parameters": _UNKNOWN_QUESTION_PARAMETERS,
        },
    },
]
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
# ==================================================
|
| 125 |
+
# Chatbot
|
| 126 |
+
# ==================================================
|
| 127 |
+
|
| 128 |
+
class Me:
    """Chat persona that answers as one named person.

    Construction creates the OpenAI client, calls ``init_db()`` for the
    reflection store and loads the profile documents under ``me/``.
    """

    # Only these tool names may be dispatched; they match the names declared in TOOLS.
    _ALLOWED_TOOLS = frozenset({"record_user_details", "record_unknown_question"})

    # Reply used when the model yields no usable answer within MAX_TURNS.
    _FALLBACK_REPLY = "Thanks for your question — feel free to rephrase or reach out directly."

    def __init__(self):
        self.client = OpenAI()
        self.name = "Urvashi Patel"

        init_db()

        self.linkedin = self._load_pdf("me/linkedin.pdf")
        self.summary = self._load_text("me/summary.txt")
        self.about_urva = self._load_text("me/about_urva.md")

    # -----------------------------
    # Loaders
    # -----------------------------

    def _load_text(self, path: str) -> str:
        """Return the UTF-8 contents of *path*, or "" if it cannot be read."""
        try:
            with open(path, "r", encoding="utf-8") as f:
                return f.read()
        except (OSError, UnicodeError) as e:
            print(f"Failed to load {path}:", e)
            return ""

    def _load_pdf(self, path: str) -> str:
        """Return the concatenated text of every page in the PDF at *path*.

        On failure the error is printed and whatever text was gathered so far
        (possibly "") is returned.
        """
        text = ""
        try:
            reader = PdfReader(path)
            for page in reader.pages:
                # Extract once per page; the result is reused for the test and the append.
                page_text = page.extract_text()
                if page_text:
                    text += page_text
        except Exception as e:
            print(f"Failed to load {path}:", e)
        return text

    # -----------------------------
    # Prompt
    # -----------------------------

    def system_prompt(self) -> str:
        """Build the system prompt from the persona name and loaded documents."""
        return f"""
You are not a general AI assistant. You only answer as {self.name}.

You are acting as {self.name} on her personal website.
Answer questions about career, background, skills, experience, and interests.

Be thoughtful, grounded, and reflective.
Avoid generic advice. Prefer nuance and long-term thinking.

If asked for medical, legal, or financial advice, politely decline.

When the user sounds uncertain:
- Ask ONE reflective question
- Do not rush to solutions
- Offer options, not instructions

## Personal Context
{self.about_urva}

## Summary
{self.summary}

## LinkedIn
{self.linkedin}

Stay in character at all times.
"""

    # -----------------------------
    # Tool handling
    # -----------------------------

    def _run_tool(self, tool_name, raw_arguments):
        """Execute one allow-listed tool and return its JSON-serialisable result.

        Problems (unknown tool, malformed arguments, wrong parameters) are
        returned as ``{"error": ...}`` so the model sees them instead of the
        chat crashing.
        """
        # The name comes from the model. globals() also holds imported modules
        # and unrelated helpers, so it must never be dispatched unchecked.
        tool_fn = globals().get(tool_name) if tool_name in self._ALLOWED_TOOLS else None
        if not callable(tool_fn):
            return {"error": f"Unknown tool: {tool_name}"}

        try:
            arguments = json.loads(raw_arguments or "{}")
        except json.JSONDecodeError as e:
            return {"error": f"Invalid JSON arguments for {tool_name}: {e}"}
        if not isinstance(arguments, dict):
            return {"error": f"Arguments for {tool_name} must be a JSON object"}

        try:
            return tool_fn(**arguments)
        except TypeError as e:
            # Typically a missing or unexpected keyword argument.
            print(f"Tool {tool_name} rejected its arguments:", e)
            return {"error": f"Bad arguments for {tool_name}: {e}"}

    def handle_tool_calls(self, tool_calls):
        """Run every requested tool call and return the ``role="tool"`` replies.

        Each reply carries the JSON-encoded tool result and the id of the call
        it answers, in the same order as *tool_calls*.
        """
        return [
            {
                "role": "tool",
                "content": json.dumps(
                    self._run_tool(call.function.name, call.function.arguments)
                ),
                "tool_call_id": call.id,
            }
            for call in tool_calls
        ]

    # -----------------------------
    # Chat loop
    # -----------------------------

    def chat(self, message, history):
        """Answer *message*, resolving tool calls for at most MAX_TURNS rounds.

        Only the last MAX_HISTORY_MESSAGES entries of *history* are sent. A
        non-empty answer is stored through ``save_reflection`` before it is
        returned; otherwise a generic fallback reply is returned.
        """
        trimmed_history = history[-MAX_HISTORY_MESSAGES:]

        # --- fetch light reflection memory
        # NOTE(review): fetch_recent() takes no user/session argument here, so
        # reflections look shared across all visitors — confirm this is intended.
        recent_reflections = fetch_recent()
        reflection_context = "\n".join(
            f"- {r['theme']}: {r['summary']}" for r in recent_reflections or ()
        )

        messages = [{"role": "system", "content": self.system_prompt()}]

        if reflection_context:
            messages.append(
                {
                    "role": "system",
                    "content": f"Recent reflections for continuity:\n{reflection_context}",
                }
            )

        messages.extend(trimmed_history)
        messages.append({"role": "user", "content": message})

        for _ in range(MAX_TURNS):
            response = self.client.chat.completions.create(
                model=MODEL_NAME,
                messages=messages,
                tools=TOOLS,
            )

            choice = response.choices[0]

            if choice.finish_reason == "tool_calls":
                messages.append(choice.message)
                messages.extend(self.handle_tool_calls(choice.message.tool_calls))
                continue

            final_answer = choice.message.content
            if not final_answer:
                # content can be None/empty; slicing it below would fail.
                break

            # --- save reflection (short & intentional)
            theme = "direction" if should_slow_down(message) else "general"

            save_reflection(
                theme=theme,
                user_question=message,
                assistant_summary=final_answer[:300],
            )

            return final_answer

        return self._FALLBACK_REPLY
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
# ==================================================
|
| 278 |
+
# UI
|
| 279 |
+
# ==================================================
|
| 280 |
+
|
| 281 |
+
if __name__ == "__main__":
    # Start the Gradio chat UI, with Me.chat answering each message.
    chat_ui = gr.ChatInterface(Me().chat, type="messages")
    chat_ui.launch()
|
community_contributions/1_foundations_using_gemini/1_lab1.ipynb
ADDED
|
@@ -0,0 +1,406 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
+
]
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"cell_type": "markdown",
|
| 12 |
+
"metadata": {},
|
| 13 |
+
"source": [
|
| 14 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 15 |
+
" <tr>\n",
|
| 16 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 17 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 18 |
+
" </td>\n",
|
| 19 |
+
" <td>\n",
|
| 20 |
+
" <h2 style=\"color:#ff7800;\">Are you ready for action??</h2>\n",
|
| 21 |
+
" <span style=\"color:#ff7800;\">Have you completed all the setup steps in the <a href=\"../setup/\">setup</a> folder?<br/>\n",
|
| 22 |
+
" Have you read the <a href=\"../README.md\">README</a>? Many common questions are answered here!<br/>\n",
|
| 23 |
+
" Have you checked out the guides in the <a href=\"../guides/01_intro.ipynb\">guides</a> folder?<br/>\n",
|
| 24 |
+
" Well in that case, you're ready!!\n",
|
| 25 |
+
" </span>\n",
|
| 26 |
+
" </td>\n",
|
| 27 |
+
" </tr>\n",
|
| 28 |
+
"</table>"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "markdown",
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"source": [
|
| 35 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 36 |
+
" <tr>\n",
|
| 37 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 38 |
+
" <img src=\"../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 39 |
+
" </td>\n",
|
| 40 |
+
" <td>\n",
|
| 41 |
+
" <h2 style=\"color:#00bfff;\">This code is a live resource - keep an eye out for my updates</h2>\n",
|
| 42 |
+
" <span style=\"color:#00bfff;\">I push updates regularly. As people ask questions or have problems, I add more examples and improve explanations. As a result, the code below might not be identical to the videos, as I've added more steps and better comments. Consider this like an interactive book that accompanies the lectures.<br/><br/>\n",
|
| 43 |
+
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
|
| 44 |
+
" </span>\n",
|
| 45 |
+
" </td>\n",
|
| 46 |
+
" </tr>\n",
|
| 47 |
+
"</table>"
|
| 48 |
+
]
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"cell_type": "markdown",
|
| 52 |
+
"metadata": {},
|
| 53 |
+
"source": [
|
| 54 |
+
"### And please do remember to contact me if I can help\n",
|
| 55 |
+
"\n",
|
| 56 |
+
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 57 |
+
"\n",
|
| 58 |
+
"\n",
|
| 59 |
+
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 60 |
+
"\n",
|
| 61 |
+
"Just to check you've already added the Python and Jupyter extensions to Cursor, if not already installed:\n",
|
| 62 |
+
"- Open extensions (View >> extensions)\n",
|
| 63 |
+
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 64 |
+
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 65 |
+
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 66 |
+
"\n",
|
| 67 |
+
"And then:\n",
|
| 68 |
+
"1. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 69 |
+
"2. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 70 |
+
"3. Enjoy!\n",
|
| 71 |
+
"\n",
|
| 72 |
+
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 73 |
+
"1. On Mac: From the Cursor menu, choose Settings >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`); \n",
|
| 74 |
+
"On Windows PC: From the File menu, choose Preferences >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 75 |
+
"2. In the Settings search bar, type \"venv\" \n",
|
| 76 |
+
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 77 |
+
"And then try again.\n",
|
| 78 |
+
"\n",
|
| 79 |
+
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interfering. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 80 |
+
"`conda deactivate` \n",
|
| 81 |
+
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 82 |
+
"`conda config --set auto_activate_base false` \n",
|
| 83 |
+
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 84 |
+
]
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"cell_type": "code",
|
| 88 |
+
"execution_count": null,
|
| 89 |
+
"metadata": {},
|
| 90 |
+
"outputs": [],
|
| 91 |
+
"source": [
|
| 92 |
+
"# First let's do an import. If you get an Import Error, double check that your Kernel is correct..\n",
|
| 93 |
+
"\n",
|
| 94 |
+
"from dotenv import load_dotenv\n"
|
| 95 |
+
]
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"cell_type": "code",
|
| 99 |
+
"execution_count": null,
|
| 100 |
+
"metadata": {},
|
| 101 |
+
"outputs": [],
|
| 102 |
+
"source": [
|
| 103 |
+
"# Next it's time to load the API keys into environment variables\n",
|
| 104 |
+
"# If this returns false, see the next cell!\n",
|
| 105 |
+
"\n",
|
| 106 |
+
"load_dotenv(override=True)"
|
| 107 |
+
]
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"cell_type": "markdown",
|
| 111 |
+
"metadata": {},
|
| 112 |
+
"source": [
|
| 113 |
+
"### Wait, did that just output `False`??\n",
|
| 114 |
+
"\n",
|
| 115 |
+
"If so, the most common reason is that you didn't save your `.env` file after adding the key! Be sure to have saved.\n",
|
| 116 |
+
"\n",
|
| 117 |
+
"Also, make sure the `.env` file is named precisely `.env` and is in the project root directory (`agents`)\n",
|
| 118 |
+
"\n",
|
| 119 |
+
"By the way, your `.env` file should have a stop symbol next to it in Cursor on the left, and that's actually a good thing: that's Cursor saying to you, \"hey, I realize this is a file filled with secret information, and I'm not going to send it to an external AI to suggest changes, because your keys should not be shown to anyone else.\""
|
| 120 |
+
]
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"cell_type": "markdown",
|
| 124 |
+
"metadata": {},
|
| 125 |
+
"source": [
|
| 126 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 127 |
+
" <tr>\n",
|
| 128 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 129 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 130 |
+
" </td>\n",
|
| 131 |
+
" <td>\n",
|
| 132 |
+
" <h2 style=\"color:#ff7800;\">Final reminders</h2>\n",
|
| 133 |
+
" <span style=\"color:#ff7800;\">1. If you're not confident about Environment Variables or Web Endpoints / APIs, please read Topics 3 and 5 in this <a href=\"../guides/04_technical_foundations.ipynb\">technical foundations guide</a>.<br/>\n",
|
| 134 |
+
" 2. If you want to use AIs other than OpenAI, like Gemini, DeepSeek or Ollama (free), please see the first section in this <a href=\"../guides/09_ai_apis_and_ollama.ipynb\">AI APIs guide</a>.<br/>\n",
|
| 135 |
+
" 3. If you ever get a Name Error in Python, you can always fix it immediately; see the last section of this <a href=\"../guides/06_python_foundations.ipynb\">Python Foundations guide</a> and follow both tutorials and exercises.<br/>\n",
|
| 136 |
+
" </span>\n",
|
| 137 |
+
" </td>\n",
|
| 138 |
+
" </tr>\n",
|
| 139 |
+
"</table>"
|
| 140 |
+
]
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"cell_type": "code",
|
| 144 |
+
"execution_count": null,
|
| 145 |
+
"metadata": {},
|
| 146 |
+
"outputs": [],
|
| 147 |
+
"source": [
|
| 148 |
+
"# Check the key - if you're not using OpenAI, check whichever key you're using! Ollama doesn't need a key.\n",
|
| 149 |
+
"\n",
|
| 150 |
+
"import os\n",
|
| 151 |
+
"gemini_api_key = os.getenv('GEMINI_API_KEY')\n",
|
| 152 |
+
"\n",
|
| 153 |
+
"if gemini_api_key:\n",
|
| 154 |
+
" print(f\"OpenAI API Key exists and begins {gemini_api_key[:8]}\")\n",
|
| 155 |
+
"else:\n",
|
| 156 |
+
" print(\"OpenAI API Key not set - please head to the troubleshooting guide in the setup folder\")\n",
|
| 157 |
+
" \n"
|
| 158 |
+
]
|
| 159 |
+
},
|
| 160 |
+
{
|
| 161 |
+
"cell_type": "code",
|
| 162 |
+
"execution_count": null,
|
| 163 |
+
"metadata": {},
|
| 164 |
+
"outputs": [],
|
| 165 |
+
"source": [
|
| 166 |
+
"# And now - the all important import statement\n",
|
| 167 |
+
"# If you get an import error - head over to troubleshooting in the Setup folder\n",
|
| 168 |
+
"# Even for other LLM providers like Gemini, you still use this OpenAI import - see Guide 9 for why\n",
|
| 169 |
+
"\n",
|
| 170 |
+
"from openai import OpenAI"
|
| 171 |
+
]
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"cell_type": "code",
|
| 175 |
+
"execution_count": null,
|
| 176 |
+
"metadata": {},
|
| 177 |
+
"outputs": [],
|
| 178 |
+
"source": [
|
| 179 |
+
"# And now we'll create an instance of the OpenAI class\n",
|
| 180 |
+
"# If you're not sure what it means to create an instance of a class - head over to the guides folder (guide 6)!\n",
|
| 181 |
+
"# If you get a NameError - head over to the guides folder (guide 6)to learn about NameErrors - always instantly fixable\n",
|
| 182 |
+
"# If you're not using OpenAI, you just need to slightly modify this - precise instructions are in the AI APIs guide (guide 9)\n",
|
| 183 |
+
"GEMINI_BASE_URL = \"https://generativelanguage.googleapis.com/v1beta/openai/\"\n",
|
| 184 |
+
"google_api_key = os.getenv(\"GOOGLE_API_KEY\")\n",
|
| 185 |
+
"gemini = OpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)"
|
| 186 |
+
]
|
| 187 |
+
},
|
| 188 |
+
{
|
| 189 |
+
"cell_type": "code",
|
| 190 |
+
"execution_count": null,
|
| 191 |
+
"metadata": {},
|
| 192 |
+
"outputs": [],
|
| 193 |
+
"source": [
|
| 194 |
+
"# Create a list of messages in the familiar OpenAI format\n",
|
| 195 |
+
"\n",
|
| 196 |
+
"messages = [{\"role\": \"user\", \"content\": \"What is 2+2?\"}]"
|
| 197 |
+
]
|
| 198 |
+
},
|
| 199 |
+
{
|
| 200 |
+
"cell_type": "code",
|
| 201 |
+
"execution_count": null,
|
| 202 |
+
"metadata": {},
|
| 203 |
+
"outputs": [],
|
| 204 |
+
"source": [
|
| 205 |
+
"# And now call it! Any problems, head to the troubleshooting guide\n",
|
| 206 |
+
"# This uses a Gemini 2.5 Flash preview model through Google's OpenAI-compatible endpoint\n",
|
| 207 |
+
"# The APIs guide (guide 9) has exact instructions for using even cheaper or free alternatives to OpenAI\n",
|
| 208 |
+
"# If you get a NameError, head to the guides folder (guide 6) to learn about NameErrors - always instantly fixable\n",
|
| 209 |
+
"model = \"gemini-2.5-flash-preview-05-20\"\n",
|
| 210 |
+
"response = gemini.chat.completions.create(\n",
|
| 211 |
+
" model=model,\n",
|
| 212 |
+
" messages=messages\n",
|
| 213 |
+
")\n",
|
| 214 |
+
"\n",
|
| 215 |
+
"print(response.choices[0].message.content)\n"
|
| 216 |
+
]
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"cell_type": "code",
|
| 220 |
+
"execution_count": null,
|
| 221 |
+
"metadata": {},
|
| 222 |
+
"outputs": [],
|
| 223 |
+
"source": [
|
| 224 |
+
"# And now - let's ask for a question:\n",
|
| 225 |
+
"\n",
|
| 226 |
+
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 227 |
+
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 228 |
+
]
|
| 229 |
+
},
|
| 230 |
+
{
|
| 231 |
+
"cell_type": "code",
|
| 232 |
+
"execution_count": null,
|
| 233 |
+
"metadata": {},
|
| 234 |
+
"outputs": [],
|
| 235 |
+
"source": [
|
| 236 |
+
"# ask it - this reuses the same Gemini model selected above\n",
|
| 237 |
+
"\n",
|
| 238 |
+
"response = gemini.chat.completions.create(\n",
|
| 239 |
+
" model=model,\n",
|
| 240 |
+
" messages=messages\n",
|
| 241 |
+
")\n",
|
| 242 |
+
"\n",
|
| 243 |
+
"question = response.choices[0].message.content\n",
|
| 244 |
+
"\n",
|
| 245 |
+
"print(question)\n"
|
| 246 |
+
]
|
| 247 |
+
},
|
| 248 |
+
{
|
| 249 |
+
"cell_type": "code",
|
| 250 |
+
"execution_count": null,
|
| 251 |
+
"metadata": {},
|
| 252 |
+
"outputs": [],
|
| 253 |
+
"source": [
|
| 254 |
+
"# form a new messages list\n",
|
| 255 |
+
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 256 |
+
]
|
| 257 |
+
},
|
| 258 |
+
{
|
| 259 |
+
"cell_type": "code",
|
| 260 |
+
"execution_count": null,
|
| 261 |
+
"metadata": {},
|
| 262 |
+
"outputs": [],
|
| 263 |
+
"source": [
|
| 264 |
+
"# Ask it again\n",
|
| 265 |
+
"\n",
|
| 266 |
+
"response = gemini.chat.completions.create(\n",
|
| 267 |
+
" model=model,\n",
|
| 268 |
+
" messages=messages\n",
|
| 269 |
+
")\n",
|
| 270 |
+
"\n",
|
| 271 |
+
"answer = response.choices[0].message.content\n",
|
| 272 |
+
"print(answer)\n"
|
| 273 |
+
]
|
| 274 |
+
},
|
| 275 |
+
{
|
| 276 |
+
"cell_type": "code",
|
| 277 |
+
"execution_count": null,
|
| 278 |
+
"metadata": {},
|
| 279 |
+
"outputs": [],
|
| 280 |
+
"source": [
|
| 281 |
+
"from IPython.display import Markdown, display\n",
|
| 282 |
+
"\n",
|
| 283 |
+
"display(Markdown(answer))\n",
|
| 284 |
+
"\n"
|
| 285 |
+
]
|
| 286 |
+
},
|
| 287 |
+
{
|
| 288 |
+
"cell_type": "markdown",
|
| 289 |
+
"metadata": {},
|
| 290 |
+
"source": [
|
| 291 |
+
"# Congratulations!\n",
|
| 292 |
+
"\n",
|
| 293 |
+
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 294 |
+
"\n",
|
| 295 |
+
"Next time things get more interesting..."
|
| 296 |
+
]
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"cell_type": "markdown",
|
| 300 |
+
"metadata": {},
|
| 301 |
+
"source": [
|
| 302 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 303 |
+
" <tr>\n",
|
| 304 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 305 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 306 |
+
" </td>\n",
|
| 307 |
+
" <td>\n",
|
| 308 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 309 |
+
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 310 |
+
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 311 |
+
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 312 |
+
" Finally have a third LLM call propose the Agentic AI solution. <br/>\n",
|
| 313 |
+
" We will cover this at up-coming labs, so don't worry if you're unsure.. just give it a try!\n",
|
| 314 |
+
" </span>\n",
|
| 315 |
+
" </td>\n",
|
| 316 |
+
" </tr>\n",
|
| 317 |
+
"</table>"
|
| 318 |
+
]
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"cell_type": "code",
|
| 322 |
+
"execution_count": null,
|
| 323 |
+
"metadata": {},
|
| 324 |
+
"outputs": [],
|
| 325 |
+
"source": [
|
| 326 |
+
"# First create the messages:\n",
|
| 327 |
+
"\n",
|
| 328 |
+
"messages = [{\"role\": \"user\", \"content\": \"Pick a business area that might be worth exploring for an Agentic AI opportunity.\"}]\n",
|
| 329 |
+
"\n",
|
| 330 |
+
"# Then make the first call:\n",
|
| 331 |
+
"\n",
|
| 332 |
+
"response = gemini.chat.completions.create(\n",
|
| 333 |
+
" model=model,\n",
|
| 334 |
+
" messages=messages\n",
|
| 335 |
+
")\n",
|
| 336 |
+
"\n",
|
| 337 |
+
"# Then read the business idea:\n",
|
| 338 |
+
"\n",
|
| 339 |
+
"business_idea = response.choices[0].message.content\n",
|
| 340 |
+
"\n",
|
| 341 |
+
"\n",
|
| 342 |
+
"display(Markdown(business_idea))\n",
|
| 343 |
+
"\n",
|
| 344 |
+
"# And repeat! In the next message, include the business idea within the message"
|
| 345 |
+
]
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"cell_type": "code",
|
| 349 |
+
"execution_count": null,
|
| 350 |
+
"metadata": {},
|
| 351 |
+
"outputs": [],
|
| 352 |
+
"source": [
|
| 353 |
+
"messages = [{\"role\": \"user\", \"content\": f\"Present a pain-point in that {business_idea} industry - something challenging that might be ripe for an Agentic solution.\"}]\n",
|
| 354 |
+
"\n",
|
| 355 |
+
"response = gemini.chat.completions.create(\n",
|
| 356 |
+
" model=model,\n",
|
| 357 |
+
" messages=messages\n",
|
| 358 |
+
")\n",
|
| 359 |
+
"\n",
|
| 360 |
+
"pain_point = response.choices[0].message.content\n",
|
| 361 |
+
"\n",
|
| 362 |
+
"display(Markdown(pain_point))"
|
| 363 |
+
]
|
| 364 |
+
},
|
| 365 |
+
{
|
| 366 |
+
"cell_type": "code",
|
| 367 |
+
"execution_count": null,
|
| 368 |
+
"metadata": {},
|
| 369 |
+
"outputs": [],
|
| 370 |
+
"source": [
|
| 371 |
+
"messages = [{\"role\": \"user\", \"content\": f\"Propose an Agentic AI solution to the {pain_point} in the {business_idea} industry.\"}]\n",
|
| 372 |
+
"\n",
|
| 373 |
+
"response = gemini.chat.completions.create(\n",
|
| 374 |
+
" model=model,\n",
|
| 375 |
+
" messages=messages\n",
|
| 376 |
+
")\n",
|
| 377 |
+
"\n",
|
| 378 |
+
"agentic_solution = response.choices[0].message.content\n",
|
| 379 |
+
"\n",
|
| 380 |
+
"display(Markdown(agentic_solution))\n",
|
| 381 |
+
"\n"
|
| 382 |
+
]
|
| 383 |
+
}
|
| 384 |
+
],
|
| 385 |
+
"metadata": {
|
| 386 |
+
"kernelspec": {
|
| 387 |
+
"display_name": ".venv",
|
| 388 |
+
"language": "python",
|
| 389 |
+
"name": "python3"
|
| 390 |
+
},
|
| 391 |
+
"language_info": {
|
| 392 |
+
"codemirror_mode": {
|
| 393 |
+
"name": "ipython",
|
| 394 |
+
"version": 3
|
| 395 |
+
},
|
| 396 |
+
"file_extension": ".py",
|
| 397 |
+
"mimetype": "text/x-python",
|
| 398 |
+
"name": "python",
|
| 399 |
+
"nbconvert_exporter": "python",
|
| 400 |
+
"pygments_lexer": "ipython3",
|
| 401 |
+
"version": "3.12.12"
|
| 402 |
+
}
|
| 403 |
+
},
|
| 404 |
+
"nbformat": 4,
|
| 405 |
+
"nbformat_minor": 2
|
| 406 |
+
}
|
community_contributions/1_foundations_using_gemini/2_lab2.ipynb
ADDED
|
@@ -0,0 +1,492 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "markdown",
|
| 14 |
+
"metadata": {},
|
| 15 |
+
"source": [
|
| 16 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
+
" <tr>\n",
|
| 18 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
+
" </td>\n",
|
| 21 |
+
" <td>\n",
|
| 22 |
+
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
+
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
+
" </span>\n",
|
| 25 |
+
" </td>\n",
|
| 26 |
+
" </tr>\n",
|
| 27 |
+
"</table>"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"cell_type": "code",
|
| 32 |
+
"execution_count": null,
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"outputs": [],
|
| 35 |
+
"source": [
|
| 36 |
+
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 37 |
+
"\n",
|
| 38 |
+
"import os\n",
|
| 39 |
+
"import json\n",
|
| 40 |
+
"from dotenv import load_dotenv\n",
|
| 41 |
+
"from openai import OpenAI\n",
|
| 42 |
+
"from anthropic import Anthropic\n",
|
| 43 |
+
"from IPython.display import Markdown, display"
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"cell_type": "code",
|
| 48 |
+
"execution_count": null,
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"outputs": [],
|
| 51 |
+
"source": [
|
| 52 |
+
"# Always remember to do this!\n",
|
| 53 |
+
"load_dotenv(override=True)"
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "code",
|
| 58 |
+
"execution_count": null,
|
| 59 |
+
"metadata": {},
|
| 60 |
+
"outputs": [],
|
| 61 |
+
"source": [
|
| 62 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 63 |
+
"\n",
|
| 64 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 65 |
+
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 66 |
+
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 67 |
+
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 68 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 69 |
+
"\n",
|
| 70 |
+
"if openai_api_key:\n",
|
| 71 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 72 |
+
"else:\n",
|
| 73 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 74 |
+
" \n",
|
| 75 |
+
"if anthropic_api_key:\n",
|
| 76 |
+
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 77 |
+
"else:\n",
|
| 78 |
+
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 79 |
+
"\n",
|
| 80 |
+
"if google_api_key:\n",
|
| 81 |
+
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 82 |
+
"else:\n",
|
| 83 |
+
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 84 |
+
"\n",
|
| 85 |
+
"if deepseek_api_key:\n",
|
| 86 |
+
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 87 |
+
"else:\n",
|
| 88 |
+
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"if groq_api_key:\n",
|
| 91 |
+
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 92 |
+
"else:\n",
|
| 93 |
+
" print(\"Groq API Key not set (and this is optional)\")"
|
| 94 |
+
]
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"cell_type": "code",
|
| 98 |
+
"execution_count": null,
|
| 99 |
+
"metadata": {},
|
| 100 |
+
"outputs": [],
|
| 101 |
+
"source": [
|
| 102 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 103 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 104 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 105 |
+
]
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"cell_type": "code",
|
| 109 |
+
"execution_count": null,
|
| 110 |
+
"metadata": {},
|
| 111 |
+
"outputs": [],
|
| 112 |
+
"source": [
|
| 113 |
+
"messages"
|
| 114 |
+
]
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"cell_type": "code",
|
| 118 |
+
"execution_count": null,
|
| 119 |
+
"metadata": {},
|
| 120 |
+
"outputs": [],
|
| 121 |
+
"source": [
|
| 122 |
+
"openai = OpenAI()\n",
|
| 123 |
+
"response = openai.chat.completions.create(\n",
|
| 124 |
+
" model=\"gpt-5-mini\",\n",
|
| 125 |
+
" messages=messages,\n",
|
| 126 |
+
")\n",
|
| 127 |
+
"question = response.choices[0].message.content\n",
|
| 128 |
+
"print(question)\n"
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"cell_type": "code",
|
| 133 |
+
"execution_count": null,
|
| 134 |
+
"metadata": {},
|
| 135 |
+
"outputs": [],
|
| 136 |
+
"source": [
|
| 137 |
+
"competitors = []\n",
|
| 138 |
+
"answers = []\n",
|
| 139 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 140 |
+
]
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"cell_type": "markdown",
|
| 144 |
+
"metadata": {},
|
| 145 |
+
"source": [
|
| 146 |
+
"## Note - update since the videos\n",
|
| 147 |
+
"\n",
|
| 148 |
+
"I've updated the model names to use the latest models below, like GPT 5 and Claude Sonnet 4.5. It's worth noting that these models can be quite slow - like 1-2 minutes - but they do a great job! Feel free to switch them for faster models if you'd prefer, like the ones I use in the video."
|
| 149 |
+
]
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"cell_type": "code",
|
| 153 |
+
"execution_count": null,
|
| 154 |
+
"metadata": {},
|
| 155 |
+
"outputs": [],
|
| 156 |
+
"source": [
|
| 157 |
+
"# The API we know well\n",
|
| 158 |
+
"# I've updated this with the latest model, but it can take some time because it likes to think!\n",
|
| 159 |
+
"# Replace the model with gpt-4.1-mini if you'd prefer not to wait 1-2 mins\n",
|
| 160 |
+
"\n",
|
| 161 |
+
"model_name = \"gpt-5-nano\"\n",
|
| 162 |
+
"\n",
|
| 163 |
+
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 164 |
+
"answer = response.choices[0].message.content\n",
|
| 165 |
+
"\n",
|
| 166 |
+
"display(Markdown(answer))\n",
|
| 167 |
+
"competitors.append(model_name)\n",
|
| 168 |
+
"answers.append(answer)"
|
| 169 |
+
]
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
"cell_type": "code",
|
| 173 |
+
"execution_count": null,
|
| 174 |
+
"metadata": {},
|
| 175 |
+
"outputs": [],
|
| 176 |
+
"source": [
|
| 177 |
+
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 178 |
+
"\n",
|
| 179 |
+
"model_name = \"claude-sonnet-4-5\"\n",
|
| 180 |
+
"\n",
|
| 181 |
+
"claude = Anthropic()\n",
|
| 182 |
+
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 183 |
+
"answer = response.content[0].text\n",
|
| 184 |
+
"\n",
|
| 185 |
+
"display(Markdown(answer))\n",
|
| 186 |
+
"competitors.append(model_name)\n",
|
| 187 |
+
"answers.append(answer)"
|
| 188 |
+
]
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"cell_type": "code",
|
| 192 |
+
"execution_count": null,
|
| 193 |
+
"metadata": {},
|
| 194 |
+
"outputs": [],
|
| 195 |
+
"source": [
|
| 196 |
+
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 197 |
+
"model_name = \"gemini-2.5-flash\"\n",
|
| 198 |
+
"\n",
|
| 199 |
+
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 200 |
+
"answer = response.choices[0].message.content\n",
|
| 201 |
+
"\n",
|
| 202 |
+
"display(Markdown(answer))\n",
|
| 203 |
+
"competitors.append(model_name)\n",
|
| 204 |
+
"answers.append(answer)"
|
| 205 |
+
]
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"cell_type": "code",
|
| 209 |
+
"execution_count": null,
|
| 210 |
+
"metadata": {},
|
| 211 |
+
"outputs": [],
|
| 212 |
+
"source": [
|
| 213 |
+
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 214 |
+
"model_name = \"deepseek-chat\"\n",
|
| 215 |
+
"\n",
|
| 216 |
+
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 217 |
+
"answer = response.choices[0].message.content\n",
|
| 218 |
+
"\n",
|
| 219 |
+
"display(Markdown(answer))\n",
|
| 220 |
+
"competitors.append(model_name)\n",
|
| 221 |
+
"answers.append(answer)"
|
| 222 |
+
]
|
| 223 |
+
},
|
| 224 |
+
{
|
| 225 |
+
"cell_type": "code",
|
| 226 |
+
"execution_count": null,
|
| 227 |
+
"metadata": {},
|
| 228 |
+
"outputs": [],
|
| 229 |
+
"source": [
|
| 230 |
+
"# Updated with the latest Open Source model from OpenAI\n",
|
| 231 |
+
"\n",
|
| 232 |
+
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 233 |
+
"model_name = \"openai/gpt-oss-120b\"\n",
|
| 234 |
+
"\n",
|
| 235 |
+
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 236 |
+
"answer = response.choices[0].message.content\n",
|
| 237 |
+
"\n",
|
| 238 |
+
"display(Markdown(answer))\n",
|
| 239 |
+
"competitors.append(model_name)\n",
|
| 240 |
+
"answers.append(answer)\n"
|
| 241 |
+
]
|
| 242 |
+
},
|
| 243 |
+
{
|
| 244 |
+
"cell_type": "markdown",
|
| 245 |
+
"metadata": {},
|
| 246 |
+
"source": [
|
| 247 |
+
"## For the next cell, we will use Ollama\n",
|
| 248 |
+
"\n",
|
| 249 |
+
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 250 |
+
"and runs models locally using high performance C++ code.\n",
|
| 251 |
+
"\n",
|
| 252 |
+
"If you don't have Ollama, install it by visiting https://ollama.com, pressing Download, and following the instructions.\n",
|
| 253 |
+
"\n",
|
| 254 |
+
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 255 |
+
"\n",
|
| 256 |
+
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 257 |
+
"\n",
|
| 258 |
+
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 259 |
+
"\n",
|
| 260 |
+
"`ollama pull <model_name>` downloads a model locally \n",
|
| 261 |
+
"`ollama ls` lists all the models you've downloaded \n",
|
| 262 |
+
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 263 |
+
]
|
| 264 |
+
},
|
| 265 |
+
{
|
| 266 |
+
"cell_type": "markdown",
|
| 267 |
+
"metadata": {},
|
| 268 |
+
"source": [
|
| 269 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 270 |
+
" <tr>\n",
|
| 271 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 272 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 273 |
+
" </td>\n",
|
| 274 |
+
" <td>\n",
|
| 275 |
+
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 276 |
+
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See <a href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 277 |
+
" </span>\n",
|
| 278 |
+
" </td>\n",
|
| 279 |
+
" </tr>\n",
|
| 280 |
+
"</table>"
|
| 281 |
+
]
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"cell_type": "code",
|
| 285 |
+
"execution_count": null,
|
| 286 |
+
"metadata": {},
|
| 287 |
+
"outputs": [],
|
| 288 |
+
"source": [
|
| 289 |
+
"!ollama pull llama3.2"
|
| 290 |
+
]
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"cell_type": "code",
|
| 294 |
+
"execution_count": null,
|
| 295 |
+
"metadata": {},
|
| 296 |
+
"outputs": [],
|
| 297 |
+
"source": [
|
| 298 |
+
"ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 299 |
+
"model_name = \"llama3.2\"\n",
|
| 300 |
+
"\n",
|
| 301 |
+
"response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 302 |
+
"answer = response.choices[0].message.content\n",
|
| 303 |
+
"\n",
|
| 304 |
+
"display(Markdown(answer))\n",
|
| 305 |
+
"competitors.append(model_name)\n",
|
| 306 |
+
"answers.append(answer)"
|
| 307 |
+
]
|
| 308 |
+
},
|
| 309 |
+
{
|
| 310 |
+
"cell_type": "code",
|
| 311 |
+
"execution_count": null,
|
| 312 |
+
"metadata": {},
|
| 313 |
+
"outputs": [],
|
| 314 |
+
"source": [
|
| 315 |
+
"# So where are we?\n",
|
| 316 |
+
"\n",
|
| 317 |
+
"print(competitors)\n",
|
| 318 |
+
"print(answers)\n"
|
| 319 |
+
]
|
| 320 |
+
},
|
| 321 |
+
{
|
| 322 |
+
"cell_type": "code",
|
| 323 |
+
"execution_count": null,
|
| 324 |
+
"metadata": {},
|
| 325 |
+
"outputs": [],
|
| 326 |
+
"source": [
|
| 327 |
+
"# It's nice to know how to use \"zip\"\n",
|
| 328 |
+
"for competitor, answer in zip(competitors, answers):\n",
|
| 329 |
+
" print(f\"Competitor: {competitor}\\n\\n{answer}\")\n"
|
| 330 |
+
]
|
| 331 |
+
},
|
| 332 |
+
{
|
| 333 |
+
"cell_type": "code",
|
| 334 |
+
"execution_count": null,
|
| 335 |
+
"metadata": {},
|
| 336 |
+
"outputs": [],
|
| 337 |
+
"source": [
|
| 338 |
+
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 339 |
+
"\n",
|
| 340 |
+
"together = \"\"\n",
|
| 341 |
+
"for index, answer in enumerate(answers):\n",
|
| 342 |
+
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 343 |
+
" together += answer + \"\\n\\n\""
|
| 344 |
+
]
|
| 345 |
+
},
|
| 346 |
+
{
|
| 347 |
+
"cell_type": "code",
|
| 348 |
+
"execution_count": null,
|
| 349 |
+
"metadata": {},
|
| 350 |
+
"outputs": [],
|
| 351 |
+
"source": [
|
| 352 |
+
"print(together)"
|
| 353 |
+
]
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"cell_type": "markdown",
|
| 357 |
+
"metadata": {},
|
| 358 |
+
"source": []
|
| 359 |
+
},
|
| 360 |
+
{
|
| 361 |
+
"cell_type": "code",
|
| 362 |
+
"execution_count": null,
|
| 363 |
+
"metadata": {},
|
| 364 |
+
"outputs": [],
|
| 365 |
+
"source": [
|
| 366 |
+
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 367 |
+
"Each model has been given this question:\n",
|
| 368 |
+
"\n",
|
| 369 |
+
"{question}\n",
|
| 370 |
+
"\n",
|
| 371 |
+
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 372 |
+
"Respond with JSON, and only JSON, with the following format:\n",
|
| 373 |
+
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 374 |
+
"\n",
|
| 375 |
+
"Here are the responses from each competitor:\n",
|
| 376 |
+
"\n",
|
| 377 |
+
"{together}\n",
|
| 378 |
+
"\n",
|
| 379 |
+
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 380 |
+
]
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"cell_type": "code",
|
| 384 |
+
"execution_count": null,
|
| 385 |
+
"metadata": {},
|
| 386 |
+
"outputs": [],
|
| 387 |
+
"source": [
|
| 388 |
+
"print(judge)"
|
| 389 |
+
]
|
| 390 |
+
},
|
| 391 |
+
{
|
| 392 |
+
"cell_type": "code",
|
| 393 |
+
"execution_count": null,
|
| 394 |
+
"metadata": {},
|
| 395 |
+
"outputs": [],
|
| 396 |
+
"source": [
|
| 397 |
+
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 398 |
+
]
|
| 399 |
+
},
|
| 400 |
+
{
|
| 401 |
+
"cell_type": "code",
|
| 402 |
+
"execution_count": null,
|
| 403 |
+
"metadata": {},
|
| 404 |
+
"outputs": [],
|
| 405 |
+
"source": [
|
| 406 |
+
"# Judgement time!\n",
|
| 407 |
+
"\n",
|
| 408 |
+
"openai = OpenAI()\n",
|
| 409 |
+
"response = openai.chat.completions.create(\n",
|
| 410 |
+
" model=\"gpt-5-mini\",\n",
|
| 411 |
+
" messages=judge_messages,\n",
|
| 412 |
+
")\n",
|
| 413 |
+
"results = response.choices[0].message.content\n",
|
| 414 |
+
"print(results)\n"
|
| 415 |
+
]
|
| 416 |
+
},
|
| 417 |
+
{
|
| 418 |
+
"cell_type": "code",
|
| 419 |
+
"execution_count": null,
|
| 420 |
+
"metadata": {},
|
| 421 |
+
"outputs": [],
|
| 422 |
+
"source": [
|
| 423 |
+
"# OK let's turn this into results!\n",
|
| 424 |
+
"\n",
|
| 425 |
+
"results_dict = json.loads(results)\n",
|
| 426 |
+
"ranks = results_dict[\"results\"]\n",
|
| 427 |
+
"for index, result in enumerate(ranks):\n",
|
| 428 |
+
" competitor = competitors[int(result)-1]\n",
|
| 429 |
+
" print(f\"Rank {index+1}: {competitor}\")"
|
| 430 |
+
]
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"cell_type": "markdown",
|
| 434 |
+
"metadata": {},
|
| 435 |
+
"source": [
|
| 436 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 437 |
+
" <tr>\n",
|
| 438 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 439 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 440 |
+
" </td>\n",
|
| 441 |
+
" <td>\n",
|
| 442 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 443 |
+
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 444 |
+
" </span>\n",
|
| 445 |
+
" </td>\n",
|
| 446 |
+
" </tr>\n",
|
| 447 |
+
"</table>"
|
| 448 |
+
]
|
| 449 |
+
},
|
| 450 |
+
{
|
| 451 |
+
"cell_type": "markdown",
|
| 452 |
+
"metadata": {},
|
| 453 |
+
"source": [
|
| 454 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 455 |
+
" <tr>\n",
|
| 456 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 457 |
+
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 458 |
+
" </td>\n",
|
| 459 |
+
" <td>\n",
|
| 460 |
+
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 461 |
+
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 462 |
+
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 463 |
+
" to business projects where accuracy is critical.\n",
|
| 464 |
+
" </span>\n",
|
| 465 |
+
" </td>\n",
|
| 466 |
+
" </tr>\n",
|
| 467 |
+
"</table>"
|
| 468 |
+
]
|
| 469 |
+
}
|
| 470 |
+
],
|
| 471 |
+
"metadata": {
|
| 472 |
+
"kernelspec": {
|
| 473 |
+
"display_name": ".venv",
|
| 474 |
+
"language": "python",
|
| 475 |
+
"name": "python3"
|
| 476 |
+
},
|
| 477 |
+
"language_info": {
|
| 478 |
+
"codemirror_mode": {
|
| 479 |
+
"name": "ipython",
|
| 480 |
+
"version": 3
|
| 481 |
+
},
|
| 482 |
+
"file_extension": ".py",
|
| 483 |
+
"mimetype": "text/x-python",
|
| 484 |
+
"name": "python",
|
| 485 |
+
"nbconvert_exporter": "python",
|
| 486 |
+
"pygments_lexer": "ipython3",
|
| 487 |
+
"version": "3.12.12"
|
| 488 |
+
}
|
| 489 |
+
},
|
| 490 |
+
"nbformat": 4,
|
| 491 |
+
"nbformat_minor": 2
|
| 492 |
+
}
|
community_contributions/1_foundations_using_gemini/3_lab3.ipynb
ADDED
|
@@ -0,0 +1,382 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to Lab 3 for Week 1 Day 4\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Today we're going to build something with immediate value!\n",
|
| 10 |
+
"\n",
|
| 11 |
+
"In the folder `me` I've put a single file `linkedin.pdf` - it's a PDF download of my LinkedIn profile.\n",
|
| 12 |
+
"\n",
|
| 13 |
+
"Please replace it with yours!\n",
|
| 14 |
+
"\n",
|
| 15 |
+
"I've also made a file called `summary.txt`\n",
|
| 16 |
+
"\n",
|
| 17 |
+
"We're not going to use Tools just yet - we're going to add the tool tomorrow."
|
| 18 |
+
]
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"cell_type": "markdown",
|
| 22 |
+
"metadata": {},
|
| 23 |
+
"source": [
|
| 24 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 25 |
+
" <tr>\n",
|
| 26 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 27 |
+
" <img src=\"../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 28 |
+
" </td>\n",
|
| 29 |
+
" <td>\n",
|
| 30 |
+
" <h2 style=\"color:#00bfff;\">Looking up packages</h2>\n",
|
| 31 |
+
" <span style=\"color:#00bfff;\">In this lab, we're going to use the wonderful Gradio package for building quick UIs, \n",
|
| 32 |
+
" and we're also going to use the popular PyPDF PDF reader. You can get guides to these packages by asking \n",
|
| 33 |
+
" ChatGPT or Claude, and you can find all open-source packages on the repository <a href=\"https://pypi.org\">https://pypi.org</a>.\n",
|
| 34 |
+
" </span>\n",
|
| 35 |
+
" </td>\n",
|
| 36 |
+
" </tr>\n",
|
| 37 |
+
"</table>"
|
| 38 |
+
]
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"cell_type": "code",
|
| 42 |
+
"execution_count": null,
|
| 43 |
+
"metadata": {},
|
| 44 |
+
"outputs": [],
|
| 45 |
+
"source": [
|
| 46 |
+
"# If you don't know what any of these packages do - you can always ask ChatGPT for a guide!\n",
|
| 47 |
+
"\n",
|
| 48 |
+
"from dotenv import load_dotenv\n",
|
| 49 |
+
"from openai import OpenAI\n",
|
| 50 |
+
"from pypdf import PdfReader\n",
|
| 51 |
+
"import os\n",
|
| 52 |
+
"import gradio as gr"
|
| 53 |
+
]
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"cell_type": "code",
|
| 57 |
+
"execution_count": null,
|
| 58 |
+
"metadata": {},
|
| 59 |
+
"outputs": [],
|
| 60 |
+
"source": [
|
| 61 |
+
"load_dotenv(override=True)\n",
|
| 62 |
+
"GEMINI_BASE_URL = \"https://generativelanguage.googleapis.com/v1beta/openai/\"\n",
|
| 63 |
+
"google_api_key = os.getenv(\"GOOGLE_API_KEY\")\n",
|
| 64 |
+
"gemini = OpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)"
|
| 65 |
+
]
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"cell_type": "code",
|
| 69 |
+
"execution_count": null,
|
| 70 |
+
"metadata": {},
|
| 71 |
+
"outputs": [],
|
| 72 |
+
"source": [
|
| 73 |
+
"reader = PdfReader(\"me/linkedin.pdf\")\n",
|
| 74 |
+
"linkedin = \"\"\n",
|
| 75 |
+
"for page in reader.pages:\n",
|
| 76 |
+
" text = page.extract_text()\n",
|
| 77 |
+
" if text:\n",
|
| 78 |
+
" linkedin += text"
|
| 79 |
+
]
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"cell_type": "code",
|
| 83 |
+
"execution_count": null,
|
| 84 |
+
"metadata": {},
|
| 85 |
+
"outputs": [],
|
| 86 |
+
"source": [
|
| 87 |
+
"print(linkedin)"
|
| 88 |
+
]
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"cell_type": "code",
|
| 92 |
+
"execution_count": null,
|
| 93 |
+
"metadata": {},
|
| 94 |
+
"outputs": [],
|
| 95 |
+
"source": [
|
| 96 |
+
"with open(\"me/summary.txt\", \"r\", encoding=\"utf-8\") as f:\n",
|
| 97 |
+
" summary = f.read()"
|
| 98 |
+
]
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"cell_type": "code",
|
| 102 |
+
"execution_count": null,
|
| 103 |
+
"metadata": {},
|
| 104 |
+
"outputs": [],
|
| 105 |
+
"source": [
|
| 106 |
+
"name = \"Harsh Patidar\""
|
| 107 |
+
]
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"cell_type": "code",
|
| 111 |
+
"execution_count": null,
|
| 112 |
+
"metadata": {},
|
| 113 |
+
"outputs": [],
|
| 114 |
+
"source": [
|
| 115 |
+
"system_prompt = f\"You are acting as {name}. You are answering questions on {name}'s website, \\\n",
|
| 116 |
+
"particularly questions related to {name}'s career, background, skills and experience. \\\n",
|
| 117 |
+
"Your responsibility is to represent {name} for interactions on the website as faithfully as possible. \\\n",
|
| 118 |
+
"You are given a summary of {name}'s background and LinkedIn profile which you can use to answer questions. \\\n",
|
| 119 |
+
"Be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
|
| 120 |
+
"If you don't know the answer, say so.\"\n",
|
| 121 |
+
"\n",
|
| 122 |
+
"system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
|
| 123 |
+
"system_prompt += f\"With this context, please chat with the user, always staying in character as {name}.\"\n"
|
| 124 |
+
]
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"cell_type": "code",
|
| 128 |
+
"execution_count": null,
|
| 129 |
+
"metadata": {},
|
| 130 |
+
"outputs": [],
|
| 131 |
+
"source": [
|
| 132 |
+
"system_prompt"
|
| 133 |
+
]
|
| 134 |
+
},
|
| 135 |
+
{
|
| 136 |
+
"cell_type": "code",
|
| 137 |
+
"execution_count": null,
|
| 138 |
+
"metadata": {},
|
| 139 |
+
"outputs": [],
|
| 140 |
+
"source": [
|
| 141 |
+
"model_name = \"gemini-2.5-flash-preview-05-20\""
|
| 142 |
+
]
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"cell_type": "code",
|
| 146 |
+
"execution_count": null,
|
| 147 |
+
"metadata": {},
|
| 148 |
+
"outputs": [],
|
| 149 |
+
"source": [
|
| 150 |
+
"def chat(message, history):\n",
|
| 151 |
+
" messages = [{\"role\": \"system\", \"content\": system_prompt}] + history + [{\"role\": \"user\", \"content\": message}]\n",
|
| 152 |
+
" response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 153 |
+
" return response.choices[0].message.content"
|
| 154 |
+
]
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"cell_type": "markdown",
|
| 158 |
+
"metadata": {},
|
| 159 |
+
"source": [
|
| 160 |
+
"## Special note for people not using OpenAI\n",
|
| 161 |
+
"\n",
|
| 162 |
+
"Some providers, like Groq, might give an error when you send your second message in the chat.\n",
|
| 163 |
+
"\n",
|
| 164 |
+
"This is because Gradio shoves some extra fields into the history object. OpenAI doesn't mind, but some other providers complain.\n",
|
| 165 |
+
"\n",
|
| 166 |
+
"If this happens, the solution is to add this first line to the chat() function above. It cleans up the history variable:\n",
|
| 167 |
+
"\n",
|
| 168 |
+
"```python\n",
|
| 169 |
+
"history = [{\"role\": h[\"role\"], \"content\": h[\"content\"]} for h in history]\n",
|
| 170 |
+
"```\n",
|
| 171 |
+
"\n",
|
| 172 |
+
"You may need to add this in other chat() callback functions in the future, too."
|
| 173 |
+
]
|
| 174 |
+
},
|
| 175 |
+
{
|
| 176 |
+
"cell_type": "code",
|
| 177 |
+
"execution_count": null,
|
| 178 |
+
"metadata": {},
|
| 179 |
+
"outputs": [],
|
| 180 |
+
"source": [
|
| 181 |
+
"gr.ChatInterface(chat, type=\"messages\").launch()"
|
| 182 |
+
]
|
| 183 |
+
},
|
| 184 |
+
{
|
| 185 |
+
"cell_type": "markdown",
|
| 186 |
+
"metadata": {},
|
| 187 |
+
"source": [
|
| 188 |
+
"## A lot is about to happen...\n",
|
| 189 |
+
"\n",
|
| 190 |
+
"1. Be able to ask an LLM to evaluate an answer\n",
|
| 191 |
+
"2. Be able to rerun if the answer fails evaluation\n",
|
| 192 |
+
"3. Put this together into 1 workflow\n",
|
| 193 |
+
"\n",
|
| 194 |
+
"All without any Agentic framework!"
|
| 195 |
+
]
|
| 196 |
+
},
|
| 197 |
+
{
|
| 198 |
+
"cell_type": "code",
|
| 199 |
+
"execution_count": null,
|
| 200 |
+
"metadata": {},
|
| 201 |
+
"outputs": [],
|
| 202 |
+
"source": [
|
| 203 |
+
"# Create a Pydantic model for the Evaluation\n",
|
| 204 |
+
"\n",
|
| 205 |
+
"from pydantic import BaseModel\n",
|
| 206 |
+
"\n",
|
| 207 |
+
"class Evaluation(BaseModel):\n",
|
| 208 |
+
" is_acceptable: bool\n",
|
| 209 |
+
" feedback: str\n"
|
| 210 |
+
]
|
| 211 |
+
},
|
| 212 |
+
{
|
| 213 |
+
"cell_type": "code",
|
| 214 |
+
"execution_count": null,
|
| 215 |
+
"metadata": {},
|
| 216 |
+
"outputs": [],
|
| 217 |
+
"source": [
|
| 218 |
+
"evaluator_system_prompt = f\"You are an evaluator that decides whether a response to a question is acceptable. \\\n",
|
| 219 |
+
"You are provided with a conversation between a User and an Agent. Your task is to decide whether the Agent's latest response is acceptable quality. \\\n",
|
| 220 |
+
"The Agent is playing the role of {name} and is representing {name} on their website. \\\n",
|
| 221 |
+
"The Agent has been instructed to be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
|
| 222 |
+
"The Agent has been provided with context on {name} in the form of their summary and LinkedIn details. Here's the information:\"\n",
|
| 223 |
+
"\n",
|
| 224 |
+
"evaluator_system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
|
| 225 |
+
"evaluator_system_prompt += f\"With this context, please evaluate the latest response, replying with whether the response is acceptable and your feedback.\""
|
| 226 |
+
]
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"cell_type": "code",
|
| 230 |
+
"execution_count": null,
|
| 231 |
+
"metadata": {},
|
| 232 |
+
"outputs": [],
|
| 233 |
+
"source": [
|
| 234 |
+
"def evaluator_user_prompt(reply, message, history):\n",
|
| 235 |
+
" user_prompt = f\"Here's the conversation between the User and the Agent: \\n\\n{history}\\n\\n\"\n",
|
| 236 |
+
" user_prompt += f\"Here's the latest message from the User: \\n\\n{message}\\n\\n\"\n",
|
| 237 |
+
" user_prompt += f\"Here's the latest response from the Agent: \\n\\n{reply}\\n\\n\"\n",
|
| 238 |
+
" user_prompt += \"Please evaluate the response, replying with whether it is acceptable and your feedback.\"\n",
|
| 239 |
+
" return user_prompt"
|
| 240 |
+
]
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"cell_type": "code",
|
| 244 |
+
"execution_count": null,
|
| 245 |
+
"metadata": {},
|
| 246 |
+
"outputs": [],
|
| 247 |
+
"source": [
|
| 248 |
+
"import os\n",
|
| 249 |
+
"gemini = OpenAI(\n",
|
| 250 |
+
" api_key=os.getenv(\"GOOGLE_API_KEY\"), \n",
|
| 251 |
+
" base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\"\n",
|
| 252 |
+
")"
|
| 253 |
+
]
|
| 254 |
+
},
|
| 255 |
+
{
|
| 256 |
+
"cell_type": "code",
|
| 257 |
+
"execution_count": null,
|
| 258 |
+
"metadata": {},
|
| 259 |
+
"outputs": [],
|
| 260 |
+
"source": [
|
| 261 |
+
"def evaluate(reply, message, history) -> Evaluation:\n",
|
| 262 |
+
"\n",
|
| 263 |
+
" messages = [{\"role\": \"system\", \"content\": evaluator_system_prompt}] + [{\"role\": \"user\", \"content\": evaluator_user_prompt(reply, message, history)}]\n",
|
| 264 |
+
" response = gemini.beta.chat.completions.parse(model=model_name, messages=messages, response_format=Evaluation)\n",
|
| 265 |
+
" return response.choices[0].message.parsed"
|
| 266 |
+
]
|
| 267 |
+
},
|
| 268 |
+
{
|
| 269 |
+
"cell_type": "code",
|
| 270 |
+
"execution_count": null,
|
| 271 |
+
"metadata": {},
|
| 272 |
+
"outputs": [],
|
| 273 |
+
"source": [
|
| 274 |
+
"messages = [{\"role\": \"system\", \"content\": system_prompt}] + [{\"role\": \"user\", \"content\": \"do you hold a patent?\"}]\n",
|
| 275 |
+
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 276 |
+
"reply = response.choices[0].message.content"
|
| 277 |
+
]
|
| 278 |
+
},
|
| 279 |
+
{
|
| 280 |
+
"cell_type": "code",
|
| 281 |
+
"execution_count": null,
|
| 282 |
+
"metadata": {},
|
| 283 |
+
"outputs": [],
|
| 284 |
+
"source": [
|
| 285 |
+
"reply"
|
| 286 |
+
]
|
| 287 |
+
},
|
| 288 |
+
{
|
| 289 |
+
"cell_type": "code",
|
| 290 |
+
"execution_count": null,
|
| 291 |
+
"metadata": {},
|
| 292 |
+
"outputs": [],
|
| 293 |
+
"source": [
|
| 294 |
+
"evaluate(reply, \"do you hold a patent?\", messages[:1])"
|
| 295 |
+
]
|
| 296 |
+
},
|
| 297 |
+
{
|
| 298 |
+
"cell_type": "code",
|
| 299 |
+
"execution_count": null,
|
| 300 |
+
"metadata": {},
|
| 301 |
+
"outputs": [],
|
| 302 |
+
"source": [
|
| 303 |
+
"def rerun(reply, message, history, feedback):\n",
|
| 304 |
+
" updated_system_prompt = system_prompt + \"\\n\\n## Previous answer rejected\\nYou just tried to reply, but the quality control rejected your reply\\n\"\n",
|
| 305 |
+
" updated_system_prompt += f\"## Your attempted answer:\\n{reply}\\n\\n\"\n",
|
| 306 |
+
" updated_system_prompt += f\"## Reason for rejection:\\n{feedback}\\n\\n\"\n",
|
| 307 |
+
" messages = [{\"role\": \"system\", \"content\": updated_system_prompt}] + history + [{\"role\": \"user\", \"content\": message}]\n",
|
| 308 |
+
" response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 309 |
+
" return response.choices[0].message.content"
|
| 310 |
+
]
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"cell_type": "code",
|
| 314 |
+
"execution_count": null,
|
| 315 |
+
"metadata": {},
|
| 316 |
+
"outputs": [],
|
| 317 |
+
"source": [
|
| 318 |
+
"def chat(message, history):\n",
|
| 319 |
+
" if \"patent\" in message:\n",
|
| 320 |
+
" system = system_prompt + \"\\n\\nEverything in your reply needs to be in pig latin - \\\n",
|
| 321 |
+
" it is mandatory that you respond only and entirely in pig latin\"\n",
|
| 322 |
+
" else:\n",
|
| 323 |
+
" system = system_prompt\n",
|
| 324 |
+
" messages = [{\"role\": \"system\", \"content\": system}] + history + [{\"role\": \"user\", \"content\": message}]\n",
|
| 325 |
+
" response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 326 |
+
" reply =response.choices[0].message.content\n",
|
| 327 |
+
"\n",
|
| 328 |
+
" evaluation = evaluate(reply, message, history)\n",
|
| 329 |
+
" \n",
|
| 330 |
+
" if evaluation.is_acceptable:\n",
|
| 331 |
+
" print(\"Passed evaluation - returning reply\")\n",
|
| 332 |
+
" else:\n",
|
| 333 |
+
" print(\"Failed evaluation - retrying\")\n",
|
| 334 |
+
" print(evaluation.feedback)\n",
|
| 335 |
+
" reply = rerun(reply, message, history, evaluation.feedback) \n",
|
| 336 |
+
" return reply"
|
| 337 |
+
]
|
| 338 |
+
},
|
| 339 |
+
{
|
| 340 |
+
"cell_type": "code",
|
| 341 |
+
"execution_count": null,
|
| 342 |
+
"metadata": {},
|
| 343 |
+
"outputs": [],
|
| 344 |
+
"source": [
|
| 345 |
+
"gr.ChatInterface(chat, type=\"messages\").launch()"
|
| 346 |
+
]
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"cell_type": "markdown",
|
| 350 |
+
"metadata": {},
|
| 351 |
+
"source": []
|
| 352 |
+
},
|
| 353 |
+
{
|
| 354 |
+
"cell_type": "code",
|
| 355 |
+
"execution_count": null,
|
| 356 |
+
"metadata": {},
|
| 357 |
+
"outputs": [],
|
| 358 |
+
"source": []
|
| 359 |
+
}
|
| 360 |
+
],
|
| 361 |
+
"metadata": {
|
| 362 |
+
"kernelspec": {
|
| 363 |
+
"display_name": ".venv",
|
| 364 |
+
"language": "python",
|
| 365 |
+
"name": "python3"
|
| 366 |
+
},
|
| 367 |
+
"language_info": {
|
| 368 |
+
"codemirror_mode": {
|
| 369 |
+
"name": "ipython",
|
| 370 |
+
"version": 3
|
| 371 |
+
},
|
| 372 |
+
"file_extension": ".py",
|
| 373 |
+
"mimetype": "text/x-python",
|
| 374 |
+
"name": "python",
|
| 375 |
+
"nbconvert_exporter": "python",
|
| 376 |
+
"pygments_lexer": "ipython3",
|
| 377 |
+
"version": "3.12.12"
|
| 378 |
+
}
|
| 379 |
+
},
|
| 380 |
+
"nbformat": 4,
|
| 381 |
+
"nbformat_minor": 2
|
| 382 |
+
}
|
community_contributions/1_foundations_using_gemini/4_lab4.ipynb
ADDED
|
@@ -0,0 +1,464 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## The first big project - Professionally You!\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"### And, Tool use.\n",
|
| 10 |
+
"\n",
|
| 11 |
+
"### But first: introducing Pushover\n",
|
| 12 |
+
"\n",
|
| 13 |
+
"Pushover is a nifty tool for sending Push Notifications to your phone.\n",
|
| 14 |
+
"\n",
|
| 15 |
+
"It's super easy to set up and install!\n",
|
| 16 |
+
"\n",
|
| 17 |
+
"Simply visit https://pushover.net/ and click 'Login or Signup' on the top right to sign up for a free account, and create your API keys.\n",
|
| 18 |
+
"\n",
|
| 19 |
+
"Once you've signed up, on the home screen, click \"Create an Application/API Token\", and give it any name (like Agents) and click Create Application.\n",
|
| 20 |
+
"\n",
|
| 21 |
+
"Then add 2 lines to your `.env` file:\n",
|
| 22 |
+
"\n",
|
| 23 |
+
"PUSHOVER_USER=_put the key that's on the top right of your Pushover home screen and probably starts with a u_ \n",
|
| 24 |
+
"PUSHOVER_TOKEN=_put the key when you click into your new application called Agents (or whatever) and probably starts with an a_\n",
|
| 25 |
+
"\n",
|
| 26 |
+
"Remember to save your `.env` file, and run `load_dotenv(override=True)` after saving, to set your environment variables.\n",
|
| 27 |
+
"\n",
|
| 28 |
+
"Finally, click \"Add Phone, Tablet or Desktop\" to install on your phone."
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "code",
|
| 33 |
+
"execution_count": null,
|
| 34 |
+
"metadata": {},
|
| 35 |
+
"outputs": [],
|
| 36 |
+
"source": [
|
| 37 |
+
"# imports\n",
|
| 38 |
+
"\n",
|
| 39 |
+
"from dotenv import load_dotenv\n",
|
| 40 |
+
"from openai import OpenAI\n",
|
| 41 |
+
"import json\n",
|
| 42 |
+
"import os\n",
|
| 43 |
+
"import requests\n",
|
| 44 |
+
"from pypdf import PdfReader\n",
|
| 45 |
+
"import gradio as gr"
|
| 46 |
+
]
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"cell_type": "code",
|
| 50 |
+
"execution_count": null,
|
| 51 |
+
"metadata": {},
|
| 52 |
+
"outputs": [],
|
| 53 |
+
"source": [
|
| 54 |
+
"# The usual start\n",
|
| 55 |
+
"\n",
|
| 56 |
+
"load_dotenv(override=True)\n",
|
| 57 |
+
"GEMINI_BASE_URL = \"https://generativelanguage.googleapis.com/v1beta/openai/\"\n",
|
| 58 |
+
"google_api_key = os.getenv(\"GOOGLE_API_KEY\")\n",
|
| 59 |
+
"gemini = OpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)"
|
| 60 |
+
]
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"cell_type": "code",
|
| 64 |
+
"execution_count": null,
|
| 65 |
+
"metadata": {},
|
| 66 |
+
"outputs": [],
|
| 67 |
+
"source": [
|
| 68 |
+
"# For pushover\n",
|
| 69 |
+
"\n",
|
| 70 |
+
"pushover_user = os.getenv(\"PUSHOVER_USER\")\n",
|
| 71 |
+
"pushover_token = os.getenv(\"PUSHOVER_TOKEN\")\n",
|
| 72 |
+
"pushover_url = \"https://api.pushover.net/1/messages.json\"\n",
|
| 73 |
+
"\n",
|
| 74 |
+
"if pushover_user:\n",
|
| 75 |
+
" print(f\"Pushover user found and starts with {pushover_user[0]}\")\n",
|
| 76 |
+
"else:\n",
|
| 77 |
+
" print(\"Pushover user not found\")\n",
|
| 78 |
+
"\n",
|
| 79 |
+
"if pushover_token:\n",
|
| 80 |
+
" print(f\"Pushover token found and starts with {pushover_token[0]}\")\n",
|
| 81 |
+
"else:\n",
|
| 82 |
+
" print(\"Pushover token not found\")"
|
| 83 |
+
]
|
| 84 |
+
},
|
| 85 |
+
{
|
| 86 |
+
"cell_type": "code",
|
| 87 |
+
"execution_count": null,
|
| 88 |
+
"metadata": {},
|
| 89 |
+
"outputs": [],
|
| 90 |
+
"source": [
|
| 91 |
+
"def push(message):\n",
|
| 92 |
+
" print(f\"Push: {message}\")\n",
|
| 93 |
+
" payload = {\"user\": pushover_user, \"token\": pushover_token, \"message\": message}\n",
|
| 94 |
+
" requests.post(pushover_url, data=payload)"
|
| 95 |
+
]
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"cell_type": "code",
|
| 99 |
+
"execution_count": null,
|
| 100 |
+
"metadata": {},
|
| 101 |
+
"outputs": [],
|
| 102 |
+
"source": [
|
| 103 |
+
"push(\"HEY!!\")"
|
| 104 |
+
]
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"cell_type": "code",
|
| 108 |
+
"execution_count": null,
|
| 109 |
+
"metadata": {},
|
| 110 |
+
"outputs": [],
|
| 111 |
+
"source": [
|
| 112 |
+
"def record_user_details(email, name=\"Name not provided\", notes=\"not provided\"):\n",
|
| 113 |
+
" push(f\"Recording interest from {name} with email {email} and notes {notes}\")\n",
|
| 114 |
+
" return {\"recorded\": \"ok\"}"
|
| 115 |
+
]
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"cell_type": "code",
|
| 119 |
+
"execution_count": null,
|
| 120 |
+
"metadata": {},
|
| 121 |
+
"outputs": [],
|
| 122 |
+
"source": [
|
| 123 |
+
"def record_unknown_question(question):\n",
|
| 124 |
+
" push(f\"Recording {question} asked that I couldn't answer\")\n",
|
| 125 |
+
" return {\"recorded\": \"ok\"}"
|
| 126 |
+
]
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"cell_type": "code",
|
| 130 |
+
"execution_count": null,
|
| 131 |
+
"metadata": {},
|
| 132 |
+
"outputs": [],
|
| 133 |
+
"source": [
|
| 134 |
+
"record_user_details_json = {\n",
|
| 135 |
+
" \"name\": \"record_user_details\",\n",
|
| 136 |
+
" \"description\": \"Use this tool to record that a user is interested in being in touch and provided an email address\",\n",
|
| 137 |
+
" \"parameters\": {\n",
|
| 138 |
+
" \"type\": \"object\",\n",
|
| 139 |
+
" \"properties\": {\n",
|
| 140 |
+
" \"email\": {\n",
|
| 141 |
+
" \"type\": \"string\",\n",
|
| 142 |
+
" \"description\": \"The email address of this user\"\n",
|
| 143 |
+
" },\n",
|
| 144 |
+
" \"name\": {\n",
|
| 145 |
+
" \"type\": \"string\",\n",
|
| 146 |
+
" \"description\": \"The user's name, if they provided it\"\n",
|
| 147 |
+
" }\n",
|
| 148 |
+
" ,\n",
|
| 149 |
+
" \"notes\": {\n",
|
| 150 |
+
" \"type\": \"string\",\n",
|
| 151 |
+
" \"description\": \"Any additional information about the conversation that's worth recording to give context\"\n",
|
| 152 |
+
" }\n",
|
| 153 |
+
" },\n",
|
| 154 |
+
" \"required\": [\"email\"],\n",
|
| 155 |
+
" \"additionalProperties\": False\n",
|
| 156 |
+
" }\n",
|
| 157 |
+
"}"
|
| 158 |
+
]
|
| 159 |
+
},
|
| 160 |
+
{
|
| 161 |
+
"cell_type": "code",
|
| 162 |
+
"execution_count": null,
|
| 163 |
+
"metadata": {},
|
| 164 |
+
"outputs": [],
|
| 165 |
+
"source": [
|
| 166 |
+
"record_unknown_question_json = {\n",
|
| 167 |
+
" \"name\": \"record_unknown_question\",\n",
|
| 168 |
+
" \"description\": \"Always use this tool to record any question that couldn't be answered as you didn't know the answer\",\n",
|
| 169 |
+
" \"parameters\": {\n",
|
| 170 |
+
" \"type\": \"object\",\n",
|
| 171 |
+
" \"properties\": {\n",
|
| 172 |
+
" \"question\": {\n",
|
| 173 |
+
" \"type\": \"string\",\n",
|
| 174 |
+
" \"description\": \"The question that couldn't be answered\"\n",
|
| 175 |
+
" },\n",
|
| 176 |
+
" },\n",
|
| 177 |
+
" \"required\": [\"question\"],\n",
|
| 178 |
+
" \"additionalProperties\": False\n",
|
| 179 |
+
" }\n",
|
| 180 |
+
"}"
|
| 181 |
+
]
|
| 182 |
+
},
|
| 183 |
+
{
|
| 184 |
+
"cell_type": "code",
|
| 185 |
+
"execution_count": null,
|
| 186 |
+
"metadata": {},
|
| 187 |
+
"outputs": [],
|
| 188 |
+
"source": [
|
| 189 |
+
"tools = [{\"type\": \"function\", \"function\": record_user_details_json},\n",
|
| 190 |
+
" {\"type\": \"function\", \"function\": record_unknown_question_json}]"
|
| 191 |
+
]
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"cell_type": "code",
|
| 195 |
+
"execution_count": null,
|
| 196 |
+
"metadata": {},
|
| 197 |
+
"outputs": [],
|
| 198 |
+
"source": [
|
| 199 |
+
"tools"
|
| 200 |
+
]
|
| 201 |
+
},
|
| 202 |
+
{
|
| 203 |
+
"cell_type": "code",
|
| 204 |
+
"execution_count": null,
|
| 205 |
+
"metadata": {},
|
| 206 |
+
"outputs": [],
|
| 207 |
+
"source": [
|
| 208 |
+
"# This function can take a list of tool calls, and run them. This is the IF statement!!\n",
|
| 209 |
+
"\n",
|
| 210 |
+
"def handle_tool_calls(tool_calls):\n",
|
| 211 |
+
" results = []\n",
|
| 212 |
+
" for tool_call in tool_calls:\n",
|
| 213 |
+
" tool_name = tool_call.function.name\n",
|
| 214 |
+
" arguments = json.loads(tool_call.function.arguments)\n",
|
| 215 |
+
" print(f\"Tool called: {tool_name}\", flush=True)\n",
|
| 216 |
+
"\n",
|
| 217 |
+
" # THE BIG IF STATEMENT!!!\n",
|
| 218 |
+
"\n",
|
| 219 |
+
" if tool_name == \"record_user_details\":\n",
|
| 220 |
+
" result = record_user_details(**arguments)\n",
|
| 221 |
+
" elif tool_name == \"record_unknown_question\":\n",
|
| 222 |
+
" result = record_unknown_question(**arguments)\n",
|
| 223 |
+
"\n",
|
| 224 |
+
" results.append({\"role\": \"tool\",\"content\": json.dumps(result),\"tool_call_id\": tool_call.id})\n",
|
| 225 |
+
" return results"
|
| 226 |
+
]
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"cell_type": "code",
|
| 230 |
+
"execution_count": null,
|
| 231 |
+
"metadata": {},
|
| 232 |
+
"outputs": [],
|
| 233 |
+
"source": [
|
| 234 |
+
"globals()[\"record_unknown_question\"](\"this is a really hard question\")"
|
| 235 |
+
]
|
| 236 |
+
},
|
| 237 |
+
{
|
| 238 |
+
"cell_type": "code",
|
| 239 |
+
"execution_count": null,
|
| 240 |
+
"metadata": {},
|
| 241 |
+
"outputs": [],
|
| 242 |
+
"source": [
|
| 243 |
+
"# This is a more elegant way that avoids the IF statement.\n",
|
| 244 |
+
"\n",
|
| 245 |
+
"def handle_tool_calls(tool_calls):\n",
|
| 246 |
+
" results = []\n",
|
| 247 |
+
" for tool_call in tool_calls:\n",
|
| 248 |
+
" tool_name = tool_call.function.name\n",
|
| 249 |
+
" arguments = json.loads(tool_call.function.arguments)\n",
|
| 250 |
+
" print(f\"Tool called: {tool_name}\", flush=True)\n",
|
| 251 |
+
" tool = globals().get(tool_name)\n",
|
| 252 |
+
" result = tool(**arguments) if tool else {}\n",
|
| 253 |
+
" results.append({\"role\": \"tool\",\"content\": json.dumps(result),\"tool_call_id\": tool_call.id})\n",
|
| 254 |
+
" return results"
|
| 255 |
+
]
|
| 256 |
+
},
|
| 257 |
+
{
|
| 258 |
+
"cell_type": "code",
|
| 259 |
+
"execution_count": null,
|
| 260 |
+
"metadata": {},
|
| 261 |
+
"outputs": [],
|
| 262 |
+
"source": [
|
| 263 |
+
"reader = PdfReader(\"me/linkedin.pdf\")\n",
|
| 264 |
+
"linkedin = \"\"\n",
|
| 265 |
+
"for page in reader.pages:\n",
|
| 266 |
+
" text = page.extract_text()\n",
|
| 267 |
+
" if text:\n",
|
| 268 |
+
" linkedin += text\n",
|
| 269 |
+
"\n",
|
| 270 |
+
"with open(\"me/summary.txt\", \"r\", encoding=\"utf-8\") as f:\n",
|
| 271 |
+
" summary = f.read()\n",
|
| 272 |
+
"\n",
|
| 273 |
+
"name = \"Harsh Patidar\""
|
| 274 |
+
]
|
| 275 |
+
},
|
| 276 |
+
{
|
| 277 |
+
"cell_type": "code",
|
| 278 |
+
"execution_count": null,
|
| 279 |
+
"metadata": {},
|
| 280 |
+
"outputs": [],
|
| 281 |
+
"source": [
|
| 282 |
+
"system_prompt = f\"You are acting as {name}. You are answering questions on {name}'s website, \\\n",
|
| 283 |
+
"particularly questions related to {name}'s career, background, skills and experience. \\\n",
|
| 284 |
+
"Your responsibility is to represent {name} for interactions on the website as faithfully as possible. \\\n",
|
| 285 |
+
"You are given a summary of {name}'s background and LinkedIn profile which you can use to answer questions. \\\n",
|
| 286 |
+
"Be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
|
| 287 |
+
"If you don't know the answer to any question, use your record_unknown_question tool to record the question that you couldn't answer, even if it's about something trivial or unrelated to career. \\\n",
|
| 288 |
+
"If the user is engaging in discussion, try to steer them towards getting in touch via email; ask for their email and record it using your record_user_details tool. \"\n",
|
| 289 |
+
"\n",
|
| 290 |
+
"system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
|
| 291 |
+
"system_prompt += f\"With this context, please chat with the user, always staying in character as {name}.\"\n"
|
| 292 |
+
]
|
| 293 |
+
},
|
| 294 |
+
{
|
| 295 |
+
"cell_type": "code",
|
| 296 |
+
"execution_count": null,
|
| 297 |
+
"metadata": {},
|
| 298 |
+
"outputs": [],
|
| 299 |
+
"source": [
|
| 300 |
+
"model_name = \"gemini-2.5-flash-preview-05-20\""
|
| 301 |
+
]
|
| 302 |
+
},
|
| 303 |
+
{
|
| 304 |
+
"cell_type": "code",
|
| 305 |
+
"execution_count": null,
|
| 306 |
+
"metadata": {},
|
| 307 |
+
"outputs": [],
|
| 308 |
+
"source": [
|
| 309 |
+
"\n",
|
| 310 |
+
"\n",
|
| 311 |
+
"\n",
|
| 312 |
+
"def chat(message, history):\n",
|
| 313 |
+
" messages = [{\"role\": \"system\", \"content\": system_prompt}] + history + [{\"role\": \"user\", \"content\": message}]\n",
|
| 314 |
+
" done = False\n",
|
| 315 |
+
" while not done:\n",
|
| 316 |
+
"\n",
|
| 317 |
+
" # This is the call to the LLM - see that we pass in the tools json\n",
|
| 318 |
+
"\n",
|
| 319 |
+
" response = gemini.chat.completions.create(model=model_name, messages=messages, tools=tools)\n",
|
| 320 |
+
"\n",
|
| 321 |
+
" finish_reason = response.choices[0].finish_reason\n",
|
| 322 |
+
" \n",
|
| 323 |
+
" # If the LLM wants to call a tool, we do that!\n",
|
| 324 |
+
" \n",
|
| 325 |
+
" if finish_reason==\"tool_calls\":\n",
|
| 326 |
+
" message = response.choices[0].message\n",
|
| 327 |
+
" tool_calls = message.tool_calls\n",
|
| 328 |
+
" results = handle_tool_calls(tool_calls)\n",
|
| 329 |
+
" messages.append(message)\n",
|
| 330 |
+
" messages.extend(results)\n",
|
| 331 |
+
" else:\n",
|
| 332 |
+
" done = True\n",
|
| 333 |
+
" return response.choices[0].message.content"
|
| 334 |
+
]
|
| 335 |
+
},
|
| 336 |
+
{
|
| 337 |
+
"cell_type": "code",
|
| 338 |
+
"execution_count": null,
|
| 339 |
+
"metadata": {},
|
| 340 |
+
"outputs": [],
|
| 341 |
+
"source": [
|
| 342 |
+
"gr.ChatInterface(chat, type=\"messages\").launch()"
|
| 343 |
+
]
|
| 344 |
+
},
|
| 345 |
+
{
|
| 346 |
+
"cell_type": "markdown",
|
| 347 |
+
"metadata": {},
|
| 348 |
+
"source": [
|
| 349 |
+
"## And now for deployment\n",
|
| 350 |
+
"\n",
|
| 351 |
+
"This code is in `app.py`\n",
|
| 352 |
+
"\n",
|
| 353 |
+
"We will deploy to HuggingFace Spaces.\n",
|
| 354 |
+
"\n",
|
| 355 |
+
"Before you start: remember to update the files in the \"me\" directory - your LinkedIn profile and summary.txt - so that it talks about you! Also change the `self.name = ...` line in `app.py` to your own name.\n",
|
| 356 |
+
"\n",
|
| 357 |
+
"Also check that there's no README file within the 1_foundations directory. If there is one, please delete it. The deploy process creates a new README file in this directory for you.\n",
|
| 358 |
+
"\n",
|
| 359 |
+
"1. Visit https://huggingface.co and set up an account \n",
|
| 360 |
+
"2. From the Avatar menu on the top right, choose Access Tokens. Choose \"Create New Token\". Give it WRITE permissions - it needs to have WRITE permissions! Keep a record of your new key. \n",
|
| 361 |
+
"3. In the Terminal, run: `uv tool install 'huggingface_hub[cli]'` to install the HuggingFace tool, then `hf auth login --token YOUR_TOKEN_HERE`, like `hf auth login --token hf_xxxxxx`, to login at the command line with your key. Afterwards, run `hf auth whoami` to check you're logged in \n",
|
| 362 |
+
"4. Take your new token and add it to your .env file: `HF_TOKEN=hf_xxx` for the future\n",
|
| 363 |
+
"5. From the 1_foundations folder, enter: `uv run gradio deploy` \n",
|
| 364 |
+
"6. Follow its instructions: name it \"career_conversation\", specify app.py, choose cpu-basic as the hardware, say Yes to needing to supply secrets, provide your Google API key (secret name `GOOGLE_API_KEY`), your pushover user and token, and say \"no\" to github actions. \n",
|
| 365 |
+
"\n",
|
| 366 |
+
"Thank you Robert, James, Martins, Andras and Priya for these tips. \n",
|
| 367 |
+
"Please read the next 2 sections - how to change your Secrets, and how to redeploy your Space (you may need to delete the README.md that gets created in this 1_foundations directory).\n",
|
| 368 |
+
"\n",
|
| 369 |
+
"#### More about these secrets:\n",
|
| 370 |
+
"\n",
|
| 371 |
+
"If you're confused by what's going on with these secrets: it just wants you to enter the key name and value for each of your secrets -- so you would enter: \n",
|
| 372 |
+
"`GOOGLE_API_KEY` \n",
|
| 373 |
+
"Followed by: \n",
|
| 374 |
+
"`your-google-api-key` \n",
|
| 375 |
+
"\n",
|
| 376 |
+
"And if you don't want to set secrets this way, or something goes wrong with it, it's no problem - you can change your secrets later: \n",
|
| 377 |
+
"1. Log in to HuggingFace website \n",
|
| 378 |
+
"2. Go to your profile screen via the Avatar menu on the top right \n",
|
| 379 |
+
"3. Select the Space you deployed \n",
|
| 380 |
+
"4. Click on the Settings wheel on the top right \n",
|
| 381 |
+
"5. You can scroll down to change your secrets (Variables and Secrets section), delete the space, etc.\n",
|
| 382 |
+
"\n",
|
| 383 |
+
"#### And now you should be deployed!\n",
|
| 384 |
+
"\n",
|
| 385 |
+
"If you want to completely replace everything and start again with your keys, you may need to delete the README.md that got created in this 1_foundations folder.\n",
|
| 386 |
+
"\n",
|
| 387 |
+
"Here is mine: https://huggingface.co/spaces/ed-donner/Career_Conversation\n",
|
| 388 |
+
"\n",
|
| 389 |
+
"I just got a push notification that a student asked me how they can become President of their country 😂😂\n",
|
| 390 |
+
"\n",
|
| 391 |
+
"For more information on deployment:\n",
|
| 392 |
+
"\n",
|
| 393 |
+
"https://www.gradio.app/guides/sharing-your-app#hosting-on-hf-spaces\n",
|
| 394 |
+
"\n",
|
| 395 |
+
"To delete your Space in the future: \n",
|
| 396 |
+
"1. Log in to HuggingFace\n",
|
| 397 |
+
"2. From the Avatar menu, select your profile\n",
|
| 398 |
+
"3. Click on the Space itself and select the settings wheel on the top right\n",
|
| 399 |
+
"4. Scroll to the Delete section at the bottom\n",
|
| 400 |
+
"5. ALSO: delete the README file that Gradio may have created inside this 1_foundations folder (otherwise it won't ask you the questions the next time you do a gradio deploy)\n"
|
| 401 |
+
]
|
| 402 |
+
},
|
| 403 |
+
{
|
| 404 |
+
"cell_type": "markdown",
|
| 405 |
+
"metadata": {},
|
| 406 |
+
"source": [
|
| 407 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 408 |
+
" <tr>\n",
|
| 409 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 410 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 411 |
+
" </td>\n",
|
| 412 |
+
" <td>\n",
|
| 413 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 414 |
+
" <span style=\"color:#ff7800;\">• First and foremost, deploy this for yourself! It's a real, valuable tool - the future resume..<br/>\n",
|
| 415 |
+
" • Next, improve the resources - add better context about yourself. If you know RAG, then add a knowledge base about you.<br/>\n",
|
| 416 |
+
" • Add in more tools! You could have a SQL database with common Q&A that the LLM could read and write from?<br/>\n",
|
| 417 |
+
" • Bring in the Evaluator from the last lab, and add other Agentic patterns.\n",
|
| 418 |
+
" </span>\n",
|
| 419 |
+
" </td>\n",
|
| 420 |
+
" </tr>\n",
|
| 421 |
+
"</table>"
|
| 422 |
+
]
|
| 423 |
+
},
|
| 424 |
+
{
|
| 425 |
+
"cell_type": "markdown",
|
| 426 |
+
"metadata": {},
|
| 427 |
+
"source": [
|
| 428 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 429 |
+
" <tr>\n",
|
| 430 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 431 |
+
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 432 |
+
" </td>\n",
|
| 433 |
+
" <td>\n",
|
| 434 |
+
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 435 |
+
" <span style=\"color:#00bfff;\">Aside from the obvious (your career alter-ego) this has business applications in any situation where you need an AI assistant with domain expertise and an ability to interact with the real world.\n",
|
| 436 |
+
" </span>\n",
|
| 437 |
+
" </td>\n",
|
| 438 |
+
" </tr>\n",
|
| 439 |
+
"</table>"
|
| 440 |
+
]
|
| 441 |
+
}
|
| 442 |
+
],
|
| 443 |
+
"metadata": {
|
| 444 |
+
"kernelspec": {
|
| 445 |
+
"display_name": ".venv",
|
| 446 |
+
"language": "python",
|
| 447 |
+
"name": "python3"
|
| 448 |
+
},
|
| 449 |
+
"language_info": {
|
| 450 |
+
"codemirror_mode": {
|
| 451 |
+
"name": "ipython",
|
| 452 |
+
"version": 3
|
| 453 |
+
},
|
| 454 |
+
"file_extension": ".py",
|
| 455 |
+
"mimetype": "text/x-python",
|
| 456 |
+
"name": "python",
|
| 457 |
+
"nbconvert_exporter": "python",
|
| 458 |
+
"pygments_lexer": "ipython3",
|
| 459 |
+
"version": "3.12.12"
|
| 460 |
+
}
|
| 461 |
+
},
|
| 462 |
+
"nbformat": 4,
|
| 463 |
+
"nbformat_minor": 2
|
| 464 |
+
}
|
community_contributions/1_foundations_using_gemini/app.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dotenv import load_dotenv
|
| 2 |
+
from openai import OpenAI
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
import requests
|
| 6 |
+
from pypdf import PdfReader
|
| 7 |
+
import gradio as gr
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
load_dotenv(override=True)
|
| 11 |
+
|
| 12 |
+
def push(text):
    """Send ``text`` as a Pushover notification.

    The Pushover credentials are read from the ``PUSHOVER_TOKEN`` and
    ``PUSHOVER_USER`` environment variables each time the function is
    called.

    Delivery is best-effort: a network error, a timeout or a non-2xx
    response is reported on stdout instead of being raised, so a failed
    notification cannot abort the caller.

    Args:
        text: The message body to deliver.

    Returns:
        None.
    """
    try:
        response = requests.post(
            "https://api.pushover.net/1/messages.json",
            data={
                "token": os.getenv("PUSHOVER_TOKEN"),
                "user": os.getenv("PUSHOVER_USER"),
                "message": text,
            },
            # requests applies no timeout by default; without one a stalled
            # connection would block the caller indefinitely.
            timeout=10,
        )
        # Surface rejected requests (e.g. missing or invalid credentials)
        # instead of silently ignoring the HTTP status.
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Pushover notification failed: {exc}", flush=True)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def record_user_details(email, name="Name not provided", notes="not provided"):
    """Forward a visitor's contact details as a push notification.

    Args:
        email: The visitor's email address.
        name: The visitor's name; a placeholder is used when omitted.
        notes: Extra context about the conversation; a placeholder is used
            when omitted.

    Returns:
        The fixed acknowledgement ``{"recorded": "ok"}``.
    """
    notification = f"Recording {name} with email {email} and notes {notes}"
    push(notification)
    acknowledgement = {"recorded": "ok"}
    return acknowledgement
|
| 26 |
+
|
| 27 |
+
def record_unknown_question(question):
    """Forward a question that could not be answered as a push notification.

    Args:
        question: The text of the unanswered question.

    Returns:
        The fixed acknowledgement ``{"recorded": "ok"}``.
    """
    notification = f"Recording {question}"
    push(notification)
    acknowledgement = {"recorded": "ok"}
    return acknowledgement
|
| 30 |
+
|
| 31 |
+
# Function schema for record_user_details; wrapped into the `tools` list below.
record_user_details_json = {
    "name": "record_user_details",
    "description": "Use this tool to record that a user is interested in being in touch and provided an email address",
    "parameters": {
        "type": "object",
        "properties": {
            "email": {
                "type": "string",
                "description": "The email address of this user",
            },
            "name": {
                "type": "string",
                "description": "The user's name, if they provided it",
            },
            "notes": {
                "type": "string",
                "description": "Any additional information about the conversation that's worth recording to give context",
            },
        },
        # Only the email is mandatory; name and notes are optional.
        "required": ["email"],
        "additionalProperties": False,
    },
}

# Function schema for record_unknown_question; wrapped into the `tools` list below.
record_unknown_question_json = {
    "name": "record_unknown_question",
    "description": "Always use this tool to record any question that couldn't be answered as you didn't know the answer",
    "parameters": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "description": "The question that couldn't be answered",
            },
        },
        "required": ["question"],
        "additionalProperties": False,
    },
}

# Each schema is wrapped in a {"type": "function", ...} entry, in declaration order.
tools = [
    {"type": "function", "function": schema}
    for schema in (record_user_details_json, record_unknown_question_json)
]
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
class Me:
    """Chat persona that answers website visitors on behalf of one person.

    On construction it creates an OpenAI-client pointed at Gemini's
    OpenAI-compatible endpoint and reads ``me/linkedin.pdf`` and
    ``me/summary.txt``; both texts are embedded in the system prompt.
    """

    # Names the model is allowed to invoke through a tool call. Dispatch goes
    # through globals(), so without this allowlist a model-supplied name could
    # reach any module-level object. Keep in sync with the module-level `tools`.
    ALLOWED_TOOLS = frozenset({"record_user_details", "record_unknown_question"})

    def __init__(self):
        """Build the API client and load the profile texts from ``me/``."""
        self.GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
        self.GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
        self.openai = OpenAI(base_url=self.GEMINI_BASE_URL, api_key=self.GOOGLE_API_KEY)
        self.name = "Harsh Patidar"
        reader = PdfReader("me/linkedin.pdf")
        # extract_text() may yield nothing for a page; treat that as empty text.
        self.linkedin = "".join(page.extract_text() or "" for page in reader.pages)
        with open("me/summary.txt", "r", encoding="utf-8") as f:
            self.summary = f.read()

    def handle_tool_call(self, tool_calls):
        """Run each requested tool and build the matching ``tool`` messages.

        Args:
            tool_calls: Tool-call objects from a chat completion. Each must
                expose ``id``, ``function.name`` and ``function.arguments``
                (a JSON-encoded object of keyword arguments).

        Returns:
            A list with one ``{"role": "tool", "content": ..., "tool_call_id": ...}``
            dict per call, in the same order. A call whose name is not in
            ``ALLOWED_TOOLS``, or does not resolve to a callable, gets ``{}``
            as its result instead of raising.
        """
        results = []
        for tool_call in tool_calls:
            tool_name = tool_call.function.name
            arguments = json.loads(tool_call.function.arguments)
            print(f"Tool called: {tool_name}", flush=True)
            # Resolve only allowlisted names; anything else is treated as unknown.
            tool = globals().get(tool_name) if tool_name in self.ALLOWED_TOOLS else None
            result = tool(**arguments) if callable(tool) else {}
            results.append({"role": "tool", "content": json.dumps(result), "tool_call_id": tool_call.id})
        return results

    def system_prompt(self):
        """Return the system prompt: persona instructions plus summary and LinkedIn text."""
        system_prompt = f"You are acting as {self.name}. You are answering questions on {self.name}'s website, \
particularly questions related to {self.name}'s career, background, skills and experience. \
Your responsibility is to represent {self.name} for interactions on the website as faithfully as possible. \
You are given a summary of {self.name}'s background and LinkedIn profile which you can use to answer questions. \
Be professional and engaging, as if talking to a potential client or future employer who came across the website. \
If you don't know the answer to any question, use your record_unknown_question tool to record the question that you couldn't answer, even if it's about something trivial or unrelated to career. \
If the user is engaging in discussion, try to steer them towards getting in touch via email; ask for their email and record it using your record_user_details tool. "

        system_prompt += f"\n\n## Summary:\n{self.summary}\n\n## LinkedIn Profile:\n{self.linkedin}\n\n"
        system_prompt += f"With this context, please chat with the user, always staying in character as {self.name}."
        return system_prompt

    def chat(self, message, history):
        """Answer one user message, executing tool calls until the model finishes.

        Args:
            message: The new user message text.
            history: Prior conversation as a list of role/content message dicts.

        Returns:
            The ``content`` of the first completion whose finish reason is
            not ``"tool_calls"``.
        """
        messages = [{"role": "system", "content": self.system_prompt()}] + history + [{"role": "user", "content": message}]
        while True:
            response = self.openai.chat.completions.create(model="gemini-2.5-flash-preview-05-20", messages=messages, tools=tools)
            choice = response.choices[0]
            if choice.finish_reason != "tool_calls":
                return choice.message.content
            # Use a distinct name so the `message` parameter is not rebound.
            assistant_turn = choice.message
            results = self.handle_tool_call(assistant_turn.tool_calls)
            # The assistant turn must precede its tool results in the transcript.
            messages.append(assistant_turn)
            messages.extend(results)
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
if __name__ == "__main__":
    # Build the persona once, then serve its chat method through a Gradio chat UI.
    persona = Me()
    interface = gr.ChatInterface(persona.chat, type="messages")
    interface.launch()
|
| 136 |
+
|
community_contributions/1_foundations_using_gemini/me/linkedin.pdf
ADDED
|
Binary file (54.2 kB). View file
|
|
|
community_contributions/1_foundations_using_gemini/me/summary.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Hey, I’m Harsh Patidar — a Data Engineer at ZS who loves building data systems that actually work — scalable, reliable, and smart enough to keep learning.
|
| 2 |
+
I’ve spent the past few years turning raw, unstructured data into powerful systems that fuel analytics, automation, and AI-driven decisions.
|
| 3 |
+
|
| 4 |
+
At ZS, I work in the R&D division, where I design and deploy containerized APIs, optimize data pipelines, and integrate machine learning models into real-world workflows. My toolkit revolves around Python, SQL, FastAPI, Docker, Airflow, and AWS, and I enjoy the process of connecting every piece of data infrastructure into something clean, efficient, and production-ready.
|
| 5 |
+
|
| 6 |
+
Before this, I was part of Accenture’s Data Engineering & Governance team, helping large enterprises strengthen data reliability, validation, and compliance frameworks — experience that taught me the importance of structure, traceability, and precision.
|
| 7 |
+
I also spent time as a Teaching Assistant at Coding Ninjas, mentoring over 200 students in Data Structures and Algorithms — something that shaped both my fundamentals and my patience.
|
| 8 |
+
|
| 9 |
+
Outside of work, I’m someone who finds joy in photography, exploring tech startups, and deep research in finance and AI. I like observing how technology, creativity, and design come together — whether in a great photograph or a cleanly designed data pipeline.
|
| 10 |
+
|
| 11 |
+
At my core, I’m driven by curiosity and the excitement of building something meaningful from scratch. I believe great work is built quietly, through learning, experimentation, and the discipline to keep improving — whether that’s a data system, a product, or even myself.
|
community_contributions/1_foundations_using_gemini/requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
requests
|
| 2 |
+
python-dotenv
|
| 3 |
+
gradio
|
| 4 |
+
pypdf
|
| 5 |
+
openai
|
| 6 |
+
openai-agents
|
community_contributions/1_lab1_DA.ipynb
ADDED
|
@@ -0,0 +1,396 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
+
]
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"cell_type": "markdown",
|
| 12 |
+
"metadata": {},
|
| 13 |
+
"source": [
|
| 14 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 15 |
+
" <tr>\n",
|
| 16 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 17 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 18 |
+
" </td>\n",
|
| 19 |
+
" <td>\n",
|
| 20 |
+
" <h2 style=\"color:#ff7800;\">Are you ready for action??</h2>\n",
|
| 21 |
+
" <span style=\"color:#ff7800;\">Have you completed all the setup steps in the <a href=\"../setup/\">setup</a> folder?<br/>\n",
|
| 22 |
+
" Have you read the <a href=\"../README.md\">README</a>? Many common questions are answered here!<br/>\n",
|
| 23 |
+
" Have you checked out the guides in the <a href=\"../guides/01_intro.ipynb\">guides</a> folder?<br/>\n",
|
| 24 |
+
" Well in that case, you're ready!!\n",
|
| 25 |
+
" </span>\n",
|
| 26 |
+
" </td>\n",
|
| 27 |
+
" </tr>\n",
|
| 28 |
+
"</table>"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "markdown",
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"source": [
|
| 35 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 36 |
+
" <tr>\n",
|
| 37 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 38 |
+
" <img src=\"../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 39 |
+
" </td>\n",
|
| 40 |
+
" <td>\n",
|
| 41 |
+
" <h2 style=\"color:#00bfff;\">This code is a live resource - keep an eye out for my updates</h2>\n",
|
| 42 |
+
" <span style=\"color:#00bfff;\">I push updates regularly. As people ask questions or have problems, I add more examples and improve explanations. As a result, the code below might not be identical to the videos, as I've added more steps and better comments. Consider this like an interactive book that accompanies the lectures.<br/><br/>\n",
|
| 43 |
+
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
|
| 44 |
+
" </span>\n",
|
| 45 |
+
" </td>\n",
|
| 46 |
+
" </tr>\n",
|
| 47 |
+
"</table>"
|
| 48 |
+
]
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"cell_type": "markdown",
|
| 52 |
+
"metadata": {},
|
| 53 |
+
"source": [
|
| 54 |
+
"### And please do remember to contact me if I can help\n",
|
| 55 |
+
"\n",
|
| 56 |
+
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 57 |
+
"\n",
|
| 58 |
+
"\n",
|
| 59 |
+
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 60 |
+
"\n",
|
| 61 |
+
"Just to check you've already added the Python and Jupyter extensions to Cursor, if not already installed:\n",
|
| 62 |
+
"- Open extensions (View >> extensions)\n",
|
| 63 |
+
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 64 |
+
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 65 |
+
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 66 |
+
"\n",
|
| 67 |
+
"And then:\n",
|
| 68 |
+
"1. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 69 |
+
"2. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 70 |
+
"3. Enjoy!\n",
|
| 71 |
+
"\n",
|
| 72 |
+
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 73 |
+
"1. On Mac: From the Cursor menu, choose Settings >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`); \n",
|
| 74 |
+
"On Windows PC: From the File menu, choose Preferences >> VS Code Settings(NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 75 |
+
"2. In the Settings search bar, type \"venv\" \n",
|
| 76 |
+
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 77 |
+
"And then try again.\n",
|
| 78 |
+
"\n",
|
| 79 |
+
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interfering. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 80 |
+
"`conda deactivate` \n",
|
| 81 |
+
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 82 |
+
"`conda config --set auto_activate_base false` \n",
|
| 83 |
+
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 84 |
+
]
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"cell_type": "code",
|
| 88 |
+
"execution_count": null,
|
| 89 |
+
"metadata": {},
|
| 90 |
+
"outputs": [],
|
| 91 |
+
"source": [
|
| 92 |
+
"# First let's do an import. If you get an Import Error, double check that your Kernel is correct..\n",
|
| 93 |
+
"\n",
|
| 94 |
+
"from dotenv import load_dotenv\n"
|
| 95 |
+
]
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"cell_type": "code",
|
| 99 |
+
"execution_count": null,
|
| 100 |
+
"metadata": {},
|
| 101 |
+
"outputs": [],
|
| 102 |
+
"source": [
|
| 103 |
+
"# Next it's time to load the API keys into environment variables\n",
|
| 104 |
+
"# If this returns false, see the next cell!\n",
|
| 105 |
+
"\n",
|
| 106 |
+
"load_dotenv(override=True)"
|
| 107 |
+
]
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"cell_type": "markdown",
|
| 111 |
+
"metadata": {},
|
| 112 |
+
"source": [
|
| 113 |
+
"### Wait, did that just output `False`??\n",
|
| 114 |
+
"\n",
|
| 115 |
+
"If so, the most common reason is that you didn't save your `.env` file after adding the key! Be sure to have saved.\n",
|
| 116 |
+
"\n",
|
| 117 |
+
"Also, make sure the `.env` file is named precisely `.env` and is in the project root directory (`agents`)\n",
|
| 118 |
+
"\n",
|
| 119 |
+
"By the way, your `.env` file should have a stop symbol next to it in Cursor on the left, and that's actually a good thing: that's Cursor saying to you, \"hey, I realize this is a file filled with secret information, and I'm not going to send it to an external AI to suggest changes, because your keys should not be shown to anyone else.\""
|
| 120 |
+
]
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"cell_type": "markdown",
|
| 124 |
+
"metadata": {},
|
| 125 |
+
"source": [
|
| 126 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 127 |
+
" <tr>\n",
|
| 128 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 129 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 130 |
+
" </td>\n",
|
| 131 |
+
" <td>\n",
|
| 132 |
+
" <h2 style=\"color:#ff7800;\">Final reminders</h2>\n",
|
| 133 |
+
" <span style=\"color:#ff7800;\">1. If you're not confident about Environment Variables or Web Endpoints / APIs, please read Topics 3 and 5 in this <a href=\"../guides/04_technical_foundations.ipynb\">technical foundations guide</a>.<br/>\n",
|
| 134 |
+
" 2. If you want to use AIs other than OpenAI, like Gemini, DeepSeek or Ollama (free), please see the first section in this <a href=\"../guides/09_ai_apis_and_ollama.ipynb\">AI APIs guide</a>.<br/>\n",
|
| 135 |
+
" 3. If you ever get a Name Error in Python, you can always fix it immediately; see the last section of this <a href=\"../guides/06_python_foundations.ipynb\">Python Foundations guide</a> and follow both tutorials and exercises.<br/>\n",
|
| 136 |
+
" </span>\n",
|
| 137 |
+
" </td>\n",
|
| 138 |
+
" </tr>\n",
|
| 139 |
+
"</table>"
|
| 140 |
+
]
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"cell_type": "code",
|
| 144 |
+
"execution_count": null,
|
| 145 |
+
"metadata": {},
|
| 146 |
+
"outputs": [],
|
| 147 |
+
"source": [
|
| 148 |
+
"# Check the key - if you're not using OpenAI, check whichever key you're using! Ollama doesn't need a key.\n",
|
| 149 |
+
"\n",
|
| 150 |
+
"import os\n",
|
| 151 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 152 |
+
"\n",
|
| 153 |
+
"if openai_api_key:\n",
|
| 154 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 155 |
+
"else:\n",
|
| 156 |
+
" print(\"OpenAI API Key not set - please head to the troubleshooting guide in the setup folder\")\n",
|
| 157 |
+
" \n"
|
| 158 |
+
]
|
| 159 |
+
},
|
| 160 |
+
{
|
| 161 |
+
"cell_type": "code",
|
| 162 |
+
"execution_count": null,
|
| 163 |
+
"metadata": {},
|
| 164 |
+
"outputs": [],
|
| 165 |
+
"source": [
|
| 166 |
+
"# And now - the all important import statement\n",
|
| 167 |
+
"# If you get an import error - head over to troubleshooting in the Setup folder\n",
|
| 168 |
+
"# Even for other LLM providers like Gemini, you still use this OpenAI import - see Guide 9 for why\n",
|
| 169 |
+
"\n",
|
| 170 |
+
"from openai import OpenAI"
|
| 171 |
+
]
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"cell_type": "code",
|
| 175 |
+
"execution_count": null,
|
| 176 |
+
"metadata": {},
|
| 177 |
+
"outputs": [],
|
| 178 |
+
"source": [
|
| 179 |
+
"# And now we'll create an instance of the OpenAI class\n",
|
| 180 |
+
"# If you're not sure what it means to create an instance of a class - head over to the guides folder (guide 6)!\n",
|
| 181 |
+
"# If you get a NameError - head over to the guides folder (guide 6)to learn about NameErrors - always instantly fixable\n",
|
| 182 |
+
"# If you're not using OpenAI, you just need to slightly modify this - precise instructions are in the AI APIs guide (guide 9)\n",
|
| 183 |
+
"\n",
|
| 184 |
+
"openai = OpenAI()"
|
| 185 |
+
]
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"cell_type": "code",
|
| 189 |
+
"execution_count": null,
|
| 190 |
+
"metadata": {},
|
| 191 |
+
"outputs": [],
|
| 192 |
+
"source": [
|
| 193 |
+
"# Create a list of messages in the familiar OpenAI format\n",
|
| 194 |
+
"\n",
|
| 195 |
+
"messages = [{\"role\": \"user\", \"content\": \"What is 2+2?\"}]"
|
| 196 |
+
]
|
| 197 |
+
},
|
| 198 |
+
{
|
| 199 |
+
"cell_type": "code",
|
| 200 |
+
"execution_count": null,
|
| 201 |
+
"metadata": {},
|
| 202 |
+
"outputs": [],
|
| 203 |
+
"source": [
|
| 204 |
+
"# And now call it! Any problems, head to the troubleshooting guide\n",
|
| 205 |
+
"# This uses GPT 4.1 nano, the incredibly cheap model\n",
|
| 206 |
+
"# The APIs guide (guide 9) has exact instructions for using even cheaper or free alternatives to OpenAI\n",
|
| 207 |
+
"# If you get a NameError, head to the guides folder (guide 6) to learn about NameErrors - always instantly fixable\n",
|
| 208 |
+
"\n",
|
| 209 |
+
"response = openai.chat.completions.create(\n",
|
| 210 |
+
" model=\"gpt-4.1-nano\",\n",
|
| 211 |
+
" messages=messages\n",
|
| 212 |
+
")\n",
|
| 213 |
+
"\n",
|
| 214 |
+
"print(response.choices[0].message.content)\n"
|
| 215 |
+
]
|
| 216 |
+
},
|
| 217 |
+
{
|
| 218 |
+
"cell_type": "code",
|
| 219 |
+
"execution_count": null,
|
| 220 |
+
"metadata": {},
|
| 221 |
+
"outputs": [],
|
| 222 |
+
"source": [
|
| 223 |
+
"# And now - let's ask for a question:\n",
|
| 224 |
+
"\n",
|
| 225 |
+
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 226 |
+
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 227 |
+
]
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"cell_type": "code",
|
| 231 |
+
"execution_count": null,
|
| 232 |
+
"metadata": {},
|
| 233 |
+
"outputs": [],
|
| 234 |
+
"source": [
|
| 235 |
+
"# ask it - this uses GPT 4.1 mini, still cheap but more powerful than nano\n",
|
| 236 |
+
"\n",
|
| 237 |
+
"response = openai.chat.completions.create(\n",
|
| 238 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 239 |
+
" messages=messages\n",
|
| 240 |
+
")\n",
|
| 241 |
+
"\n",
|
| 242 |
+
"question = response.choices[0].message.content\n",
|
| 243 |
+
"\n",
|
| 244 |
+
"print(question)\n"
|
| 245 |
+
]
|
| 246 |
+
},
|
| 247 |
+
{
|
| 248 |
+
"cell_type": "code",
|
| 249 |
+
"execution_count": null,
|
| 250 |
+
"metadata": {},
|
| 251 |
+
"outputs": [],
|
| 252 |
+
"source": [
|
| 253 |
+
"# form a new messages list\n",
|
| 254 |
+
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 255 |
+
]
|
| 256 |
+
},
|
| 257 |
+
{
|
| 258 |
+
"cell_type": "code",
|
| 259 |
+
"execution_count": null,
|
| 260 |
+
"metadata": {},
|
| 261 |
+
"outputs": [],
|
| 262 |
+
"source": [
|
| 263 |
+
"# Ask it again\n",
|
| 264 |
+
"\n",
|
| 265 |
+
"response = openai.chat.completions.create(\n",
|
| 266 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 267 |
+
" messages=messages\n",
|
| 268 |
+
")\n",
|
| 269 |
+
"\n",
|
| 270 |
+
"answer = response.choices[0].message.content\n",
|
| 271 |
+
"print(answer)\n"
|
| 272 |
+
]
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"cell_type": "code",
|
| 276 |
+
"execution_count": null,
|
| 277 |
+
"metadata": {},
|
| 278 |
+
"outputs": [],
|
| 279 |
+
"source": [
|
| 280 |
+
"from IPython.display import Markdown, display\n",
|
| 281 |
+
"\n",
|
| 282 |
+
"display(Markdown(answer))\n",
|
| 283 |
+
"\n"
|
| 284 |
+
]
|
| 285 |
+
},
|
| 286 |
+
{
|
| 287 |
+
"cell_type": "markdown",
|
| 288 |
+
"metadata": {},
|
| 289 |
+
"source": [
|
| 290 |
+
"# Congratulations!\n",
|
| 291 |
+
"\n",
|
| 292 |
+
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 293 |
+
"\n",
|
| 294 |
+
"Next time things get more interesting..."
|
| 295 |
+
]
|
| 296 |
+
},
|
| 297 |
+
{
|
| 298 |
+
"cell_type": "markdown",
|
| 299 |
+
"metadata": {},
|
| 300 |
+
"source": [
|
| 301 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 302 |
+
" <tr>\n",
|
| 303 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 304 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 305 |
+
" </td>\n",
|
| 306 |
+
" <td>\n",
|
| 307 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 308 |
+
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 309 |
+
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 310 |
+
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 311 |
+
" Finally have a third LLM call propose the Agentic AI solution. <br/>\n",
|
| 312 |
+
" We will cover this at up-coming labs, so don't worry if you're unsure.. just give it a try!\n",
|
| 313 |
+
" </span>\n",
|
| 314 |
+
" </td>\n",
|
| 315 |
+
" </tr>\n",
|
| 316 |
+
"</table>"
|
| 317 |
+
]
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"cell_type": "code",
|
| 321 |
+
"execution_count": null,
|
| 322 |
+
"metadata": {},
|
| 323 |
+
"outputs": [],
|
| 324 |
+
"source": [
|
| 325 |
+
"# And now - let's ask for a question:\n",
|
| 326 |
+
"\n",
|
| 327 |
+
"import os\n",
|
| 328 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 329 |
+
"from openai import OpenAI\n",
|
| 330 |
+
"from IPython.display import Markdown, display\n",
|
| 331 |
+
"\n",
|
| 332 |
+
"# And now we'll create an instance of the OpenAI class\n",
|
| 333 |
+
"\n",
|
| 334 |
+
"openai = OpenAI()\n",
|
| 335 |
+
"\n",
|
| 336 |
+
"question1 = \"Please pick a business area that might be worth exploring for an Agentic AI opportunity.\"\n",
|
| 337 |
+
"messages1 = [{\"role\": \"user\", \"content\": question1}]\n",
|
| 338 |
+
"\n",
|
| 339 |
+
"# Then make the first call:\n",
|
| 340 |
+
"response1 = openai.chat.completions.create(\n",
|
| 341 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 342 |
+
" messages=messages1\n",
|
| 343 |
+
")\n",
|
| 344 |
+
"\n",
|
| 345 |
+
"question2 = \" Please present the pain-point in \"+response1.choices[0].message.content +\" industry - something challenging that might be ripe for an Agentic solution\"\n",
|
| 346 |
+
"messages2 = [{\"role\": \"user\", \"content\": question2}]\n",
|
| 347 |
+
"\n",
|
| 348 |
+
"# Then make the second call:\n",
|
| 349 |
+
"response2 = openai.chat.completions.create(\n",
|
| 350 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 351 |
+
" messages=messages2\n",
|
| 352 |
+
")\n",
|
| 353 |
+
"\n",
|
| 354 |
+
"question3 = \" Please propose an Agentic AI solution for pain-point \"+response2.choices[0].message.content\n",
|
| 355 |
+
"messages3 = [{\"role\": \"user\", \"content\": question3}]\n",
|
| 356 |
+
"\n",
|
| 357 |
+
"# Then make the third call:\n",
|
| 358 |
+
"response3 = openai.chat.completions.create(\n",
|
| 359 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 360 |
+
" messages=messages3\n",
|
| 361 |
+
")\n",
|
| 362 |
+
"\n",
|
| 363 |
+
"Final_Answer = response3.choices[0].message.content\n",
|
| 364 |
+
"\n",
|
| 365 |
+
"display(Markdown(Final_Answer))\n",
|
| 366 |
+
"\n"
|
| 367 |
+
]
|
| 368 |
+
},
|
| 369 |
+
{
|
| 370 |
+
"cell_type": "markdown",
|
| 371 |
+
"metadata": {},
|
| 372 |
+
"source": []
|
| 373 |
+
}
|
| 374 |
+
],
|
| 375 |
+
"metadata": {
|
| 376 |
+
"kernelspec": {
|
| 377 |
+
"display_name": ".venv",
|
| 378 |
+
"language": "python",
|
| 379 |
+
"name": "python3"
|
| 380 |
+
},
|
| 381 |
+
"language_info": {
|
| 382 |
+
"codemirror_mode": {
|
| 383 |
+
"name": "ipython",
|
| 384 |
+
"version": 3
|
| 385 |
+
},
|
| 386 |
+
"file_extension": ".py",
|
| 387 |
+
"mimetype": "text/x-python",
|
| 388 |
+
"name": "python",
|
| 389 |
+
"nbconvert_exporter": "python",
|
| 390 |
+
"pygments_lexer": "ipython3",
|
| 391 |
+
"version": "3.12.11"
|
| 392 |
+
}
|
| 393 |
+
},
|
| 394 |
+
"nbformat": 4,
|
| 395 |
+
"nbformat_minor": 2
|
| 396 |
+
}
|
community_contributions/1_lab1_Hy.ipynb
ADDED
|
@@ -0,0 +1,688 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
+
]
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"cell_type": "markdown",
|
| 12 |
+
"metadata": {},
|
| 13 |
+
"source": [
|
| 14 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 15 |
+
" <tr>\n",
|
| 16 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 17 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 18 |
+
" </td>\n",
|
| 19 |
+
" <td>\n",
|
| 20 |
+
" <h2 style=\"color:#ff7800;\">Are you ready for action??</h2>\n",
|
| 21 |
+
" <span style=\"color:#ff7800;\">Have you completed all the setup steps in the <a href=\"../setup/\">setup</a> folder?<br/>\n",
|
| 22 |
+
" Have you read the <a href=\"../README.md\">README</a>? Many common questions are answered here!<br/>\n",
|
| 23 |
+
" Have you checked out the guides in the <a href=\"../guides/01_intro.ipynb\">guides</a> folder?<br/>\n",
|
| 24 |
+
" Well in that case, you're ready!!\n",
|
| 25 |
+
" </span>\n",
|
| 26 |
+
" </td>\n",
|
| 27 |
+
" </tr>\n",
|
| 28 |
+
"</table>"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "markdown",
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"source": [
|
| 35 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 36 |
+
" <tr>\n",
|
| 37 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 38 |
+
" <img src=\"../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 39 |
+
" </td>\n",
|
| 40 |
+
" <td>\n",
|
| 41 |
+
" <h2 style=\"color:#00bfff;\">This code is a live resource - keep an eye out for my updates</h2>\n",
|
| 42 |
+
" <span style=\"color:#00bfff;\">I push updates regularly. As people ask questions or have problems, I add more examples and improve explanations. As a result, the code below might not be identical to the videos, as I've added more steps and better comments. Consider this like an interactive book that accompanies the lectures.<br/><br/>\n",
|
| 43 |
+
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
|
| 44 |
+
" </span>\n",
|
| 45 |
+
" </td>\n",
|
| 46 |
+
" </tr>\n",
|
| 47 |
+
"</table>"
|
| 48 |
+
]
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"cell_type": "markdown",
|
| 52 |
+
"metadata": {},
|
| 53 |
+
"source": [
|
| 54 |
+
"### And please do remember to contact me if I can help\n",
|
| 55 |
+
"\n",
|
| 56 |
+
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 57 |
+
"\n",
|
| 58 |
+
"\n",
|
| 59 |
+
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 60 |
+
"\n",
|
| 61 |
+
"First, check that you've added the Python and Jupyter extensions to Cursor. If they're not already installed:\n",
|
| 62 |
+
"- Open extensions (View >> extensions)\n",
|
| 63 |
+
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 64 |
+
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 65 |
+
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 66 |
+
"\n",
|
| 67 |
+
"And then:\n",
|
| 68 |
+
"1. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 69 |
+
"2. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 70 |
+
"3. Enjoy!\n",
|
| 71 |
+
"\n",
|
| 72 |
+
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 73 |
+
"1. On Mac: From the Cursor menu, choose Settings >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`); \n",
|
| 74 |
+
"On Windows PC: From the File menu, choose Preferences >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 75 |
+
"2. In the Settings search bar, type \"venv\" \n",
|
| 76 |
+
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 77 |
+
"And then try again.\n",
|
| 78 |
+
"\n",
|
| 79 |
+
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interfering. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 80 |
+
"`conda deactivate` \n",
|
| 81 |
+
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 82 |
+
"`conda config --set auto_activate_base false` \n",
|
| 83 |
+
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 84 |
+
]
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"cell_type": "code",
|
| 88 |
+
"execution_count": 1,
|
| 89 |
+
"metadata": {},
|
| 90 |
+
"outputs": [],
|
| 91 |
+
"source": [
|
| 92 |
+
"# First let's do an import. If you get an Import Error, double check that your Kernel is correct..\n",
|
| 93 |
+
"\n",
|
| 94 |
+
"from dotenv import load_dotenv\n"
|
| 95 |
+
]
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"cell_type": "code",
|
| 99 |
+
"execution_count": 2,
|
| 100 |
+
"metadata": {},
|
| 101 |
+
"outputs": [
|
| 102 |
+
{
|
| 103 |
+
"data": {
|
| 104 |
+
"text/plain": [
|
| 105 |
+
"True"
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
"execution_count": 2,
|
| 109 |
+
"metadata": {},
|
| 110 |
+
"output_type": "execute_result"
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"source": [
|
| 114 |
+
"# Next it's time to load the API keys into environment variables\n",
|
| 115 |
+
"# If this returns false, see the next cell!\n",
|
| 116 |
+
"\n",
|
| 117 |
+
"load_dotenv(override=True)"
|
| 118 |
+
]
|
| 119 |
+
},
|
| 120 |
+
{
|
| 121 |
+
"cell_type": "markdown",
|
| 122 |
+
"metadata": {},
|
| 123 |
+
"source": [
|
| 124 |
+
"### Wait, did that just output `False`??\n",
|
| 125 |
+
"\n",
|
| 126 |
+
"If so, the most common reason is that you didn't save your `.env` file after adding the key! Be sure to have saved.\n",
|
| 127 |
+
"\n",
|
| 128 |
+
"Also, make sure the `.env` file is named precisely `.env` and is in the project root directory (`agents`)\n",
|
| 129 |
+
"\n",
|
| 130 |
+
"By the way, your `.env` file should have a stop symbol next to it in Cursor on the left, and that's actually a good thing: that's Cursor saying to you, \"hey, I realize this is a file filled with secret information, and I'm not going to send it to an external AI to suggest changes, because your keys should not be shown to anyone else.\""
|
| 131 |
+
]
|
| 132 |
+
},
|
| 133 |
+
{
|
| 134 |
+
"cell_type": "markdown",
|
| 135 |
+
"metadata": {},
|
| 136 |
+
"source": [
|
| 137 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 138 |
+
" <tr>\n",
|
| 139 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 140 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 141 |
+
" </td>\n",
|
| 142 |
+
" <td>\n",
|
| 143 |
+
" <h2 style=\"color:#ff7800;\">Final reminders</h2>\n",
|
| 144 |
+
" <span style=\"color:#ff7800;\">1. If you're not confident about Environment Variables or Web Endpoints / APIs, please read Topics 3 and 5 in this <a href=\"../guides/04_technical_foundations.ipynb\">technical foundations guide</a>.<br/>\n",
|
| 145 |
+
" 2. If you want to use AIs other than OpenAI, like Gemini, DeepSeek or Ollama (free), please see the first section in this <a href=\"../guides/09_ai_apis_and_ollama.ipynb\">AI APIs guide</a>.<br/>\n",
|
| 146 |
+
" 3. If you ever get a Name Error in Python, you can always fix it immediately; see the last section of this <a href=\"../guides/06_python_foundations.ipynb\">Python Foundations guide</a> and follow both tutorials and exercises.<br/>\n",
|
| 147 |
+
" </span>\n",
|
| 148 |
+
" </td>\n",
|
| 149 |
+
" </tr>\n",
|
| 150 |
+
"</table>"
|
| 151 |
+
]
|
| 152 |
+
},
|
| 153 |
+
{
|
| 154 |
+
"cell_type": "code",
|
| 155 |
+
"execution_count": 3,
|
| 156 |
+
"metadata": {},
|
| 157 |
+
"outputs": [
|
| 158 |
+
{
|
| 159 |
+
"name": "stdout",
|
| 160 |
+
"output_type": "stream",
|
| 161 |
+
"text": [
|
| 162 |
+
"OpenAI API Key exists and begins sk-proj-\n"
|
| 163 |
+
]
|
| 164 |
+
}
|
| 165 |
+
],
|
| 166 |
+
"source": [
|
| 167 |
+
"# Check the key - if you're not using OpenAI, check whichever key you're using! Ollama doesn't need a key.\n",
|
| 168 |
+
"\n",
|
| 169 |
+
"import os\n",
|
| 170 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 171 |
+
"\n",
|
| 172 |
+
"if openai_api_key:\n",
|
| 173 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 174 |
+
"else:\n",
|
| 175 |
+
" print(\"OpenAI API Key not set - please head to the troubleshooting guide in the setup folder\")\n",
|
| 176 |
+
" \n"
|
| 177 |
+
]
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"cell_type": "code",
|
| 181 |
+
"execution_count": 4,
|
| 182 |
+
"metadata": {},
|
| 183 |
+
"outputs": [],
|
| 184 |
+
"source": [
|
| 185 |
+
"# And now - the all important import statement\n",
|
| 186 |
+
"# If you get an import error - head over to troubleshooting in the Setup folder\n",
|
| 187 |
+
"# Even for other LLM providers like Gemini, you still use this OpenAI import - see Guide 9 for why\n",
|
| 188 |
+
"\n",
|
| 189 |
+
"from openai import OpenAI"
|
| 190 |
+
]
|
| 191 |
+
},
|
| 192 |
+
{
|
| 193 |
+
"cell_type": "code",
|
| 194 |
+
"execution_count": 5,
|
| 195 |
+
"metadata": {},
|
| 196 |
+
"outputs": [],
|
| 197 |
+
"source": [
|
| 198 |
+
"# And now we'll create an instance of the OpenAI class\n",
|
| 199 |
+
"# If you're not sure what it means to create an instance of a class - head over to the guides folder (guide 6)!\n",
|
| 200 |
+
"# If you get a NameError - head over to the guides folder (guide 6) to learn about NameErrors - always instantly fixable\n",
|
| 201 |
+
"# If you're not using OpenAI, you just need to slightly modify this - precise instructions are in the AI APIs guide (guide 9)\n",
|
| 202 |
+
"\n",
|
| 203 |
+
"openai = OpenAI()"
|
| 204 |
+
]
|
| 205 |
+
},
|
| 206 |
+
{
|
| 207 |
+
"cell_type": "code",
|
| 208 |
+
"execution_count": 6,
|
| 209 |
+
"metadata": {},
|
| 210 |
+
"outputs": [],
|
| 211 |
+
"source": [
|
| 212 |
+
"# Create a list of messages in the familiar OpenAI format\n",
|
| 213 |
+
"\n",
|
| 214 |
+
"messages = [{\"role\": \"user\", \"content\": \"What is 2+2?\"}]"
|
| 215 |
+
]
|
| 216 |
+
},
|
| 217 |
+
{
|
| 218 |
+
"cell_type": "code",
|
| 219 |
+
"execution_count": null,
|
| 220 |
+
"metadata": {},
|
| 221 |
+
"outputs": [
|
| 222 |
+
{
|
| 223 |
+
"name": "stdout",
|
| 224 |
+
"output_type": "stream",
|
| 225 |
+
"text": [
|
| 226 |
+
"ChatCompletion(id='chatcmpl-C9oVaLh1gjzKH07zcVLaXQ4o4FDQ7', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='2 + 2 equals 4.', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1756455142, model='gpt-4.1-nano-2025-04-14', object='chat.completion', service_tier='default', system_fingerprint='fp_c4c155951e', usage=CompletionUsage(completion_tokens=8, prompt_tokens=14, total_tokens=22, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n",
|
| 227 |
+
"2 + 2 equals 4.\n"
|
| 228 |
+
]
|
| 229 |
+
}
|
| 230 |
+
],
|
| 231 |
+
"source": [
|
| 232 |
+
"# And now call it! Any problems, head to the troubleshooting guide\n",
|
| 233 |
+
"# This uses GPT 4.1 nano, the incredibly cheap model\n",
|
| 234 |
+
"# The APIs guide (guide 9) has exact instructions for using even cheaper or free alternatives to OpenAI\n",
|
| 235 |
+
"# If you get a NameError, head to the guides folder (guide 6) to learn about NameErrors - always instantly fixable\n",
|
| 236 |
+
"\n",
|
| 237 |
+
"response = openai.chat.completions.create(\n",
|
| 238 |
+
" model=\"gpt-4.1-nano\",\n",
|
| 239 |
+
" messages=messages\n",
|
| 240 |
+
")\n",
|
| 241 |
+
"print(response.choices[0].message.content)\n"
|
| 242 |
+
]
|
| 243 |
+
},
|
| 244 |
+
{
|
| 245 |
+
"cell_type": "code",
|
| 246 |
+
"execution_count": 9,
|
| 247 |
+
"metadata": {},
|
| 248 |
+
"outputs": [],
|
| 249 |
+
"source": [
|
| 250 |
+
"# And now - let's ask for a question:\n",
|
| 251 |
+
"\n",
|
| 252 |
+
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 253 |
+
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 254 |
+
]
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"cell_type": "code",
|
| 258 |
+
"execution_count": 10,
|
| 259 |
+
"metadata": {},
|
| 260 |
+
"outputs": [
|
| 261 |
+
{
|
| 262 |
+
"name": "stdout",
|
| 263 |
+
"output_type": "stream",
|
| 264 |
+
"text": [
|
| 265 |
+
"If three people can paint three walls in three hours, how many people are needed to paint 18 walls in six hours?\n"
|
| 266 |
+
]
|
| 267 |
+
}
|
| 268 |
+
],
|
| 269 |
+
"source": [
|
| 270 |
+
"# ask it - this uses GPT 4.1 mini, still cheap but more powerful than nano\n",
|
| 271 |
+
"\n",
|
| 272 |
+
"response = openai.chat.completions.create(\n",
|
| 273 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 274 |
+
" messages=messages\n",
|
| 275 |
+
")\n",
|
| 276 |
+
"\n",
|
| 277 |
+
"question = response.choices[0].message.content\n",
|
| 278 |
+
"\n",
|
| 279 |
+
"print(question)\n"
|
| 280 |
+
]
|
| 281 |
+
},
|
| 282 |
+
{
|
| 283 |
+
"cell_type": "code",
|
| 284 |
+
"execution_count": 11,
|
| 285 |
+
"metadata": {},
|
| 286 |
+
"outputs": [],
|
| 287 |
+
"source": [
|
| 288 |
+
"# form a new messages list\n",
|
| 289 |
+
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 290 |
+
]
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"cell_type": "code",
|
| 294 |
+
"execution_count": 12,
|
| 295 |
+
"metadata": {},
|
| 296 |
+
"outputs": [
|
| 297 |
+
{
|
| 298 |
+
"name": "stdout",
|
| 299 |
+
"output_type": "stream",
|
| 300 |
+
"text": [
|
| 301 |
+
"Let's analyze the problem step-by-step:\n",
|
| 302 |
+
"\n",
|
| 303 |
+
"---\n",
|
| 304 |
+
"\n",
|
| 305 |
+
"**Given:**\n",
|
| 306 |
+
"\n",
|
| 307 |
+
"- 3 people can paint 3 walls in 3 hours.\n",
|
| 308 |
+
"\n",
|
| 309 |
+
"**Question:**\n",
|
| 310 |
+
"\n",
|
| 311 |
+
"- How many people are needed to paint 18 walls in 6 hours?\n",
|
| 312 |
+
"\n",
|
| 313 |
+
"---\n",
|
| 314 |
+
"\n",
|
| 315 |
+
"### Step 1: Find the rate of painting per person\n",
|
| 316 |
+
"\n",
|
| 317 |
+
"- Total walls painted: 3 walls\n",
|
| 318 |
+
"- Total people: 3 people\n",
|
| 319 |
+
"- Total time: 3 hours\n",
|
| 320 |
+
"\n",
|
| 321 |
+
"**Walls per person per hour:**\n",
|
| 322 |
+
"\n",
|
| 323 |
+
"First, find how many walls 3 people paint per hour:\n",
|
| 324 |
+
"\n",
|
| 325 |
+
"\\[\n",
|
| 326 |
+
"\\frac{3 \\text{ walls}}{3 \\text{ hours}} = 1 \\text{ wall per hour by 3 people}\n",
|
| 327 |
+
"\\]\n",
|
| 328 |
+
"\n",
|
| 329 |
+
"So, 3 people paint 1 wall per hour.\n",
|
| 330 |
+
"\n",
|
| 331 |
+
"Then, walls per person per hour:\n",
|
| 332 |
+
"\n",
|
| 333 |
+
"\\[\n",
|
| 334 |
+
"\\frac{1 \\text{ wall per hour}}{3 \\text{ people}} = \\frac{1}{3} \\text{ wall per person per hour}\n",
|
| 335 |
+
"\\]\n",
|
| 336 |
+
"\n",
|
| 337 |
+
"---\n",
|
| 338 |
+
"\n",
|
| 339 |
+
"### Step 2: Calculate total work needed\n",
|
| 340 |
+
"\n",
|
| 341 |
+
"You want to paint 18 walls in 6 hours.\n",
|
| 342 |
+
"\n",
|
| 343 |
+
"This means the rate of painting must be:\n",
|
| 344 |
+
"\n",
|
| 345 |
+
"\\[\n",
|
| 346 |
+
"\\frac{18 \\text{ walls}}{6 \\text{ hours}} = 3 \\text{ walls per hour}\n",
|
| 347 |
+
"\\]\n",
|
| 348 |
+
"\n",
|
| 349 |
+
"---\n",
|
| 350 |
+
"\n",
|
| 351 |
+
"### Step 3: Find how many people are needed for this rate\n",
|
| 352 |
+
"\n",
|
| 353 |
+
"Since each person paints \\(\\frac{1}{3}\\) wall per hour,\n",
|
| 354 |
+
"\n",
|
| 355 |
+
"\\[\n",
|
| 356 |
+
"\\text{Number of people} \\times \\frac{1}{3} = 3 \\text{ walls per hour}\n",
|
| 357 |
+
"\\]\n",
|
| 358 |
+
"\n",
|
| 359 |
+
"Multiply both sides by 3:\n",
|
| 360 |
+
"\n",
|
| 361 |
+
"\\[\n",
|
| 362 |
+
"\\text{Number of people} = 3 \\times 3 = 9\n",
|
| 363 |
+
"\\]\n",
|
| 364 |
+
"\n",
|
| 365 |
+
"---\n",
|
| 366 |
+
"\n",
|
| 367 |
+
"### **Answer:**\n",
|
| 368 |
+
"\n",
|
| 369 |
+
"\\[\n",
|
| 370 |
+
"\\boxed{9}\n",
|
| 371 |
+
"\\]\n",
|
| 372 |
+
"\n",
|
| 373 |
+
"You need **9 people** to paint 18 walls in 6 hours.\n"
|
| 374 |
+
]
|
| 375 |
+
}
|
| 376 |
+
],
|
| 377 |
+
"source": [
|
| 378 |
+
"# Ask it again\n",
|
| 379 |
+
"\n",
|
| 380 |
+
"response = openai.chat.completions.create(\n",
|
| 381 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 382 |
+
" messages=messages\n",
|
| 383 |
+
")\n",
|
| 384 |
+
"\n",
|
| 385 |
+
"answer = response.choices[0].message.content\n",
|
| 386 |
+
"print(answer)\n"
|
| 387 |
+
]
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"cell_type": "code",
|
| 391 |
+
"execution_count": 13,
|
| 392 |
+
"metadata": {},
|
| 393 |
+
"outputs": [
|
| 394 |
+
{
|
| 395 |
+
"data": {
|
| 396 |
+
"text/markdown": [
|
| 397 |
+
"Let's analyze the problem step-by-step:\n",
|
| 398 |
+
"\n",
|
| 399 |
+
"---\n",
|
| 400 |
+
"\n",
|
| 401 |
+
"**Given:**\n",
|
| 402 |
+
"\n",
|
| 403 |
+
"- 3 people can paint 3 walls in 3 hours.\n",
|
| 404 |
+
"\n",
|
| 405 |
+
"**Question:**\n",
|
| 406 |
+
"\n",
|
| 407 |
+
"- How many people are needed to paint 18 walls in 6 hours?\n",
|
| 408 |
+
"\n",
|
| 409 |
+
"---\n",
|
| 410 |
+
"\n",
|
| 411 |
+
"### Step 1: Find the rate of painting per person\n",
|
| 412 |
+
"\n",
|
| 413 |
+
"- Total walls painted: 3 walls\n",
|
| 414 |
+
"- Total people: 3 people\n",
|
| 415 |
+
"- Total time: 3 hours\n",
|
| 416 |
+
"\n",
|
| 417 |
+
"**Walls per person per hour:**\n",
|
| 418 |
+
"\n",
|
| 419 |
+
"First, find how many walls 3 people paint per hour:\n",
|
| 420 |
+
"\n",
|
| 421 |
+
"\\[\n",
|
| 422 |
+
"\\frac{3 \\text{ walls}}{3 \\text{ hours}} = 1 \\text{ wall per hour by 3 people}\n",
|
| 423 |
+
"\\]\n",
|
| 424 |
+
"\n",
|
| 425 |
+
"So, 3 people paint 1 wall per hour.\n",
|
| 426 |
+
"\n",
|
| 427 |
+
"Then, walls per person per hour:\n",
|
| 428 |
+
"\n",
|
| 429 |
+
"\\[\n",
|
| 430 |
+
"\\frac{1 \\text{ wall per hour}}{3 \\text{ people}} = \\frac{1}{3} \\text{ wall per person per hour}\n",
|
| 431 |
+
"\\]\n",
|
| 432 |
+
"\n",
|
| 433 |
+
"---\n",
|
| 434 |
+
"\n",
|
| 435 |
+
"### Step 2: Calculate total work needed\n",
|
| 436 |
+
"\n",
|
| 437 |
+
"You want to paint 18 walls in 6 hours.\n",
|
| 438 |
+
"\n",
|
| 439 |
+
"This means the rate of painting must be:\n",
|
| 440 |
+
"\n",
|
| 441 |
+
"\\[\n",
|
| 442 |
+
"\\frac{18 \\text{ walls}}{6 \\text{ hours}} = 3 \\text{ walls per hour}\n",
|
| 443 |
+
"\\]\n",
|
| 444 |
+
"\n",
|
| 445 |
+
"---\n",
|
| 446 |
+
"\n",
|
| 447 |
+
"### Step 3: Find how many people are needed for this rate\n",
|
| 448 |
+
"\n",
|
| 449 |
+
"Since each person paints \\(\\frac{1}{3}\\) wall per hour,\n",
|
| 450 |
+
"\n",
|
| 451 |
+
"\\[\n",
|
| 452 |
+
"\\text{Number of people} \\times \\frac{1}{3} = 3 \\text{ walls per hour}\n",
|
| 453 |
+
"\\]\n",
|
| 454 |
+
"\n",
|
| 455 |
+
"Multiply both sides by 3:\n",
|
| 456 |
+
"\n",
|
| 457 |
+
"\\[\n",
|
| 458 |
+
"\\text{Number of people} = 3 \\times 3 = 9\n",
|
| 459 |
+
"\\]\n",
|
| 460 |
+
"\n",
|
| 461 |
+
"---\n",
|
| 462 |
+
"\n",
|
| 463 |
+
"### **Answer:**\n",
|
| 464 |
+
"\n",
|
| 465 |
+
"\\[\n",
|
| 466 |
+
"\\boxed{9}\n",
|
| 467 |
+
"\\]\n",
|
| 468 |
+
"\n",
|
| 469 |
+
"You need **9 people** to paint 18 walls in 6 hours."
|
| 470 |
+
],
|
| 471 |
+
"text/plain": [
|
| 472 |
+
"<IPython.core.display.Markdown object>"
|
| 473 |
+
]
|
| 474 |
+
},
|
| 475 |
+
"metadata": {},
|
| 476 |
+
"output_type": "display_data"
|
| 477 |
+
}
|
| 478 |
+
],
|
| 479 |
+
"source": [
|
| 480 |
+
"from IPython.display import Markdown, display\n",
|
| 481 |
+
"\n",
|
| 482 |
+
"display(Markdown(answer))\n",
|
| 483 |
+
"\n"
|
| 484 |
+
]
|
| 485 |
+
},
|
| 486 |
+
{
|
| 487 |
+
"cell_type": "markdown",
|
| 488 |
+
"metadata": {},
|
| 489 |
+
"source": [
|
| 490 |
+
"# Congratulations!\n",
|
| 491 |
+
"\n",
|
| 492 |
+
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 493 |
+
"\n",
|
| 494 |
+
"Next time things get more interesting..."
|
| 495 |
+
]
|
| 496 |
+
},
|
| 497 |
+
{
|
| 498 |
+
"cell_type": "markdown",
|
| 499 |
+
"metadata": {},
|
| 500 |
+
"source": [
|
| 501 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 502 |
+
" <tr>\n",
|
| 503 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 504 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 505 |
+
" </td>\n",
|
| 506 |
+
" <td>\n",
|
| 507 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 508 |
+
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 509 |
+
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 510 |
+
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 511 |
+
" Finally have a third LLM call propose the Agentic AI solution. <br/>\n",
|
| 512 |
+
" We will cover this in upcoming labs, so don't worry if you're unsure.. just give it a try!\n",
|
| 513 |
+
" </span>\n",
|
| 514 |
+
" </td>\n",
|
| 515 |
+
" </tr>\n",
|
| 516 |
+
"</table>"
|
| 517 |
+
]
|
| 518 |
+
},
|
| 519 |
+
{
|
| 520 |
+
"cell_type": "code",
|
| 521 |
+
"execution_count": 16,
|
| 522 |
+
"metadata": {},
|
| 523 |
+
"outputs": [
|
| 524 |
+
{
|
| 525 |
+
"data": {
|
| 526 |
+
"text/markdown": [
|
| 527 |
+
"Certainly! Building on your outlined pain-point and the high-level Agentic AI functionalities, here’s a detailed proposal for an **Agentic AI solution** designed to tackle fragmented patient data and enable real-time, holistic health management.\n",
|
| 528 |
+
"\n",
|
| 529 |
+
"---\n",
|
| 530 |
+
"\n",
|
| 531 |
+
"# Agentic AI Solution Proposal: **HealthSynth AI**\n",
|
| 532 |
+
"\n",
|
| 533 |
+
"### Overview \n",
|
| 534 |
+
"**HealthSynth AI** is an autonomous health management agent that continuously synthesizes fragmented patient data from multiple sources to provide a real-time, unified, and actionable health profile for patients and their care teams. It acts as a 24/7 health assistant, proactive coordinator, and personalized medical advisor.\n",
|
| 535 |
+
"\n",
|
| 536 |
+
"---\n",
|
| 537 |
+
"\n",
|
| 538 |
+
"## Key Features & Capabilities\n",
|
| 539 |
+
"\n",
|
| 540 |
+
"### 1. **Autonomous Data Aggregation & Normalization** \n",
|
| 541 |
+
"- Uses API integrations, secure data exchanges (FHIR, HL7 standards), and device SDKs to continuously fetch data from: \n",
|
| 542 |
+
" - EHR systems across different providers \n",
|
| 543 |
+
" - Wearable and home medical devices (heart rate, glucose monitors, BP cuffs) \n",
|
| 544 |
+
" - Pharmacy records and prescription databases \n",
|
| 545 |
+
" - Lab results portals \n",
|
| 546 |
+
" - Insurance claims and coverage data \n",
|
| 547 |
+
"- Applies intelligent data cleaning, deduplication, and semantic normalization to unify heterogeneous data formats into a consistent patient health graph.\n",
|
| 548 |
+
"\n",
|
| 549 |
+
"### 2. **Real-Time Multimodal Health Analytics Engine** \n",
|
| 550 |
+
"- Employs advanced ML and deep learning models to detect: \n",
|
| 551 |
+
" - Emerging risk patterns (e.g., early signs of infection, deterioration of chronic conditions) \n",
|
| 552 |
+
" - Anomalies (missed medications, unusual vital sign changes) \n",
|
| 553 |
+
" - Compliance gaps (lifestyle, medication adherence) \n",
|
| 554 |
+
"- Continuously updates predictive health trajectories personalized to each patient’s condition and history.\n",
|
| 555 |
+
"\n",
|
| 556 |
+
"### 3. **Proactive Action & Recommendation System** \n",
|
| 557 |
+
"- Generates context-aware, evidence-based alerts and recommendations such as: \n",
|
| 558 |
+
" - Medication reminders or dosage adjustments flagged in consultation with prescribing physicians \n",
|
| 559 |
+
" - Suggestions for scheduling lab tests or specialist visits timely before symptoms worsen \n",
|
| 560 |
+
" - Lifestyle coaching tips adapted using patient preferences and progress \n",
|
| 561 |
+
"- Classes recommendations into urgency tiers (info, caution, immediate action) and routes notifications appropriately.\n",
|
| 562 |
+
"\n",
|
| 563 |
+
"### 4. **Automated Care Coordination & Workflow Integration** \n",
|
| 564 |
+
"- Interacts programmatically with provider scheduling systems, telemedicine platforms, pharmacies, and insurance portals to: \n",
|
| 565 |
+
" - Automatically request appointment reschedules or referrals based on patient status \n",
|
| 566 |
+
" - Notify involved healthcare professionals about critical health events or lab results \n",
|
| 567 |
+
" - Facilitate prescription renewals or modifications with minimal human intervention \n",
|
| 568 |
+
"- Maintains secure, auditable communication logs ensuring compliance (HIPAA, GDPR).\n",
|
| 569 |
+
"\n",
|
| 570 |
+
"### 5. **Patient-Centric Digital Health Companion** \n",
|
| 571 |
+
"- Provides patients with an intuitive mobile/web app featuring: \n",
|
| 572 |
+
" - A dynamic health dashboard summarizing key metrics, risks, and recent activities in plain language \n",
|
| 573 |
+
" - Intelligent daily check-ins and symptom trackers powered by conversational AI \n",
|
| 574 |
+
" - Adaptive educational content tailored to health literacy levels and language preferences \n",
|
| 575 |
+
" - Privacy controls empowering patients to manage data sharing settings\n",
|
| 576 |
+
"\n",
|
| 577 |
+
"---\n",
|
| 578 |
+
"\n",
|
| 579 |
+
"## Technical Architecture (High-Level)\n",
|
| 580 |
+
"\n",
|
| 581 |
+
"- **Data Ingestion Layer:** Connectors for EHRs, wearables, pharmacies, labs \n",
|
| 582 |
+
"- **Data Lake & Processing:** Cloud-native secure storage with HIPAA-compliant encryption \n",
|
| 583 |
+
"- **Knowledge Graph:** Patient-centric semantic graph linking clinical concepts, timelines, interventions \n",
|
| 584 |
+
"- **Analytics & ML Models:** Ensemble predictive models incorporating temporal health data, risk scoring, anomaly detection \n",
|
| 585 |
+
"- **Agentic Orchestrator:** Rule-based and reinforcement learning-driven workflow engine enabling autonomous decision-making and stakeholder communications \n",
|
| 586 |
+
"- **Frontend Interfaces:** Responsive patient app, provider portals, API access for system integration\n",
|
| 587 |
+
"\n",
|
| 588 |
+
"---\n",
|
| 589 |
+
"\n",
|
| 590 |
+
"## Potential Challenges & Mitigations\n",
|
| 591 |
+
"\n",
|
| 592 |
+
"| Challenge | Mitigation Strategy |\n",
|
| 593 |
+
"|-----------|---------------------|\n",
|
| 594 |
+
"| Data privacy & regulatory compliance | Built-in privacy-by-design, end-to-end encryption, rigorous consent management, audit trails |\n",
|
| 595 |
+
"| Data interoperability & standardization | Utilize open standards (FHIR, DICOM), NLP for unstructured data extraction |\n",
|
| 596 |
+
"| Model explainability | Implement interpretable ML techniques and transparent reasoning for clinicians |\n",
|
| 597 |
+
"| Patient engagement sustainability | Gamification, behavior science-driven personalized nudges |\n",
|
| 598 |
+
"| Integration complexity across healthcare IT systems | Modular adaptors/plugins, partnerships with major EHR vendors |\n",
|
| 599 |
+
"\n",
|
| 600 |
+
"---\n",
|
| 601 |
+
"\n",
|
| 602 |
+
"## Impact & Benefits\n",
|
| 603 |
+
"\n",
|
| 604 |
+
"- **For Patients:** Reduced health risks, increased empowerment, improved treatment adherence, and personal convenience \n",
|
| 605 |
+
"- **For Providers:** Enhanced clinical decision support, reduced administrative burden, timely interventions \n",
|
| 606 |
+
"- **For Payers:** Lowered costs via preventive care and reduced hospital readmissions\n",
|
| 607 |
+
"\n",
|
| 608 |
+
"---\n",
|
| 609 |
+
"\n",
|
| 610 |
+
"Would you like me to help you design detailed user journeys, develop specific ML model architectures, or draft an implementation roadmap for **HealthSynth AI**?"
|
| 611 |
+
],
|
| 612 |
+
"text/plain": [
|
| 613 |
+
"<IPython.core.display.Markdown object>"
|
| 614 |
+
]
|
| 615 |
+
},
|
| 616 |
+
"metadata": {},
|
| 617 |
+
"output_type": "display_data"
|
| 618 |
+
}
|
| 619 |
+
],
|
| 620 |
+
"source": [
|
| 621 |
+
"# First create the messages:\n",
|
| 622 |
+
"\n",
|
| 623 |
+
"messages = [{\"role\": \"user\", \"content\": \"I want you to pick a business area that might be worth exploring for an Agentic AI opportunity.\"}]\n",
|
| 624 |
+
"\n",
|
| 625 |
+
"# Then make the first call:\n",
|
| 626 |
+
"\n",
|
| 627 |
+
"response = openai.chat.completions.create(\n",
|
| 628 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 629 |
+
" messages=messages\n",
|
| 630 |
+
")\n",
|
| 631 |
+
"\n",
|
| 632 |
+
"# Then read the business idea:\n",
|
| 633 |
+
"\n",
|
| 634 |
+
"business_idea = response.choices[0].message.content\n",
|
| 635 |
+
"\n",
|
| 636 |
+
"# print(business_idea)\n",
|
| 637 |
+
"\n",
|
| 638 |
+
"messages = [{\"role\": \"user\", \"content\": f\"Please propose a pain-point in the {business_idea} industry.\"}]\n",
|
| 639 |
+
"\n",
|
| 640 |
+
"response = openai.chat.completions.create(\n",
|
| 641 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 642 |
+
" messages=messages\n",
|
| 643 |
+
")\n",
|
| 644 |
+
"\n",
|
| 645 |
+
"pain_point = response.choices[0].message.content\n",
|
| 646 |
+
"\n",
|
| 647 |
+
"messages = [{\"role\": \"user\", \"content\": f\"Please propose an Agentic AI solution to the pain-point: {pain_point}.\"}]\n",
|
| 648 |
+
"\n",
|
| 649 |
+
"response = openai.chat.completions.create(\n",
|
| 650 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 651 |
+
" messages=messages\n",
|
| 652 |
+
")\n",
|
| 653 |
+
"\n",
|
| 654 |
+
"agentic_solution = response.choices[0].message.content\n",
|
| 655 |
+
"\n",
|
| 656 |
+
"display(Markdown(agentic_solution))\n",
|
| 657 |
+
"\n",
|
| 658 |
+
"# And repeat! In the next message, include the business idea within the message"
|
| 659 |
+
]
|
| 660 |
+
},
|
| 661 |
+
{
|
| 662 |
+
"cell_type": "markdown",
|
| 663 |
+
"metadata": {},
|
| 664 |
+
"source": []
|
| 665 |
+
}
|
| 666 |
+
],
|
| 667 |
+
"metadata": {
|
| 668 |
+
"kernelspec": {
|
| 669 |
+
"display_name": ".venv",
|
| 670 |
+
"language": "python",
|
| 671 |
+
"name": "python3"
|
| 672 |
+
},
|
| 673 |
+
"language_info": {
|
| 674 |
+
"codemirror_mode": {
|
| 675 |
+
"name": "ipython",
|
| 676 |
+
"version": 3
|
| 677 |
+
},
|
| 678 |
+
"file_extension": ".py",
|
| 679 |
+
"mimetype": "text/x-python",
|
| 680 |
+
"name": "python",
|
| 681 |
+
"nbconvert_exporter": "python",
|
| 682 |
+
"pygments_lexer": "ipython3",
|
| 683 |
+
"version": "3.12.11"
|
| 684 |
+
}
|
| 685 |
+
},
|
| 686 |
+
"nbformat": 4,
|
| 687 |
+
"nbformat_minor": 2
|
| 688 |
+
}
|
community_contributions/1_lab1_Mudassar.ipynb
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# First Agentic AI workflow with OPENAI"
|
| 8 |
+
]
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"cell_type": "markdown",
|
| 12 |
+
"metadata": {},
|
| 13 |
+
"source": [
|
| 14 |
+
"#### And please do remember to contact me if I can help\n",
|
| 15 |
+
"\n",
|
| 16 |
+
"And I love to connect: https://www.linkedin.com/in/muhammad-mudassar-a65645192/"
|
| 17 |
+
]
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"cell_type": "markdown",
|
| 21 |
+
"metadata": {},
|
| 22 |
+
"source": [
|
| 23 |
+
"## Import Libraries"
|
| 24 |
+
]
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"cell_type": "code",
|
| 28 |
+
"execution_count": 59,
|
| 29 |
+
"metadata": {},
|
| 30 |
+
"outputs": [],
|
| 31 |
+
"source": [
|
| 32 |
+
"import os\n",
|
| 33 |
+
"import re\n",
|
| 34 |
+
"from openai import OpenAI\n",
|
| 35 |
+
"from dotenv import load_dotenv\n",
|
| 36 |
+
"from IPython.display import Markdown, display"
|
| 37 |
+
]
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"cell_type": "code",
|
| 41 |
+
"execution_count": null,
|
| 42 |
+
"metadata": {},
|
| 43 |
+
"outputs": [],
|
| 44 |
+
"source": [
|
| 45 |
+
"load_dotenv(override=True)"
|
| 46 |
+
]
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"cell_type": "code",
|
| 50 |
+
"execution_count": null,
|
| 51 |
+
"metadata": {},
|
| 52 |
+
"outputs": [],
|
| 53 |
+
"source": [
|
| 54 |
+
"openai_api_key=os.getenv(\"OPENAI_API_KEY\")\n",
|
| 55 |
+
"if openai_api_key:\n",
|
| 56 |
+
" print(f\"openai api key exists and begins {openai_api_key[:8]}\")\n",
|
| 57 |
+
"else:\n",
|
| 58 |
+
" print(\"OpenAI API Key not set - please head to the troubleshooting guide in the gui\")"
|
| 59 |
+
]
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"cell_type": "markdown",
|
| 63 |
+
"metadata": {},
|
| 64 |
+
"source": [
|
| 65 |
+
"## Workflow with OPENAI"
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"cell_type": "code",
|
| 70 |
+
"execution_count": 21,
|
| 71 |
+
"metadata": {},
|
| 72 |
+
"outputs": [],
|
| 73 |
+
"source": [
|
| 74 |
+
"openai=OpenAI()"
|
| 75 |
+
]
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"cell_type": "code",
|
| 79 |
+
"execution_count": 31,
|
| 80 |
+
"metadata": {},
|
| 81 |
+
"outputs": [],
|
| 82 |
+
"source": [
|
| 83 |
+
"message = [{'role':'user','content':\"what is 2+3?\"}]"
|
| 84 |
+
]
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"cell_type": "code",
|
| 88 |
+
"execution_count": null,
|
| 89 |
+
"metadata": {},
|
| 90 |
+
"outputs": [],
|
| 91 |
+
"source": [
|
| 92 |
+
"response = openai.chat.completions.create(model=\"gpt-4o-mini\",messages=message)\n",
|
| 93 |
+
"print(response.choices[0].message.content)"
|
| 94 |
+
]
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"cell_type": "code",
|
| 98 |
+
"execution_count": 33,
|
| 99 |
+
"metadata": {},
|
| 100 |
+
"outputs": [],
|
| 101 |
+
"source": [
|
| 102 |
+
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 103 |
+
"message=[{'role':'user','content':question}]"
|
| 104 |
+
]
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"cell_type": "code",
|
| 108 |
+
"execution_count": null,
|
| 109 |
+
"metadata": {},
|
| 110 |
+
"outputs": [],
|
| 111 |
+
"source": [
|
| 112 |
+
"response=openai.chat.completions.create(model=\"gpt-4o-mini\",messages=message)\n",
|
| 113 |
+
"question=response.choices[0].message.content\n",
|
| 114 |
+
"print(f\"Answer: {question}\")"
|
| 115 |
+
]
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"cell_type": "code",
|
| 119 |
+
"execution_count": 35,
|
| 120 |
+
"metadata": {},
|
| 121 |
+
"outputs": [],
|
| 122 |
+
"source": [
|
| 123 |
+
"message=[{'role':'user','content':question}]"
|
| 124 |
+
]
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"cell_type": "code",
|
| 128 |
+
"execution_count": null,
|
| 129 |
+
"metadata": {},
|
| 130 |
+
"outputs": [],
|
| 131 |
+
"source": [
|
| 132 |
+
"response=openai.chat.completions.create(model=\"gpt-4o-mini\",messages=message)\n",
|
| 133 |
+
"answer = response.choices[0].message.content\n",
|
| 134 |
+
"print(f\"Answer: {answer}\")"
|
| 135 |
+
]
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"cell_type": "code",
|
| 139 |
+
"execution_count": null,
|
| 140 |
+
"metadata": {},
|
| 141 |
+
"outputs": [],
|
| 142 |
+
"source": [
|
| 143 |
+
"# convert \\[ ... \\] to $$ ... $$, to properly render Latex\n",
|
| 144 |
+
"converted_answer = re.sub(r'\\\\[\\[\\]]', '$$', answer)\n",
|
| 145 |
+
"display(Markdown(converted_answer))"
|
| 146 |
+
]
|
| 147 |
+
},
|
| 148 |
+
{
|
| 149 |
+
"cell_type": "markdown",
|
| 150 |
+
"metadata": {},
|
| 151 |
+
"source": [
|
| 152 |
+
"## Exercise"
|
| 153 |
+
]
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"cell_type": "markdown",
|
| 157 |
+
"metadata": {},
|
| 158 |
+
"source": [
|
| 159 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 160 |
+
" <tr>\n",
|
| 161 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 162 |
+
" <img src=\"../../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 163 |
+
" </td>\n",
|
| 164 |
+
" <td>\n",
|
| 165 |
+
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 166 |
+
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 167 |
+
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 168 |
+
" Finally have a third LLM call propose the Agentic AI solution.\n",
|
| 169 |
+
" </span>\n",
|
| 170 |
+
" </td>\n",
|
| 171 |
+
" </tr>\n",
|
| 172 |
+
"</table>"
|
| 173 |
+
]
|
| 174 |
+
},
|
| 175 |
+
{
|
| 176 |
+
"cell_type": "code",
|
| 177 |
+
"execution_count": 42,
|
| 178 |
+
"metadata": {},
|
| 179 |
+
"outputs": [],
|
| 180 |
+
"source": [
|
| 181 |
+
"message = [{'role':'user','content':\"give me a business area related to ecommerce that might be worth exploring for a agentic opportunity.\"}]"
|
| 182 |
+
]
|
| 183 |
+
},
|
| 184 |
+
{
|
| 185 |
+
"cell_type": "code",
|
| 186 |
+
"execution_count": null,
|
| 187 |
+
"metadata": {},
|
| 188 |
+
"outputs": [],
|
| 189 |
+
"source": [
|
| 190 |
+
"response = openai.chat.completions.create(model=\"gpt-4o-mini\",messages=message)\n",
|
| 191 |
+
"business_area = response.choices[0].message.content\n",
|
| 192 |
+
"business_area"
|
| 193 |
+
]
|
| 194 |
+
},
|
| 195 |
+
{
|
| 196 |
+
"cell_type": "code",
|
| 197 |
+
"execution_count": null,
|
| 198 |
+
"metadata": {},
|
| 199 |
+
"outputs": [],
|
| 200 |
+
"source": [
|
| 201 |
+
"message = business_area + \"present a pain-point in that industry - something challenging that might be ripe for an agentic solutions.\"\n",
|
| 202 |
+
"message"
|
| 203 |
+
]
|
| 204 |
+
},
|
| 205 |
+
{
|
| 206 |
+
"cell_type": "code",
|
| 207 |
+
"execution_count": null,
|
| 208 |
+
"metadata": {},
|
| 209 |
+
"outputs": [],
|
| 210 |
+
"source": [
|
| 211 |
+
"message = [{'role': 'user', 'content': message}]\n",
|
| 212 |
+
"response = openai.chat.completions.create(model=\"gpt-4o-mini\",messages=message)\n",
|
| 213 |
+
"question=response.choices[0].message.content\n",
|
| 214 |
+
"question"
|
| 215 |
+
]
|
| 216 |
+
},
|
| 217 |
+
{
|
| 218 |
+
"cell_type": "code",
|
| 219 |
+
"execution_count": null,
|
| 220 |
+
"metadata": {},
|
| 221 |
+
"outputs": [],
|
| 222 |
+
"source": [
|
| 223 |
+
"message=[{'role':'user','content':question}]\n",
|
| 224 |
+
"response=openai.chat.completions.create(model=\"gpt-4o-mini\",messages=message)\n",
|
| 225 |
+
"answer=response.choices[0].message.content\n",
|
| 226 |
+
"print(answer)"
|
| 227 |
+
]
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"cell_type": "code",
|
| 231 |
+
"execution_count": null,
|
| 232 |
+
"metadata": {},
|
| 233 |
+
"outputs": [],
|
| 234 |
+
"source": [
|
| 235 |
+
"display(Markdown(answer))"
|
| 236 |
+
]
|
| 237 |
+
}
|
| 238 |
+
],
|
| 239 |
+
"metadata": {
|
| 240 |
+
"kernelspec": {
|
| 241 |
+
"display_name": ".venv",
|
| 242 |
+
"language": "python",
|
| 243 |
+
"name": "python3"
|
| 244 |
+
},
|
| 245 |
+
"language_info": {
|
| 246 |
+
"codemirror_mode": {
|
| 247 |
+
"name": "ipython",
|
| 248 |
+
"version": 3
|
| 249 |
+
},
|
| 250 |
+
"file_extension": ".py",
|
| 251 |
+
"mimetype": "text/x-python",
|
| 252 |
+
"name": "python",
|
| 253 |
+
"nbconvert_exporter": "python",
|
| 254 |
+
"pygments_lexer": "ipython3",
|
| 255 |
+
"version": "3.12.5"
|
| 256 |
+
}
|
| 257 |
+
},
|
| 258 |
+
"nbformat": 4,
|
| 259 |
+
"nbformat_minor": 2
|
| 260 |
+
}
|
community_contributions/1_lab1_Thanh.ipynb
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
+
]
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"cell_type": "markdown",
|
| 12 |
+
"metadata": {},
|
| 13 |
+
"source": [
|
| 14 |
+
"### And please do remember to contact me if I can help\n",
|
| 15 |
+
"\n",
|
| 16 |
+
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 17 |
+
"\n",
|
| 18 |
+
"\n",
|
| 19 |
+
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 20 |
+
"\n",
|
| 21 |
+
"Just to check you've already added the Python and Jupyter extensions to Cursor, if not already installed:\n",
|
| 22 |
+
"- Open extensions (View >> extensions)\n",
|
| 23 |
+
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 24 |
+
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 25 |
+
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 26 |
+
"\n",
|
| 27 |
+
"And then:\n",
|
| 28 |
+
"1. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 29 |
+
"2. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 30 |
+
"3. Enjoy!\n",
|
| 31 |
+
"\n",
|
| 32 |
+
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 33 |
+
"1. On Mac: From the Cursor menu, choose Settings >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`); \n",
|
| 34 |
+
"On Windows PC: From the File menu, choose Preferences >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 35 |
+
"2. In the Settings search bar, type \"venv\" \n",
|
| 36 |
+
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 37 |
+
"And then try again.\n",
|
| 38 |
+
"\n",
|
| 39 |
+
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interfering. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 40 |
+
"`conda deactivate` \n",
|
| 41 |
+
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 42 |
+
"`conda config --set auto_activate_base false` \n",
|
| 43 |
+
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"cell_type": "code",
|
| 48 |
+
"execution_count": null,
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"outputs": [],
|
| 51 |
+
"source": [
|
| 52 |
+
"from dotenv import load_dotenv\n",
|
| 53 |
+
"load_dotenv()"
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "code",
|
| 58 |
+
"execution_count": null,
|
| 59 |
+
"metadata": {},
|
| 60 |
+
"outputs": [],
|
| 61 |
+
"source": [
|
| 62 |
+
"# Check the keys\n",
|
| 63 |
+
"import google.generativeai as genai\n",
|
| 64 |
+
"import os\n",
|
| 65 |
+
"genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))\n",
|
| 66 |
+
"model = genai.GenerativeModel(model_name=\"gemini-1.5-flash\")\n"
|
| 67 |
+
]
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"cell_type": "code",
|
| 71 |
+
"execution_count": null,
|
| 72 |
+
"metadata": {},
|
| 73 |
+
"outputs": [],
|
| 74 |
+
"source": [
|
| 75 |
+
"# Create a list of messages in the familiar Gemini GenAI format\n",
|
| 76 |
+
"\n",
|
| 77 |
+
"response = model.generate_content([\"2+2=?\"])\n",
|
| 78 |
+
"response.text"
|
| 79 |
+
]
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"cell_type": "code",
|
| 83 |
+
"execution_count": null,
|
| 84 |
+
"metadata": {},
|
| 85 |
+
"outputs": [],
|
| 86 |
+
"source": [
|
| 87 |
+
"# And now - let's ask for a question:\n",
|
| 88 |
+
"\n",
|
| 89 |
+
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 90 |
+
"\n",
|
| 91 |
+
"response = model.generate_content([question])\n",
|
| 92 |
+
"print(response.text)"
|
| 93 |
+
]
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"cell_type": "code",
|
| 97 |
+
"execution_count": null,
|
| 98 |
+
"metadata": {},
|
| 99 |
+
"outputs": [],
|
| 100 |
+
"source": [
|
| 101 |
+
"from IPython.display import Markdown, display\n",
|
| 102 |
+
"\n",
|
| 103 |
+
"display(Markdown(response.text))"
|
| 104 |
+
]
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"cell_type": "markdown",
|
| 108 |
+
"metadata": {},
|
| 109 |
+
"source": [
|
| 110 |
+
"# Congratulations!\n",
|
| 111 |
+
"\n",
|
| 112 |
+
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 113 |
+
"\n",
|
| 114 |
+
"Next time things get more interesting..."
|
| 115 |
+
]
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"cell_type": "code",
|
| 119 |
+
"execution_count": null,
|
| 120 |
+
"metadata": {},
|
| 121 |
+
"outputs": [],
|
| 122 |
+
"source": [
|
| 123 |
+
"# First create the messages:\n",
|
| 124 |
+
"\n",
|
| 125 |
+
"messages = [{\"role\": \"user\", \"content\": \"Something here\"}]\n",
|
| 126 |
+
"\n",
|
| 127 |
+
"# Then make the first call:\n",
|
| 128 |
+
"\n",
|
| 129 |
+
"response =\n",
|
| 130 |
+
"\n",
|
| 131 |
+
"# Then read the business idea:\n",
|
| 132 |
+
"\n",
|
| 133 |
+
"business_idea = response.\n",
|
| 134 |
+
"\n",
|
| 135 |
+
"# And repeat!"
|
| 136 |
+
]
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"cell_type": "markdown",
|
| 140 |
+
"metadata": {},
|
| 141 |
+
"source": []
|
| 142 |
+
}
|
| 143 |
+
],
|
| 144 |
+
"metadata": {
|
| 145 |
+
"kernelspec": {
|
| 146 |
+
"display_name": "llm_projects",
|
| 147 |
+
"language": "python",
|
| 148 |
+
"name": "python3"
|
| 149 |
+
},
|
| 150 |
+
"language_info": {
|
| 151 |
+
"codemirror_mode": {
|
| 152 |
+
"name": "ipython",
|
| 153 |
+
"version": 3
|
| 154 |
+
},
|
| 155 |
+
"file_extension": ".py",
|
| 156 |
+
"mimetype": "text/x-python",
|
| 157 |
+
"name": "python",
|
| 158 |
+
"nbconvert_exporter": "python",
|
| 159 |
+
"pygments_lexer": "ipython3",
|
| 160 |
+
"version": "3.10.15"
|
| 161 |
+
}
|
| 162 |
+
},
|
| 163 |
+
"nbformat": 4,
|
| 164 |
+
"nbformat_minor": 2
|
| 165 |
+
}
|
community_contributions/1_lab1_cm.ipynb
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
+
]
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"cell_type": "markdown",
|
| 12 |
+
"metadata": {},
|
| 13 |
+
"source": [
|
| 14 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 15 |
+
" <tr>\n",
|
| 16 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 17 |
+
" <img src=\"../../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 18 |
+
" </td>\n",
|
| 19 |
+
" <td>\n",
|
| 20 |
+
" <h2 style=\"color:#ff7800;\">Are you ready for action??</h2>\n",
|
| 21 |
+
" <span style=\"color:#ff7800;\">Have you completed all the setup steps in the <a href=\"../setup/\">setup</a> folder?<br/>\n",
|
| 22 |
+
" Have you checked out the guides in the <a href=\"../guides/01_intro.ipynb\">guides</a> folder?<br/>\n",
|
| 23 |
+
" Well in that case, you're ready!!\n",
|
| 24 |
+
" </span>\n",
|
| 25 |
+
" </td>\n",
|
| 26 |
+
" </tr>\n",
|
| 27 |
+
"</table>"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"cell_type": "markdown",
|
| 32 |
+
"metadata": {},
|
| 33 |
+
"source": [
|
| 34 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 35 |
+
" <tr>\n",
|
| 36 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 37 |
+
" <img src=\"../../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 38 |
+
" </td>\n",
|
| 39 |
+
" <td>\n",
|
| 40 |
+
" <h2 style=\"color:#00bfff;\">Treat these labs as a resource</h2>\n",
|
| 41 |
+
" <span style=\"color:#00bfff;\">I push updates to the code regularly. When people ask questions or have problems, I incorporate it in the code, adding more examples or improved commentary. As a result, you'll notice that the code below isn't identical to the videos. Everything from the videos is here; but in addition, I've added more steps and better explanations. Consider this like an interactive book that accompanies the lectures.\n",
|
| 42 |
+
" </span>\n",
|
| 43 |
+
" </td>\n",
|
| 44 |
+
" </tr>\n",
|
| 45 |
+
"</table>"
|
| 46 |
+
]
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"cell_type": "markdown",
|
| 50 |
+
"metadata": {},
|
| 51 |
+
"source": [
|
| 52 |
+
"### And please do remember to contact me if I can help\n",
|
| 53 |
+
"\n",
|
| 54 |
+
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 55 |
+
"\n",
|
| 56 |
+
"\n",
|
| 57 |
+
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 58 |
+
"\n",
|
| 59 |
+
"Just to check you've already added the Python and Jupyter extensions to Cursor, if not already installed:\n",
|
| 60 |
+
"- Open extensions (View >> extensions)\n",
|
| 61 |
+
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 62 |
+
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 63 |
+
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 64 |
+
"\n",
|
| 65 |
+
"And then:\n",
|
| 66 |
+
"1. Run `uv add google-genai` to install the Google Gemini library. (If you had started your environment before running this command, you will need to restart your environment in the Jupyter notebook.)\n",
|
| 67 |
+
"2. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 68 |
+
"3. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 69 |
+
"4. Enjoy!\n",
|
| 70 |
+
"\n",
|
| 71 |
+
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 72 |
+
"1. From the Cursor menu, choose Settings >> VSCode Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 73 |
+
"2. In the Settings search bar, type \"venv\" \n",
|
| 74 |
+
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 75 |
+
"And then try again.\n",
|
| 76 |
+
"\n",
|
| 77 |
+
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interfering. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 78 |
+
"`conda deactivate` \n",
|
| 79 |
+
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 80 |
+
"`conda config --set auto_activate_base false` \n",
|
| 81 |
+
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 82 |
+
]
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"cell_type": "code",
|
| 86 |
+
"execution_count": null,
|
| 87 |
+
"metadata": {},
|
| 88 |
+
"outputs": [],
|
| 89 |
+
"source": [
|
| 90 |
+
"from dotenv import load_dotenv\n"
|
| 91 |
+
]
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"cell_type": "code",
|
| 95 |
+
"execution_count": null,
|
| 96 |
+
"metadata": {},
|
| 97 |
+
"outputs": [],
|
| 98 |
+
"source": [
|
| 99 |
+
"# Next it's time to load the API keys into environment variables\n",
|
| 100 |
+
"\n",
|
| 101 |
+
"load_dotenv(override=True)"
|
| 102 |
+
]
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"cell_type": "code",
|
| 106 |
+
"execution_count": null,
|
| 107 |
+
"metadata": {},
|
| 108 |
+
"outputs": [],
|
| 109 |
+
"source": [
|
| 110 |
+
"# Check the keys\n",
|
| 111 |
+
"\n",
|
| 112 |
+
"import os\n",
|
| 113 |
+
"gemini_api_key = os.getenv('GEMINI_API_KEY')\n",
|
| 114 |
+
"\n",
|
| 115 |
+
"if gemini_api_key:\n",
|
| 116 |
+
" print(f\"Gemini API Key exists and begins {gemini_api_key[:8]}\")\n",
|
| 117 |
+
"else:\n",
|
| 118 |
+
" print(\"Gemini API Key not set - please head to the troubleshooting guide in the guides folder\")\n",
|
| 119 |
+
" \n"
|
| 120 |
+
]
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"cell_type": "code",
|
| 124 |
+
"execution_count": null,
|
| 125 |
+
"metadata": {},
|
| 126 |
+
"outputs": [],
|
| 127 |
+
"source": [
|
| 128 |
+
"# And now - the all important import statement\n",
|
| 129 |
+
"# If you get an import error - head over to troubleshooting guide\n",
|
| 130 |
+
"\n",
|
| 131 |
+
"from google import genai"
|
| 132 |
+
]
|
| 133 |
+
},
|
| 134 |
+
{
|
| 135 |
+
"cell_type": "code",
|
| 136 |
+
"execution_count": null,
|
| 137 |
+
"metadata": {},
|
| 138 |
+
"outputs": [],
|
| 139 |
+
"source": [
|
| 140 |
+
"# And now we'll create an instance of the Gemini GenAI class\n",
|
| 141 |
+
"# If you're not sure what it means to create an instance of a class - head over to the guides folder!\n",
|
| 142 |
+
"# If you get a NameError - head over to the guides folder to learn about NameErrors\n",
|
| 143 |
+
"\n",
|
| 144 |
+
"client = genai.Client(api_key=gemini_api_key)"
|
| 145 |
+
]
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"cell_type": "code",
|
| 149 |
+
"execution_count": null,
|
| 150 |
+
"metadata": {},
|
| 151 |
+
"outputs": [],
|
| 152 |
+
"source": [
|
| 153 |
+
"# Create a list of messages in the familiar Gemini GenAI format\n",
|
| 154 |
+
"\n",
|
| 155 |
+
"messages = [\"What is 2+2?\"]"
|
| 156 |
+
]
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"cell_type": "code",
|
| 160 |
+
"execution_count": null,
|
| 161 |
+
"metadata": {},
|
| 162 |
+
"outputs": [],
|
| 163 |
+
"source": [
|
| 164 |
+
"# And now call it! Any problems, head to the troubleshooting guide\n",
|
| 165 |
+
"\n",
|
| 166 |
+
"response = client.models.generate_content(\n",
|
| 167 |
+
" model=\"gemini-2.0-flash\", contents=messages\n",
|
| 168 |
+
")\n",
|
| 169 |
+
"\n",
|
| 170 |
+
"print(response.text)\n"
|
| 171 |
+
]
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"cell_type": "code",
|
| 175 |
+
"execution_count": null,
|
| 176 |
+
"metadata": {},
|
| 177 |
+
"outputs": [],
|
| 178 |
+
"source": [
|
| 179 |
+
"\n",
|
| 180 |
+
"# Let's now create a challenging question\n",
|
| 181 |
+
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 182 |
+
"\n",
|
| 183 |
+
"# Ask the model\n",
|
| 184 |
+
"response = client.models.generate_content(\n",
|
| 185 |
+
" model=\"gemini-2.0-flash\", contents=question\n",
|
| 186 |
+
")\n",
|
| 187 |
+
"\n",
|
| 188 |
+
"question = response.text\n",
|
| 189 |
+
"\n",
|
| 190 |
+
"print(question)\n"
|
| 191 |
+
]
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"cell_type": "code",
|
| 195 |
+
"execution_count": null,
|
| 196 |
+
"metadata": {},
|
| 197 |
+
"outputs": [],
|
| 198 |
+
"source": [
|
| 199 |
+
"# Ask the model's generated question to the model\n",
|
| 200 |
+
"response = client.models.generate_content(\n",
|
| 201 |
+
" model=\"gemini-2.0-flash\", contents=question\n",
|
| 202 |
+
")\n",
|
| 203 |
+
"\n",
|
| 204 |
+
"# Extract the answer from the response\n",
|
| 205 |
+
"answer = response.text\n",
|
| 206 |
+
"\n",
|
| 207 |
+
"# Debug log the answer\n",
|
| 208 |
+
"print(answer)\n"
|
| 209 |
+
]
|
| 210 |
+
},
|
| 211 |
+
{
|
| 212 |
+
"cell_type": "code",
|
| 213 |
+
"execution_count": null,
|
| 214 |
+
"metadata": {},
|
| 215 |
+
"outputs": [],
|
| 216 |
+
"source": [
|
| 217 |
+
"from IPython.display import Markdown, display\n",
|
| 218 |
+
"\n",
|
| 219 |
+
"# Nicely format the answer using Markdown\n",
|
| 220 |
+
"display(Markdown(answer))\n",
|
| 221 |
+
"\n"
|
| 222 |
+
]
|
| 223 |
+
},
|
| 224 |
+
{
|
| 225 |
+
"cell_type": "markdown",
|
| 226 |
+
"metadata": {},
|
| 227 |
+
"source": [
|
| 228 |
+
"# Congratulations!\n",
|
| 229 |
+
"\n",
|
| 230 |
+
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 231 |
+
"\n",
|
| 232 |
+
"Next time things get more interesting..."
|
| 233 |
+
]
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"cell_type": "markdown",
|
| 237 |
+
"metadata": {},
|
| 238 |
+
"source": [
|
| 239 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 240 |
+
" <tr>\n",
|
| 241 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 242 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 243 |
+
" </td>\n",
|
| 244 |
+
" <td>\n",
|
| 245 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 246 |
+
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 247 |
+
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 248 |
+
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 249 |
+
" Finally have a third LLM call propose the Agentic AI solution.\n",
|
| 250 |
+
" </span>\n",
|
| 251 |
+
" </td>\n",
|
| 252 |
+
" </tr>\n",
|
| 253 |
+
"</table>"
|
| 254 |
+
]
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"cell_type": "code",
|
| 258 |
+
"execution_count": null,
|
| 259 |
+
"metadata": {},
|
| 260 |
+
"outputs": [],
|
| 261 |
+
"source": [
|
| 262 |
+
"# First create the messages:\n",
|
| 263 |
+
"\n",
|
| 264 |
+
"\n",
|
| 265 |
+
"messages = [\"Something here\"]\n",
|
| 266 |
+
"\n",
|
| 267 |
+
"# Then make the first call:\n",
|
| 268 |
+
"\n",
|
| 269 |
+
"response =\n",
|
| 270 |
+
"\n",
|
| 271 |
+
"# Then read the business idea:\n",
|
| 272 |
+
"\n",
|
| 273 |
+
"business_idea = response.\n",
|
| 274 |
+
"\n",
|
| 275 |
+
"# And repeat!"
|
| 276 |
+
]
|
| 277 |
+
},
|
| 278 |
+
{
|
| 279 |
+
"cell_type": "markdown",
|
| 280 |
+
"metadata": {},
|
| 281 |
+
"source": []
|
| 282 |
+
}
|
| 283 |
+
],
|
| 284 |
+
"metadata": {
|
| 285 |
+
"kernelspec": {
|
| 286 |
+
"display_name": ".venv",
|
| 287 |
+
"language": "python",
|
| 288 |
+
"name": "python3"
|
| 289 |
+
},
|
| 290 |
+
"language_info": {
|
| 291 |
+
"codemirror_mode": {
|
| 292 |
+
"name": "ipython",
|
| 293 |
+
"version": 3
|
| 294 |
+
},
|
| 295 |
+
"file_extension": ".py",
|
| 296 |
+
"mimetype": "text/x-python",
|
| 297 |
+
"name": "python",
|
| 298 |
+
"nbconvert_exporter": "python",
|
| 299 |
+
"pygments_lexer": "ipython3",
|
| 300 |
+
"version": "3.12.10"
|
| 301 |
+
}
|
| 302 |
+
},
|
| 303 |
+
"nbformat": 4,
|
| 304 |
+
"nbformat_minor": 2
|
| 305 |
+
}
|
community_contributions/1_lab1_gemini.ipynb
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
+
]
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"cell_type": "markdown",
|
| 12 |
+
"metadata": {},
|
| 13 |
+
"source": [
|
| 14 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 15 |
+
" <tr>\n",
|
| 16 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 17 |
+
" <img src=\"../../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 18 |
+
" </td>\n",
|
| 19 |
+
" <td>\n",
|
| 20 |
+
" <h2 style=\"color:#ff7800;\">Are you ready for action??</h2>\n",
|
| 21 |
+
" <span style=\"color:#ff7800;\">Have you completed all the setup steps in the <a href=\"../setup/\">setup</a> folder?<br/>\n",
|
| 22 |
+
" Have you checked out the guides in the <a href=\"../guides/01_intro.ipynb\">guides</a> folder?<br/>\n",
|
| 23 |
+
" Well in that case, you're ready!!\n",
|
| 24 |
+
" </span>\n",
|
| 25 |
+
" </td>\n",
|
| 26 |
+
" </tr>\n",
|
| 27 |
+
"</table>"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"cell_type": "markdown",
|
| 32 |
+
"metadata": {},
|
| 33 |
+
"source": [
|
| 34 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 35 |
+
" <tr>\n",
|
| 36 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 37 |
+
" <img src=\"../../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 38 |
+
" </td>\n",
|
| 39 |
+
" <td>\n",
|
| 40 |
+
" <h2 style=\"color:#00bfff;\">Treat these labs as a resource</h2>\n",
|
| 41 |
+
" <span style=\"color:#00bfff;\">I push updates to the code regularly. When people ask questions or have problems, I incorporate it in the code, adding more examples or improved commentary. As a result, you'll notice that the code below isn't identical to the videos. Everything from the videos is here; but in addition, I've added more steps and better explanations. Consider this like an interactive book that accompanies the lectures.\n",
|
| 42 |
+
" </span>\n",
|
| 43 |
+
" </td>\n",
|
| 44 |
+
" </tr>\n",
|
| 45 |
+
"</table>"
|
| 46 |
+
]
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"cell_type": "markdown",
|
| 50 |
+
"metadata": {},
|
| 51 |
+
"source": [
|
| 52 |
+
"### And please do remember to contact me if I can help\n",
|
| 53 |
+
"\n",
|
| 54 |
+
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 55 |
+
"\n",
|
| 56 |
+
"\n",
|
| 57 |
+
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 58 |
+
"\n",
|
| 59 |
+
"Just to check you've already added the Python and Jupyter extensions to Cursor, if not already installed:\n",
|
| 60 |
+
"- Open extensions (View >> extensions)\n",
|
| 61 |
+
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 62 |
+
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 63 |
+
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 64 |
+
"\n",
|
| 65 |
+
"And then:\n",
|
| 66 |
+
"1. Run `uv add google-genai` to install the Google Gemini library. (If you had started your environment before running this command, you will need to restart your environment in the Jupyter notebook.)\n",
|
| 67 |
+
"2. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 68 |
+
"3. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 69 |
+
"4. Enjoy!\n",
|
| 70 |
+
"\n",
|
| 71 |
+
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 72 |
+
"1. From the Cursor menu, choose Settings >> VSCode Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 73 |
+
"2. In the Settings search bar, type \"venv\" \n",
|
| 74 |
+
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 75 |
+
"And then try again.\n",
|
| 76 |
+
"\n",
|
| 77 |
+
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interfering. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 78 |
+
"`conda deactivate` \n",
|
| 79 |
+
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 80 |
+
"`conda config --set auto_activate_base false` \n",
|
| 81 |
+
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 82 |
+
]
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"cell_type": "code",
|
| 86 |
+
"execution_count": null,
|
| 87 |
+
"metadata": {},
|
| 88 |
+
"outputs": [],
|
| 89 |
+
"source": [
|
| 90 |
+
"from dotenv import load_dotenv\n"
|
| 91 |
+
]
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"cell_type": "code",
|
| 95 |
+
"execution_count": null,
|
| 96 |
+
"metadata": {},
|
| 97 |
+
"outputs": [],
|
| 98 |
+
"source": [
|
| 99 |
+
"# Next it's time to load the API keys into environment variables\n",
|
| 100 |
+
"\n",
|
| 101 |
+
"load_dotenv(override=True)"
|
| 102 |
+
]
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"cell_type": "code",
|
| 106 |
+
"execution_count": null,
|
| 107 |
+
"metadata": {},
|
| 108 |
+
"outputs": [],
|
| 109 |
+
"source": [
|
| 110 |
+
"# Check the keys\n",
|
| 111 |
+
"\n",
|
| 112 |
+
"import os\n",
|
| 113 |
+
"gemini_api_key = os.getenv('GEMINI_API_KEY')\n",
|
| 114 |
+
"\n",
|
| 115 |
+
"if gemini_api_key:\n",
|
| 116 |
+
" print(f\"Gemini API Key exists and begins {gemini_api_key[:8]}\")\n",
|
| 117 |
+
"else:\n",
|
| 118 |
+
" print(\"Gemini API Key not set - please head to the troubleshooting guide in the guides folder\")\n",
|
| 119 |
+
" \n"
|
| 120 |
+
]
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"cell_type": "code",
|
| 124 |
+
"execution_count": null,
|
| 125 |
+
"metadata": {},
|
| 126 |
+
"outputs": [],
|
| 127 |
+
"source": [
|
| 128 |
+
"# And now - the all important import statement\n",
|
| 129 |
+
"# If you get an import error - head over to the troubleshooting guide\n",
|
| 130 |
+
"\n",
|
| 131 |
+
"from google import genai"
|
| 132 |
+
]
|
| 133 |
+
},
|
| 134 |
+
{
|
| 135 |
+
"cell_type": "code",
|
| 136 |
+
"execution_count": null,
|
| 137 |
+
"metadata": {},
|
| 138 |
+
"outputs": [],
|
| 139 |
+
"source": [
|
| 140 |
+
"# And now we'll create an instance of the Gemini GenAI class\n",
|
| 141 |
+
"# If you're not sure what it means to create an instance of a class - head over to the guides folder!\n",
|
| 142 |
+
"# If you get a NameError - head over to the guides folder to learn about NameErrors\n",
|
| 143 |
+
"\n",
|
| 144 |
+
"client = genai.Client(api_key=gemini_api_key)"
|
| 145 |
+
]
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"cell_type": "code",
|
| 149 |
+
"execution_count": null,
|
| 150 |
+
"metadata": {},
|
| 151 |
+
"outputs": [],
|
| 152 |
+
"source": [
|
| 153 |
+
"# Create a list of messages in the familiar Gemini GenAI format\n",
|
| 154 |
+
"\n",
|
| 155 |
+
"messages = [\"What is 2+2?\"]"
|
| 156 |
+
]
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"cell_type": "code",
|
| 160 |
+
"execution_count": null,
|
| 161 |
+
"metadata": {},
|
| 162 |
+
"outputs": [],
|
| 163 |
+
"source": [
|
| 164 |
+
"# And now call it! Any problems, head to the troubleshooting guide\n",
|
| 165 |
+
"\n",
|
| 166 |
+
"response = client.models.generate_content(\n",
|
| 167 |
+
" model=\"gemini-2.0-flash\", contents=messages\n",
|
| 168 |
+
")\n",
|
| 169 |
+
"\n",
|
| 170 |
+
"print(response.text)\n"
|
| 171 |
+
]
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"cell_type": "code",
|
| 175 |
+
"execution_count": null,
|
| 176 |
+
"metadata": {},
|
| 177 |
+
"outputs": [],
|
| 178 |
+
"source": [
|
| 179 |
+
"\n",
|
| 180 |
+
"# Let's now create a challenging question\n",
|
| 181 |
+
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 182 |
+
"\n",
|
| 183 |
+
"# Ask the model\n",
|
| 184 |
+
"response = client.models.generate_content(\n",
|
| 185 |
+
" model=\"gemini-2.0-flash\", contents=question\n",
|
| 186 |
+
")\n",
|
| 187 |
+
"\n",
|
| 188 |
+
"question = response.text\n",
|
| 189 |
+
"\n",
|
| 190 |
+
"print(question)\n"
|
| 191 |
+
]
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"cell_type": "code",
|
| 195 |
+
"execution_count": null,
|
| 196 |
+
"metadata": {},
|
| 197 |
+
"outputs": [],
|
| 198 |
+
"source": [
|
| 199 |
+
"# Ask the model's generated question to the model\n",
|
| 200 |
+
"response = client.models.generate_content(\n",
|
| 201 |
+
" model=\"gemini-2.0-flash\", contents=question\n",
|
| 202 |
+
")\n",
|
| 203 |
+
"\n",
|
| 204 |
+
"# Extract the answer from the response\n",
|
| 205 |
+
"answer = response.text\n",
|
| 206 |
+
"\n",
|
| 207 |
+
"# Debug log the answer\n",
|
| 208 |
+
"print(answer)\n"
|
| 209 |
+
]
|
| 210 |
+
},
|
| 211 |
+
{
|
| 212 |
+
"cell_type": "code",
|
| 213 |
+
"execution_count": null,
|
| 214 |
+
"metadata": {},
|
| 215 |
+
"outputs": [],
|
| 216 |
+
"source": [
|
| 217 |
+
"from IPython.display import Markdown, display\n",
|
| 218 |
+
"\n",
|
| 219 |
+
"# Nicely format the answer using Markdown\n",
|
| 220 |
+
"display(Markdown(answer))\n",
|
| 221 |
+
"\n"
|
| 222 |
+
]
|
| 223 |
+
},
|
| 224 |
+
{
|
| 225 |
+
"cell_type": "markdown",
|
| 226 |
+
"metadata": {},
|
| 227 |
+
"source": [
|
| 228 |
+
"# Congratulations!\n",
|
| 229 |
+
"\n",
|
| 230 |
+
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 231 |
+
"\n",
|
| 232 |
+
"Next time things get more interesting..."
|
| 233 |
+
]
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"cell_type": "markdown",
|
| 237 |
+
"metadata": {},
|
| 238 |
+
"source": [
|
| 239 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 240 |
+
" <tr>\n",
|
| 241 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 242 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 243 |
+
" </td>\n",
|
| 244 |
+
" <td>\n",
|
| 245 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 246 |
+
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 247 |
+
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 248 |
+
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 249 |
+
" Finally have a third LLM call propose the Agentic AI solution.\n",
|
| 250 |
+
" </span>\n",
|
| 251 |
+
" </td>\n",
|
| 252 |
+
" </tr>\n",
|
| 253 |
+
"</table>"
|
| 254 |
+
]
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"cell_type": "code",
|
| 258 |
+
"execution_count": null,
|
| 259 |
+
"metadata": {},
|
| 260 |
+
"outputs": [],
|
| 261 |
+
"source": [
|
| 262 |
+
"# First create the messages:\n",
|
| 263 |
+
"\n",
|
| 264 |
+
"\n",
|
| 265 |
+
"messages = [\"Something here\"]\n",
|
| 266 |
+
"\n",
|
| 267 |
+
"# Then make the first call:\n",
|
| 268 |
+
"\n",
|
| 269 |
+
"response =\n",
|
| 270 |
+
"\n",
|
| 271 |
+
"# Then read the business idea:\n",
|
| 272 |
+
"\n",
|
| 273 |
+
"business_idea = response.\n",
|
| 274 |
+
"\n",
|
| 275 |
+
"# And repeat!"
|
| 276 |
+
]
|
| 277 |
+
},
|
| 278 |
+
{
|
| 279 |
+
"cell_type": "markdown",
|
| 280 |
+
"metadata": {},
|
| 281 |
+
"source": []
|
| 282 |
+
}
|
| 283 |
+
],
|
| 284 |
+
"metadata": {
|
| 285 |
+
"kernelspec": {
|
| 286 |
+
"display_name": ".venv",
|
| 287 |
+
"language": "python",
|
| 288 |
+
"name": "python3"
|
| 289 |
+
},
|
| 290 |
+
"language_info": {
|
| 291 |
+
"codemirror_mode": {
|
| 292 |
+
"name": "ipython",
|
| 293 |
+
"version": 3
|
| 294 |
+
},
|
| 295 |
+
"file_extension": ".py",
|
| 296 |
+
"mimetype": "text/x-python",
|
| 297 |
+
"name": "python",
|
| 298 |
+
"nbconvert_exporter": "python",
|
| 299 |
+
"pygments_lexer": "ipython3",
|
| 300 |
+
"version": "3.12.10"
|
| 301 |
+
}
|
| 302 |
+
},
|
| 303 |
+
"nbformat": 4,
|
| 304 |
+
"nbformat_minor": 2
|
| 305 |
+
}
|
community_contributions/1_lab1_groq.ipynb
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"### Implementing Notebook 1 using various LLMs via Groq"
|
| 8 |
+
]
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"cell_type": "code",
|
| 12 |
+
"execution_count": null,
|
| 13 |
+
"metadata": {},
|
| 14 |
+
"outputs": [],
|
| 15 |
+
"source": [
|
| 16 |
+
"from dotenv import load_dotenv"
|
| 17 |
+
]
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"cell_type": "code",
|
| 21 |
+
"execution_count": null,
|
| 22 |
+
"metadata": {},
|
| 23 |
+
"outputs": [],
|
| 24 |
+
"source": [
|
| 25 |
+
"load_dotenv(override=True)"
|
| 26 |
+
]
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"cell_type": "code",
|
| 30 |
+
"execution_count": null,
|
| 31 |
+
"metadata": {},
|
| 32 |
+
"outputs": [],
|
| 33 |
+
"source": [
|
| 34 |
+
"import os\n",
|
| 35 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 36 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 37 |
+
"\n",
|
| 38 |
+
"if openai_api_key:\n",
|
| 39 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 40 |
+
"else:\n",
|
| 41 |
+
" print(\"OpenAI API Key not set - please head to the troubleshooting guide in the setup folder\")\n",
|
| 42 |
+
"\n",
|
| 43 |
+
"if groq_api_key:\n",
|
| 44 |
+
" print(f\"Groq API Key exists and begins {groq_api_key[:2]}\")\n",
|
| 45 |
+
"else:\n",
|
| 46 |
+
" print(\"Groq API Key not set - please head to the troubleshooting guide in the setup folder\")\n",
|
| 47 |
+
" \n"
|
| 48 |
+
]
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"cell_type": "code",
|
| 52 |
+
"execution_count": null,
|
| 53 |
+
"metadata": {},
|
| 54 |
+
"outputs": [],
|
| 55 |
+
"source": [
|
| 56 |
+
"from openai import OpenAI"
|
| 57 |
+
]
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"cell_type": "code",
|
| 61 |
+
"execution_count": null,
|
| 62 |
+
"metadata": {},
|
| 63 |
+
"outputs": [],
|
| 64 |
+
"source": [
|
| 65 |
+
"openai = OpenAI(\n",
|
| 66 |
+
" base_url=\"https://api.groq.com/openai/v1\",\n",
|
| 67 |
+
" api_key=groq_api_key\n",
|
| 68 |
+
")"
|
| 69 |
+
]
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"cell_type": "code",
|
| 73 |
+
"execution_count": null,
|
| 74 |
+
"metadata": {},
|
| 75 |
+
"outputs": [],
|
| 76 |
+
"source": [
|
| 77 |
+
"# And now - let's ask for a question:\n",
|
| 78 |
+
"\n",
|
| 79 |
+
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 80 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 81 |
+
]
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"cell_type": "code",
|
| 85 |
+
"execution_count": null,
|
| 86 |
+
"metadata": {},
|
| 87 |
+
"outputs": [],
|
| 88 |
+
"source": [
|
| 89 |
+
"# openai/gpt-oss-120b\n",
|
| 90 |
+
"\n",
|
| 91 |
+
"response = openai.chat.completions.create(\n",
|
| 92 |
+
" model=\"openai/gpt-oss-120b\",\n",
|
| 93 |
+
" messages=messages\n",
|
| 94 |
+
")\n",
|
| 95 |
+
"\n",
|
| 96 |
+
"print(response.choices[0].message.content)\n",
|
| 97 |
+
"\n"
|
| 98 |
+
]
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"cell_type": "code",
|
| 102 |
+
"execution_count": null,
|
| 103 |
+
"metadata": {},
|
| 104 |
+
"outputs": [],
|
| 105 |
+
"source": [
|
| 106 |
+
"# moonshotai/kimi-k2-instruct\n",
|
| 107 |
+
"\n",
|
| 108 |
+
"response = openai.chat.completions.create(\n",
|
| 109 |
+
" model=\"moonshotai/kimi-k2-instruct\",\n",
|
| 110 |
+
" messages=messages\n",
|
| 111 |
+
")\n",
|
| 112 |
+
"\n",
|
| 113 |
+
"question = response.choices[0].message.content\n",
|
| 114 |
+
"\n",
|
| 115 |
+
"print(question)\n"
|
| 116 |
+
]
|
| 117 |
+
},
|
| 118 |
+
{
|
| 119 |
+
"cell_type": "code",
|
| 120 |
+
"execution_count": null,
|
| 121 |
+
"metadata": {},
|
| 122 |
+
"outputs": [],
|
| 123 |
+
"source": [
|
| 124 |
+
"# form a new messages list\n",
|
| 125 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 126 |
+
]
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"cell_type": "code",
|
| 130 |
+
"execution_count": null,
|
| 131 |
+
"metadata": {},
|
| 132 |
+
"outputs": [],
|
| 133 |
+
"source": [
|
| 134 |
+
"# Ask llama-3.1-8b-instant\n",
|
| 135 |
+
"\n",
|
| 136 |
+
"response = openai.chat.completions.create(\n",
|
| 137 |
+
" model=\"llama-3.1-8b-instant\",\n",
|
| 138 |
+
" messages=messages\n",
|
| 139 |
+
")\n",
|
| 140 |
+
"\n",
|
| 141 |
+
"answer = response.choices[0].message.content\n",
|
| 142 |
+
"print(answer)\n"
|
| 143 |
+
]
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"cell_type": "code",
|
| 147 |
+
"execution_count": null,
|
| 148 |
+
"metadata": {},
|
| 149 |
+
"outputs": [],
|
| 150 |
+
"source": [
|
| 151 |
+
"from IPython.display import Markdown, display\n",
|
| 152 |
+
"\n",
|
| 153 |
+
"display(Markdown(question))\n",
|
| 154 |
+
"display(Markdown(answer))"
|
| 155 |
+
]
|
| 156 |
+
},
|
| 157 |
+
{
|
| 158 |
+
"cell_type": "markdown",
|
| 159 |
+
"metadata": {},
|
| 160 |
+
"source": [
|
| 161 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 162 |
+
" <tr>\n",
|
| 163 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 164 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 165 |
+
" </td>\n",
|
| 166 |
+
" <td>\n",
|
| 167 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 168 |
+
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 169 |
+
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 170 |
+
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 171 |
+
" Finally have a third LLM call propose the Agentic AI solution. <br/>\n",
|
| 172 |
+
" We will cover this in upcoming labs, so don't worry if you're unsure... just give it a try!\n",
|
| 173 |
+
" </span>\n",
|
| 174 |
+
" </td>\n",
|
| 175 |
+
" </tr>\n",
|
| 176 |
+
"</table>"
|
| 177 |
+
]
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"cell_type": "code",
|
| 181 |
+
"execution_count": null,
|
| 182 |
+
"metadata": {},
|
| 183 |
+
"outputs": [],
|
| 184 |
+
"source": [
|
| 185 |
+
"# First create the messages:\n",
|
| 186 |
+
"\n",
|
| 187 |
+
"messages = [{\"role\": \"user\", \"content\": \"Pick a business area that is worth exploring for a Gen-Z audience, that can be an agentic-ai opportunity. \\\n",
|
| 188 |
+
" Somehwere where the concept of agentisation can be applied commerically. Respond only with the business idea.\"}]\n",
|
| 189 |
+
"\n",
|
| 190 |
+
"# Then make the first call: \n",
|
| 191 |
+
"\n",
|
| 192 |
+
"response = openai.chat.completions.create(\n",
|
| 193 |
+
" model = \"qwen/qwen3-32b\",\n",
|
| 194 |
+
" messages = messages\n",
|
| 195 |
+
")\n",
|
| 196 |
+
"\n",
|
| 197 |
+
"# Then read the business idea:\n",
|
| 198 |
+
"\n",
|
| 199 |
+
"business_idea = response.choices[0].message.content\n",
|
| 200 |
+
"print(business_idea)\n",
|
| 201 |
+
"\n",
|
| 202 |
+
"# And repeat! In the next message, include the business idea within the message\n",
|
| 203 |
+
"\n",
|
| 204 |
+
"user_prompt_pain_point = f\"What is the pain point of the Gen-Z audience in the business area of {business_idea}?, that can be solved by an agentic-ai solution? Give a brief answer\"\n",
|
| 205 |
+
"\n",
|
| 206 |
+
"response = openai.chat.completions.create(\n",
|
| 207 |
+
" model = \"gemma2-9b-it\",\n",
|
| 208 |
+
" messages = [{\"role\": \"user\", \"content\": user_prompt_pain_point}]\n",
|
| 209 |
+
")\n",
|
| 210 |
+
"\n",
|
| 211 |
+
"pain_point = response.choices[0].message.content\n",
|
| 212 |
+
"print(pain_point)\n",
|
| 213 |
+
"\n",
|
| 214 |
+
"user_prompt_solution = f\"What is the solution to the pain point {pain_point} of the Gen-Z audience in the business area of {business_idea}?, that can be solved by an agentic-ai solution? Provide a step-by-step breakdown\"\n",
|
| 215 |
+
"\n",
|
| 216 |
+
"response = openai.chat.completions.create(\n",
|
| 217 |
+
" model = \"deepseek-r1-distill-llama-70b\",\n",
|
| 218 |
+
" messages = [{\"role\": \"user\", \"content\": user_prompt_solution}]\n",
|
| 219 |
+
")\n",
|
| 220 |
+
"\n",
|
| 221 |
+
"business_solution = response.choices[0].message.content"
|
| 222 |
+
]
|
| 223 |
+
},
|
| 224 |
+
{
|
| 225 |
+
"cell_type": "code",
|
| 226 |
+
"execution_count": null,
|
| 227 |
+
"metadata": {},
|
| 228 |
+
"outputs": [],
|
| 229 |
+
"source": [
|
| 230 |
+
"display(Markdown(business_solution))"
|
| 231 |
+
]
|
| 232 |
+
},
|
| 233 |
+
{
|
| 234 |
+
"cell_type": "code",
|
| 235 |
+
"execution_count": null,
|
| 236 |
+
"metadata": {},
|
| 237 |
+
"outputs": [],
|
| 238 |
+
"source": []
|
| 239 |
+
}
|
| 240 |
+
],
|
| 241 |
+
"metadata": {
|
| 242 |
+
"kernelspec": {
|
| 243 |
+
"display_name": ".venv",
|
| 244 |
+
"language": "python",
|
| 245 |
+
"name": "python3"
|
| 246 |
+
},
|
| 247 |
+
"language_info": {
|
| 248 |
+
"codemirror_mode": {
|
| 249 |
+
"name": "ipython",
|
| 250 |
+
"version": 3
|
| 251 |
+
},
|
| 252 |
+
"file_extension": ".py",
|
| 253 |
+
"mimetype": "text/x-python",
|
| 254 |
+
"name": "python",
|
| 255 |
+
"nbconvert_exporter": "python",
|
| 256 |
+
"pygments_lexer": "ipython3",
|
| 257 |
+
"version": "3.12.2"
|
| 258 |
+
}
|
| 259 |
+
},
|
| 260 |
+
"nbformat": 4,
|
| 261 |
+
"nbformat_minor": 2
|
| 262 |
+
}
|
community_contributions/1_lab1_groq_llama.ipynb
ADDED
|
@@ -0,0 +1,296 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# First Agentic AI workflow with Groq and Llama-3.3 LLM (free of cost) "
|
| 8 |
+
]
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"cell_type": "code",
|
| 12 |
+
"execution_count": 1,
|
| 13 |
+
"metadata": {},
|
| 14 |
+
"outputs": [],
|
| 15 |
+
"source": [
|
| 16 |
+
"# First let's do an import\n",
|
| 17 |
+
"from dotenv import load_dotenv"
|
| 18 |
+
]
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"cell_type": "code",
|
| 22 |
+
"execution_count": null,
|
| 23 |
+
"metadata": {},
|
| 24 |
+
"outputs": [],
|
| 25 |
+
"source": [
|
| 26 |
+
"# Next it's time to load the API keys into environment variables\n",
|
| 27 |
+
"\n",
|
| 28 |
+
"load_dotenv(override=True)"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "code",
|
| 33 |
+
"execution_count": null,
|
| 34 |
+
"metadata": {},
|
| 35 |
+
"outputs": [],
|
| 36 |
+
"source": [
|
| 37 |
+
"# Check the Groq API key\n",
|
| 38 |
+
"\n",
|
| 39 |
+
"import os\n",
|
| 40 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 41 |
+
"\n",
|
| 42 |
+
"if groq_api_key:\n",
|
| 43 |
+
" print(f\"GROQ API Key exists and begins {groq_api_key[:8]}\")\n",
|
| 44 |
+
"else:\n",
|
| 45 |
+
" print(\"GROQ API Key not set\")\n",
|
| 46 |
+
" \n"
|
| 47 |
+
]
|
| 48 |
+
},
|
| 49 |
+
{
|
| 50 |
+
"cell_type": "code",
|
| 51 |
+
"execution_count": 4,
|
| 52 |
+
"metadata": {},
|
| 53 |
+
"outputs": [],
|
| 54 |
+
"source": [
|
| 55 |
+
"# And now - the all important import statement\n",
|
| 56 |
+
"# If you get an import error - head over to the troubleshooting guide\n",
|
| 57 |
+
"\n",
|
| 58 |
+
"from groq import Groq"
|
| 59 |
+
]
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"cell_type": "code",
|
| 63 |
+
"execution_count": 5,
|
| 64 |
+
"metadata": {},
|
| 65 |
+
"outputs": [],
|
| 66 |
+
"source": [
|
| 67 |
+
"# Create a Groq instance\n",
|
| 68 |
+
"groq = Groq()"
|
| 69 |
+
]
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"cell_type": "code",
|
| 73 |
+
"execution_count": 6,
|
| 74 |
+
"metadata": {},
|
| 75 |
+
"outputs": [],
|
| 76 |
+
"source": [
|
| 77 |
+
"# Create a list of messages in the familiar Groq format\n",
|
| 78 |
+
"\n",
|
| 79 |
+
"messages = [{\"role\": \"user\", \"content\": \"What is 2+2?\"}]"
|
| 80 |
+
]
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"cell_type": "code",
|
| 84 |
+
"execution_count": null,
|
| 85 |
+
"metadata": {},
|
| 86 |
+
"outputs": [],
|
| 87 |
+
"source": [
|
| 88 |
+
"# And now call it!\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"response = groq.chat.completions.create(model='llama-3.3-70b-versatile', messages=messages)\n",
|
| 91 |
+
"print(response.choices[0].message.content)\n"
|
| 92 |
+
]
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"cell_type": "code",
|
| 96 |
+
"execution_count": null,
|
| 97 |
+
"metadata": {},
|
| 98 |
+
"outputs": [],
|
| 99 |
+
"source": []
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"cell_type": "code",
|
| 103 |
+
"execution_count": 8,
|
| 104 |
+
"metadata": {},
|
| 105 |
+
"outputs": [],
|
| 106 |
+
"source": [
|
| 107 |
+
"# And now - let's ask for a question:\n",
|
| 108 |
+
"\n",
|
| 109 |
+
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 110 |
+
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 111 |
+
]
|
| 112 |
+
},
|
| 113 |
+
{
|
| 114 |
+
"cell_type": "code",
|
| 115 |
+
"execution_count": null,
|
| 116 |
+
"metadata": {},
|
| 117 |
+
"outputs": [],
|
| 118 |
+
"source": [
|
| 119 |
+
"# ask it\n",
|
| 120 |
+
"response = groq.chat.completions.create(\n",
|
| 121 |
+
" model=\"llama-3.3-70b-versatile\",\n",
|
| 122 |
+
" messages=messages\n",
|
| 123 |
+
")\n",
|
| 124 |
+
"\n",
|
| 125 |
+
"question = response.choices[0].message.content\n",
|
| 126 |
+
"\n",
|
| 127 |
+
"print(question)\n"
|
| 128 |
+
]
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"cell_type": "code",
|
| 132 |
+
"execution_count": 10,
|
| 133 |
+
"metadata": {},
|
| 134 |
+
"outputs": [],
|
| 135 |
+
"source": [
|
| 136 |
+
"# form a new messages list\n",
|
| 137 |
+
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 138 |
+
]
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"cell_type": "code",
|
| 142 |
+
"execution_count": null,
|
| 143 |
+
"metadata": {},
|
| 144 |
+
"outputs": [],
|
| 145 |
+
"source": [
|
| 146 |
+
"# Ask it again\n",
|
| 147 |
+
"\n",
|
| 148 |
+
"response = groq.chat.completions.create(\n",
|
| 149 |
+
" model=\"llama-3.3-70b-versatile\",\n",
|
| 150 |
+
" messages=messages\n",
|
| 151 |
+
")\n",
|
| 152 |
+
"\n",
|
| 153 |
+
"answer = response.choices[0].message.content\n",
|
| 154 |
+
"print(answer)\n"
|
| 155 |
+
]
|
| 156 |
+
},
|
| 157 |
+
{
|
| 158 |
+
"cell_type": "code",
|
| 159 |
+
"execution_count": null,
|
| 160 |
+
"metadata": {},
|
| 161 |
+
"outputs": [],
|
| 162 |
+
"source": [
|
| 163 |
+
"from IPython.display import Markdown, display\n",
|
| 164 |
+
"\n",
|
| 165 |
+
"display(Markdown(answer))\n",
|
| 166 |
+
"\n"
|
| 167 |
+
]
|
| 168 |
+
},
|
| 169 |
+
{
|
| 170 |
+
"cell_type": "markdown",
|
| 171 |
+
"metadata": {},
|
| 172 |
+
"source": [
|
| 173 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 174 |
+
" <tr>\n",
|
| 175 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 176 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 177 |
+
" </td>\n",
|
| 178 |
+
" <td>\n",
|
| 179 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 180 |
+
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 181 |
+
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 182 |
+
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 183 |
+
" Finally have a third LLM call propose the Agentic AI solution.\n",
|
| 184 |
+
" </span>\n",
|
| 185 |
+
" </td>\n",
|
| 186 |
+
" </tr>\n",
|
| 187 |
+
"</table>"
|
| 188 |
+
]
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"cell_type": "code",
|
| 192 |
+
"execution_count": 17,
|
| 193 |
+
"metadata": {},
|
| 194 |
+
"outputs": [],
|
| 195 |
+
"source": [
|
| 196 |
+
"# First create the messages:\n",
|
| 197 |
+
"\n",
|
| 198 |
+
"messages = [{\"role\": \"user\", \"content\": \"Give me a business area that might be ripe for an Agentic AI solution.\"}]\n",
|
| 199 |
+
"\n",
|
| 200 |
+
"# Then make the first call:\n",
|
| 201 |
+
"\n",
|
| 202 |
+
"response = groq.chat.completions.create(model='llama-3.3-70b-versatile', messages=messages)\n",
|
| 203 |
+
"\n",
|
| 204 |
+
"# Then read the business idea:\n",
|
| 205 |
+
"\n",
|
| 206 |
+
"business_idea = response.choices[0].message.content\n",
|
| 207 |
+
"\n",
|
| 208 |
+
"\n",
|
| 209 |
+
"# And repeat!"
|
| 210 |
+
]
|
| 211 |
+
},
|
| 212 |
+
{
|
| 213 |
+
"cell_type": "code",
|
| 214 |
+
"execution_count": null,
|
| 215 |
+
"metadata": {},
|
| 216 |
+
"outputs": [],
|
| 217 |
+
"source": [
|
| 218 |
+
"\n",
|
| 219 |
+
"display(Markdown(business_idea))"
|
| 220 |
+
]
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"cell_type": "code",
|
| 224 |
+
"execution_count": 19,
|
| 225 |
+
"metadata": {},
|
| 226 |
+
"outputs": [],
|
| 227 |
+
"source": [
|
| 228 |
+
"# Update the message with the business idea from previous step\n",
|
| 229 |
+
"messages = [{\"role\": \"user\", \"content\": \"What is the pain point in the business area of \" + business_idea + \"?\"}]"
|
| 230 |
+
]
|
| 231 |
+
},
|
| 232 |
+
{
|
| 233 |
+
"cell_type": "code",
|
| 234 |
+
"execution_count": 20,
|
| 235 |
+
"metadata": {},
|
| 236 |
+
"outputs": [],
|
| 237 |
+
"source": [
|
| 238 |
+
"# Make the second call\n",
|
| 239 |
+
"response = groq.chat.completions.create(model='llama-3.3-70b-versatile', messages=messages)\n",
|
| 240 |
+
"# Read the pain point\n",
|
| 241 |
+
"pain_point = response.choices[0].message.content\n"
|
| 242 |
+
]
|
| 243 |
+
},
|
| 244 |
+
{
|
| 245 |
+
"cell_type": "code",
|
| 246 |
+
"execution_count": null,
|
| 247 |
+
"metadata": {},
|
| 248 |
+
"outputs": [],
|
| 249 |
+
"source": [
|
| 250 |
+
"display(Markdown(pain_point))\n"
|
| 251 |
+
]
|
| 252 |
+
},
|
| 253 |
+
{
|
| 254 |
+
"cell_type": "code",
|
| 255 |
+
"execution_count": null,
|
| 256 |
+
"metadata": {},
|
| 257 |
+
"outputs": [],
|
| 258 |
+
"source": [
|
| 259 |
+
"# Make the third call\n",
|
| 260 |
+
"messages = [{\"role\": \"user\", \"content\": \"What is the Agentic AI solution for the pain point of \" + pain_point + \"?\"}]\n",
|
| 261 |
+
"response = groq.chat.completions.create(model='llama-3.3-70b-versatile', messages=messages)\n",
|
| 262 |
+
"# Read the agentic solution\n",
|
| 263 |
+
"agentic_solution = response.choices[0].message.content\n",
|
| 264 |
+
"display(Markdown(agentic_solution))"
|
| 265 |
+
]
|
| 266 |
+
},
|
| 267 |
+
{
|
| 268 |
+
"cell_type": "code",
|
| 269 |
+
"execution_count": null,
|
| 270 |
+
"metadata": {},
|
| 271 |
+
"outputs": [],
|
| 272 |
+
"source": []
|
| 273 |
+
}
|
| 274 |
+
],
|
| 275 |
+
"metadata": {
|
| 276 |
+
"kernelspec": {
|
| 277 |
+
"display_name": ".venv",
|
| 278 |
+
"language": "python",
|
| 279 |
+
"name": "python3"
|
| 280 |
+
},
|
| 281 |
+
"language_info": {
|
| 282 |
+
"codemirror_mode": {
|
| 283 |
+
"name": "ipython",
|
| 284 |
+
"version": 3
|
| 285 |
+
},
|
| 286 |
+
"file_extension": ".py",
|
| 287 |
+
"mimetype": "text/x-python",
|
| 288 |
+
"name": "python",
|
| 289 |
+
"nbconvert_exporter": "python",
|
| 290 |
+
"pygments_lexer": "ipython3",
|
| 291 |
+
"version": "3.12.10"
|
| 292 |
+
}
|
| 293 |
+
},
|
| 294 |
+
"nbformat": 4,
|
| 295 |
+
"nbformat_minor": 2
|
| 296 |
+
}
|
community_contributions/1_lab1_marstipton_mac.ipynb
ADDED
|
@@ -0,0 +1,411 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
+
]
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"cell_type": "markdown",
|
| 12 |
+
"metadata": {},
|
| 13 |
+
"source": [
|
| 14 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 15 |
+
" <tr>\n",
|
| 16 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 17 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 18 |
+
" </td>\n",
|
| 19 |
+
" <td>\n",
|
| 20 |
+
" <h2 style=\"color:#ff7800;\">Are you ready for action??</h2>\n",
|
| 21 |
+
" <span style=\"color:#ff7800;\">Have you completed all the setup steps in the <a href=\"../setup/\">setup</a> folder?<br/>\n",
|
| 22 |
+
" Have you read the <a href=\"../README.md\">README</a>? Many common questions are answered here!<br/>\n",
|
| 23 |
+
" Have you checked out the guides in the <a href=\"../guides/01_intro.ipynb\">guides</a> folder?<br/>\n",
|
| 24 |
+
" Well in that case, you're ready!!\n",
|
| 25 |
+
" </span>\n",
|
| 26 |
+
" </td>\n",
|
| 27 |
+
" </tr>\n",
|
| 28 |
+
"</table>"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "markdown",
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"source": [
|
| 35 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 36 |
+
" <tr>\n",
|
| 37 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 38 |
+
" <img src=\"../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 39 |
+
" </td>\n",
|
| 40 |
+
" <td>\n",
|
| 41 |
+
" <h2 style=\"color:#00bfff;\">This code is a live resource - keep an eye out for my updates</h2>\n",
|
| 42 |
+
" <span style=\"color:#00bfff;\">I push updates regularly. As people ask questions or have problems, I add more examples and improve explanations. As a result, the code below might not be identical to the videos, as I've added more steps and better comments. Consider this like an interactive book that accompanies the lectures.<br/><br/>\n",
|
| 43 |
+
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
|
| 44 |
+
" </span>\n",
|
| 45 |
+
" </td>\n",
|
| 46 |
+
" </tr>\n",
|
| 47 |
+
"</table>"
|
| 48 |
+
]
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"cell_type": "markdown",
|
| 52 |
+
"metadata": {},
|
| 53 |
+
"source": [
|
| 54 |
+
"### And please do remember to contact me if I can help\n",
|
| 55 |
+
"\n",
|
| 56 |
+
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 57 |
+
"\n",
|
| 58 |
+
"\n",
|
| 59 |
+
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 60 |
+
"\n",
|
| 61 |
+
"Just to check you've already added the Python and Jupyter extensions to Cursor, if not already installed:\n",
|
| 62 |
+
"- Open extensions (View >> extensions)\n",
|
| 63 |
+
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 64 |
+
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 65 |
+
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 66 |
+
"\n",
|
| 67 |
+
"And then:\n",
|
| 68 |
+
"1. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 69 |
+
"2. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 70 |
+
"3. Enjoy!\n",
|
| 71 |
+
"\n",
|
| 72 |
+
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 73 |
+
"1. On Mac: From the Cursor menu, choose Settings >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`); \n",
|
| 74 |
+
"On Windows PC: From the File menu, choose Preferences >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 75 |
+
"2. In the Settings search bar, type \"venv\" \n",
|
| 76 |
+
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 77 |
+
"And then try again.\n",
|
| 78 |
+
"\n",
|
| 79 |
+
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interfering. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 80 |
+
"`conda deactivate` \n",
|
| 81 |
+
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 82 |
+
"`conda config --set auto_activate_base false` \n",
|
| 83 |
+
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 84 |
+
]
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"cell_type": "code",
|
| 88 |
+
"execution_count": 12,
|
| 89 |
+
"metadata": {},
|
| 90 |
+
"outputs": [],
|
| 91 |
+
"source": [
|
| 92 |
+
"# First let's do an import. If you get an Import Error, double check that your Kernel is correct..\n",
|
| 93 |
+
"\n",
|
| 94 |
+
"from dotenv import load_dotenv\n"
|
| 95 |
+
]
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"cell_type": "code",
|
| 99 |
+
"execution_count": null,
|
| 100 |
+
"metadata": {},
|
| 101 |
+
"outputs": [],
|
| 102 |
+
"source": [
|
| 103 |
+
"# Next it's time to load the API keys into environment variables\n",
|
| 104 |
+
"# If this returns false, see the next cell!\n",
|
| 105 |
+
"\n",
|
| 106 |
+
"load_dotenv(override=True)"
|
| 107 |
+
]
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"cell_type": "markdown",
|
| 111 |
+
"metadata": {},
|
| 112 |
+
"source": [
|
| 113 |
+
"### Wait, did that just output `False`??\n",
|
| 114 |
+
"\n",
|
| 115 |
+
"If so, the most common reason is that you didn't save your `.env` file after adding the key! Be sure to have saved.\n",
|
| 116 |
+
"\n",
|
| 117 |
+
"Also, make sure the `.env` file is named precisely `.env` and is in the project root directory (`agents`)\n",
|
| 118 |
+
"\n",
|
| 119 |
+
"By the way, your `.env` file should have a stop symbol next to it in Cursor on the left, and that's actually a good thing: that's Cursor saying to you, \"hey, I realize this is a file filled with secret information, and I'm not going to send it to an external AI to suggest changes, because your keys should not be shown to anyone else.\""
|
| 120 |
+
]
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"cell_type": "markdown",
|
| 124 |
+
"metadata": {},
|
| 125 |
+
"source": [
|
| 126 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 127 |
+
" <tr>\n",
|
| 128 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 129 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 130 |
+
" </td>\n",
|
| 131 |
+
" <td>\n",
|
| 132 |
+
" <h2 style=\"color:#ff7800;\">Final reminders</h2>\n",
|
| 133 |
+
" <span style=\"color:#ff7800;\">1. If you're not confident about Environment Variables or Web Endpoints / APIs, please read Topics 3 and 5 in this <a href=\"../guides/04_technical_foundations.ipynb\">technical foundations guide</a>.<br/>\n",
|
| 134 |
+
" 2. If you want to use AIs other than OpenAI, like Gemini, DeepSeek or Ollama (free), please see the first section in this <a href=\"../guides/09_ai_apis_and_ollama.ipynb\">AI APIs guide</a>.<br/>\n",
|
| 135 |
+
" 3. If you ever get a Name Error in Python, you can always fix it immediately; see the last section of this <a href=\"../guides/06_python_foundations.ipynb\">Python Foundations guide</a> and follow both tutorials and exercises.<br/>\n",
|
| 136 |
+
" </span>\n",
|
| 137 |
+
" </td>\n",
|
| 138 |
+
" </tr>\n",
|
| 139 |
+
"</table>"
|
| 140 |
+
]
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"cell_type": "code",
|
| 144 |
+
"execution_count": null,
|
| 145 |
+
"metadata": {},
|
| 146 |
+
"outputs": [],
|
| 147 |
+
"source": [
|
| 148 |
+
"# Check the key - if you're not using OpenAI, check whichever key you're using! Ollama doesn't need a key.\n",
|
| 149 |
+
"\n",
|
| 150 |
+
"import os\n",
|
| 151 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 152 |
+
"\n",
|
| 153 |
+
"if openai_api_key:\n",
|
| 154 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 155 |
+
"else:\n",
|
| 156 |
+
" print(\"OpenAI API Key not set - please head to the troubleshooting guide in the setup folder\")\n",
|
| 157 |
+
" \n"
|
| 158 |
+
]
|
| 159 |
+
},
|
| 160 |
+
{
|
| 161 |
+
"cell_type": "code",
|
| 162 |
+
"execution_count": 15,
|
| 163 |
+
"metadata": {},
|
| 164 |
+
"outputs": [],
|
| 165 |
+
"source": [
|
| 166 |
+
"# And now - the all important import statement\n",
|
| 167 |
+
"# If you get an import error - head over to troubleshooting in the Setup folder\n",
|
| 168 |
+
"# Even for other LLM providers like Gemini, you still use this OpenAI import - see Guide 9 for why\n",
|
| 169 |
+
"\n",
|
| 170 |
+
"from openai import OpenAI"
|
| 171 |
+
]
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"cell_type": "code",
|
| 175 |
+
"execution_count": 16,
|
| 176 |
+
"metadata": {},
|
| 177 |
+
"outputs": [],
|
| 178 |
+
"source": [
|
| 179 |
+
"# And now we'll create an instance of the OpenAI class\n",
|
| 180 |
+
"# If you're not sure what it means to create an instance of a class - head over to the guides folder (guide 6)!\n",
|
| 181 |
+
"# If you get a NameError - head over to the guides folder (guide 6) to learn about NameErrors - always instantly fixable\n",
|
| 182 |
+
"# If you're not using OpenAI, you just need to slightly modify this - precise instructions are in the AI APIs guide (guide 9)\n",
|
| 183 |
+
"\n",
|
| 184 |
+
"openai = OpenAI()"
|
| 185 |
+
]
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"cell_type": "code",
|
| 189 |
+
"execution_count": 17,
|
| 190 |
+
"metadata": {},
|
| 191 |
+
"outputs": [],
|
| 192 |
+
"source": [
|
| 193 |
+
"# Create a list of messages in the familiar OpenAI format\n",
|
| 194 |
+
"\n",
|
| 195 |
+
"messages = [{\"role\": \"user\", \"content\": \"What is 2+2?\"}]"
|
| 196 |
+
]
|
| 197 |
+
},
|
| 198 |
+
{
|
| 199 |
+
"cell_type": "code",
|
| 200 |
+
"execution_count": null,
|
| 201 |
+
"metadata": {},
|
| 202 |
+
"outputs": [],
|
| 203 |
+
"source": [
|
| 204 |
+
"# And now call it! Any problems, head to the troubleshooting guide\n",
|
| 205 |
+
"# This uses GPT 4.1 mini, a very cheap model\n",
|
| 206 |
+
"# The APIs guide (guide 9) has exact instructions for using even cheaper or free alternatives to OpenAI\n",
|
| 207 |
+
"# If you get a NameError, head to the guides folder (guide 6) to learn about NameErrors - always instantly fixable\n",
|
| 208 |
+
"\n",
|
| 209 |
+
"response = openai.chat.completions.create(\n",
|
| 210 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 211 |
+
" messages=messages\n",
|
| 212 |
+
")\n",
|
| 213 |
+
"\n",
|
| 214 |
+
"print(response.choices[0].message.content)\n"
|
| 215 |
+
]
|
| 216 |
+
},
|
| 217 |
+
{
|
| 218 |
+
"cell_type": "code",
|
| 219 |
+
"execution_count": 8,
|
| 220 |
+
"metadata": {},
|
| 221 |
+
"outputs": [],
|
| 222 |
+
"source": [
|
| 223 |
+
"# And now - let's ask for a question:\n",
|
| 224 |
+
"\n",
|
| 225 |
+
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 226 |
+
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 227 |
+
]
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"cell_type": "code",
|
| 231 |
+
"execution_count": null,
|
| 232 |
+
"metadata": {},
|
| 233 |
+
"outputs": [],
|
| 234 |
+
"source": [
|
| 235 |
+
"# ask it - this uses GPT 4.1 mini, still cheap but more powerful than nano\n",
|
| 236 |
+
"\n",
|
| 237 |
+
"response = openai.chat.completions.create(\n",
|
| 238 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 239 |
+
" messages=messages\n",
|
| 240 |
+
")\n",
|
| 241 |
+
"\n",
|
| 242 |
+
"question = response.choices[0].message.content\n",
|
| 243 |
+
"\n",
|
| 244 |
+
"print(question)\n"
|
| 245 |
+
]
|
| 246 |
+
},
|
| 247 |
+
{
|
| 248 |
+
"cell_type": "code",
|
| 249 |
+
"execution_count": 10,
|
| 250 |
+
"metadata": {},
|
| 251 |
+
"outputs": [],
|
| 252 |
+
"source": [
|
| 253 |
+
"# form a new messages list\n",
|
| 254 |
+
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 255 |
+
]
|
| 256 |
+
},
|
| 257 |
+
{
|
| 258 |
+
"cell_type": "code",
|
| 259 |
+
"execution_count": null,
|
| 260 |
+
"metadata": {},
|
| 261 |
+
"outputs": [],
|
| 262 |
+
"source": [
|
| 263 |
+
"# Ask it again\n",
|
| 264 |
+
"\n",
|
| 265 |
+
"response = openai.chat.completions.create(\n",
|
| 266 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 267 |
+
" messages=messages\n",
|
| 268 |
+
")\n",
|
| 269 |
+
"\n",
|
| 270 |
+
"answer = response.choices[0].message.content\n",
|
| 271 |
+
"print(answer)\n"
|
| 272 |
+
]
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"cell_type": "code",
|
| 276 |
+
"execution_count": null,
|
| 277 |
+
"metadata": {},
|
| 278 |
+
"outputs": [],
|
| 279 |
+
"source": [
|
| 280 |
+
"from IPython.display import Markdown, display\n",
|
| 281 |
+
"\n",
|
| 282 |
+
"display(Markdown(answer))\n",
|
| 283 |
+
"\n"
|
| 284 |
+
]
|
| 285 |
+
},
|
| 286 |
+
{
|
| 287 |
+
"cell_type": "markdown",
|
| 288 |
+
"metadata": {},
|
| 289 |
+
"source": [
|
| 290 |
+
"# Congratulations!\n",
|
| 291 |
+
"\n",
|
| 292 |
+
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 293 |
+
"\n",
|
| 294 |
+
"Next time things get more interesting..."
|
| 295 |
+
]
|
| 296 |
+
},
|
| 297 |
+
{
|
| 298 |
+
"cell_type": "markdown",
|
| 299 |
+
"metadata": {},
|
| 300 |
+
"source": [
|
| 301 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 302 |
+
" <tr>\n",
|
| 303 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 304 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 305 |
+
" </td>\n",
|
| 306 |
+
" <td>\n",
|
| 307 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 308 |
+
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 309 |
+
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 310 |
+
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 311 |
+
" Finally have a third LLM call propose the Agentic AI solution. <br/>\n",
|
| 312 |
+
" We will cover this in upcoming labs, so don't worry if you're unsure... just give it a try!\n",
|
| 313 |
+
" </span>\n",
|
| 314 |
+
" </td>\n",
|
| 315 |
+
" </tr>\n",
|
| 316 |
+
"</table>"
|
| 317 |
+
]
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"cell_type": "code",
|
| 321 |
+
"execution_count": null,
|
| 322 |
+
"metadata": {},
|
| 323 |
+
"outputs": [],
|
| 324 |
+
"source": [
|
| 325 |
+
"# Step 1: Define the conversation\n",
|
| 326 |
+
"messages = [\n",
|
| 327 |
+
" {\"role\": \"system\", \"content\": \"You are an expert in agentic AI business ideation.\"}\n",
|
| 328 |
+
"]\n",
|
| 329 |
+
"\n",
|
| 330 |
+
"# Step 2: Ask the first question\n",
|
| 331 |
+
"area_prompt = (\n",
|
| 332 |
+
" \"Pick a business area within Singapore startups as of Q4 2025 \"\n",
|
| 333 |
+
" \"that might be worth exploring for an Agentic AI opportunity. \"\n",
|
| 334 |
+
" \"Explain in simple language (for a 15-year-old) and cite resources briefly.\"\n",
|
| 335 |
+
")\n",
|
| 336 |
+
"messages.append({\"role\": \"user\", \"content\": area_prompt})\n",
|
| 337 |
+
"\n",
|
| 338 |
+
"response = openai.chat.completions.create(\n",
|
| 339 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 340 |
+
" messages=messages\n",
|
| 341 |
+
")\n",
|
| 342 |
+
"area = response.choices[0].message.content\n",
|
| 343 |
+
"display(Markdown(area))\n",
|
| 344 |
+
"\n",
|
| 345 |
+
"# Add model response to context\n",
|
| 346 |
+
"messages.append({\"role\": \"assistant\", \"content\": area})\n",
|
| 347 |
+
"\n",
|
| 348 |
+
"# Step 3: Ask for a pain point\n",
|
| 349 |
+
"painpoint_prompt = (\n",
|
| 350 |
+
" \"Based on your previous response, pick a recurring pain point in that area \"\n",
|
| 351 |
+
" \"that is ripe for an Agentic AI solution.\"\n",
|
| 352 |
+
")\n",
|
| 353 |
+
"messages.append({\"role\": \"user\", \"content\": painpoint_prompt})\n",
|
| 354 |
+
"\n",
|
| 355 |
+
"response = openai.chat.completions.create(\n",
|
| 356 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 357 |
+
" messages=messages\n",
|
| 358 |
+
")\n",
|
| 359 |
+
"painpoint = response.choices[0].message.content\n",
|
| 360 |
+
"display(Markdown(painpoint))\n",
|
| 361 |
+
"\n",
|
| 362 |
+
"# Add model response to context\n",
|
| 363 |
+
"messages.append({\"role\": \"assistant\", \"content\": painpoint})\n",
|
| 364 |
+
"\n",
|
| 365 |
+
"# Step 4: Propose a business idea\n",
|
| 366 |
+
"business_idea_prompt = (\n",
|
| 367 |
+
" \"Propose an Agentic AI solution addressing the pain point above. \"\n",
|
| 368 |
+
" \"Solution should have low overhead, be secure, and offer 80% free functionality, \"\n",
|
| 369 |
+
" \"with full access for SGD 0.99/month per user or SGD 15/org (max 30 users).\"\n",
|
| 370 |
+
")\n",
|
| 371 |
+
"messages.append({\"role\": \"user\", \"content\": business_idea_prompt})\n",
|
| 372 |
+
"\n",
|
| 373 |
+
"response = openai.chat.completions.create(\n",
|
| 374 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 375 |
+
" messages=messages\n",
|
| 376 |
+
")\n",
|
| 377 |
+
"business_idea = response.choices[0].message.content\n",
|
| 378 |
+
"display(Markdown(business_idea))\n",
|
| 379 |
+
"\n",
|
| 380 |
+
"# Add to conversation (for future iterations)\n",
|
| 381 |
+
"#messages.append({\"role\": \"assistant\", \"content\": business_idea})"
|
| 382 |
+
]
|
| 383 |
+
},
|
| 384 |
+
{
|
| 385 |
+
"cell_type": "markdown",
|
| 386 |
+
"metadata": {},
|
| 387 |
+
"source": []
|
| 388 |
+
}
|
| 389 |
+
],
|
| 390 |
+
"metadata": {
|
| 391 |
+
"kernelspec": {
|
| 392 |
+
"display_name": ".venv",
|
| 393 |
+
"language": "python",
|
| 394 |
+
"name": "python3"
|
| 395 |
+
},
|
| 396 |
+
"language_info": {
|
| 397 |
+
"codemirror_mode": {
|
| 398 |
+
"name": "ipython",
|
| 399 |
+
"version": 3
|
| 400 |
+
},
|
| 401 |
+
"file_extension": ".py",
|
| 402 |
+
"mimetype": "text/x-python",
|
| 403 |
+
"name": "python",
|
| 404 |
+
"nbconvert_exporter": "python",
|
| 405 |
+
"pygments_lexer": "ipython3",
|
| 406 |
+
"version": "3.12.12"
|
| 407 |
+
}
|
| 408 |
+
},
|
| 409 |
+
"nbformat": 4,
|
| 410 |
+
"nbformat_minor": 2
|
| 411 |
+
}
|
community_contributions/1_lab1_moneek.ipynb
ADDED
|
@@ -0,0 +1,407 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
+
]
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"cell_type": "markdown",
|
| 12 |
+
"metadata": {},
|
| 13 |
+
"source": [
|
| 14 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 15 |
+
" <tr>\n",
|
| 16 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 17 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 18 |
+
" </td>\n",
|
| 19 |
+
" <td>\n",
|
| 20 |
+
" <h2 style=\"color:#ff7800;\">Are you ready for action??</h2>\n",
|
| 21 |
+
" <span style=\"color:#ff7800;\">Have you completed all the setup steps in the <a href=\"../setup/\">setup</a> folder?<br/>\n",
|
| 22 |
+
" Have you read the <a href=\"../README.md\">README</a>? Many common questions are answered here!<br/>\n",
|
| 23 |
+
" Have you checked out the guides in the <a href=\"../guides/01_intro.ipynb\">guides</a> folder?<br/>\n",
|
| 24 |
+
" Well in that case, you're ready!!\n",
|
| 25 |
+
" </span>\n",
|
| 26 |
+
" </td>\n",
|
| 27 |
+
" </tr>\n",
|
| 28 |
+
"</table>"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "markdown",
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"source": [
|
| 35 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 36 |
+
" <tr>\n",
|
| 37 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 38 |
+
" <img src=\"../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 39 |
+
" </td>\n",
|
| 40 |
+
" <td>\n",
|
| 41 |
+
" <h2 style=\"color:#00bfff;\">This code is a live resource - keep an eye out for my updates</h2>\n",
|
| 42 |
+
" <span style=\"color:#00bfff;\">I push updates regularly. As people ask questions or have problems, I add more examples and improve explanations. As a result, the code below might not be identical to the videos, as I've added more steps and better comments. Consider this like an interactive book that accompanies the lectures.<br/><br/>\n",
|
| 43 |
+
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
|
| 44 |
+
" </span>\n",
|
| 45 |
+
" </td>\n",
|
| 46 |
+
" </tr>\n",
|
| 47 |
+
"</table>"
|
| 48 |
+
]
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"cell_type": "markdown",
|
| 52 |
+
"metadata": {},
|
| 53 |
+
"source": [
|
| 54 |
+
"### And please do remember to contact me if I can help\n",
|
| 55 |
+
"\n",
|
| 56 |
+
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 57 |
+
"\n",
|
| 58 |
+
"\n",
|
| 59 |
+
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 60 |
+
"\n",
|
| 61 |
+
"Just to check you've already added the Python and Jupyter extensions to Cursor, if not already installed:\n",
|
| 62 |
+
"- Open extensions (View >> extensions)\n",
|
| 63 |
+
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 64 |
+
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 65 |
+
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 66 |
+
"\n",
|
| 67 |
+
"And then:\n",
|
| 68 |
+
"1. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 69 |
+
"2. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 70 |
+
"3. Enjoy!\n",
|
| 71 |
+
"\n",
|
| 72 |
+
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 73 |
+
"1. On Mac: From the Cursor menu, choose Settings >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`); \n",
|
| 74 |
+
"On Windows PC: From the File menu, choose Preferences >> VS Code Settings(NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 75 |
+
"2. In the Settings search bar, type \"venv\" \n",
|
| 76 |
+
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 77 |
+
"And then try again.\n",
|
| 78 |
+
"\n",
|
| 79 |
+
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interferring. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 80 |
+
"`conda deactivate` \n",
|
| 81 |
+
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 82 |
+
"`conda config --set auto_activate_base false` \n",
|
| 83 |
+
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 84 |
+
]
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"cell_type": "code",
|
| 88 |
+
"execution_count": null,
|
| 89 |
+
"metadata": {},
|
| 90 |
+
"outputs": [],
|
| 91 |
+
"source": [
|
| 92 |
+
"# First let's do an import. If you get an Import Error, double check that your Kernel is correct..\n",
|
| 93 |
+
"\n",
|
| 94 |
+
"from dotenv import load_dotenv\n"
|
| 95 |
+
]
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"cell_type": "code",
|
| 99 |
+
"execution_count": null,
|
| 100 |
+
"metadata": {},
|
| 101 |
+
"outputs": [],
|
| 102 |
+
"source": [
|
| 103 |
+
"# Next it's time to load the API keys into environment variables\n",
|
| 104 |
+
"# If this returns false, see the next cell!\n",
|
| 105 |
+
"\n",
|
| 106 |
+
"load_dotenv(override=True)"
|
| 107 |
+
]
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"cell_type": "markdown",
|
| 111 |
+
"metadata": {},
|
| 112 |
+
"source": [
|
| 113 |
+
"### Wait, did that just output `False`??\n",
|
| 114 |
+
"\n",
|
| 115 |
+
"If so, the most common reason is that you didn't save your `.env` file after adding the key! Be sure to have saved.\n",
|
| 116 |
+
"\n",
|
| 117 |
+
"Also, make sure the `.env` file is named precisely `.env` and is in the project root directory (`agents`)\n",
|
| 118 |
+
"\n",
|
| 119 |
+
"By the way, your `.env` file should have a stop symbol next to it in Cursor on the left, and that's actually a good thing: that's Cursor saying to you, \"hey, I realize this is a file filled with secret information, and I'm not going to send it to an external AI to suggest changes, because your keys should not be shown to anyone else.\""
|
| 120 |
+
]
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"cell_type": "markdown",
|
| 124 |
+
"metadata": {},
|
| 125 |
+
"source": [
|
| 126 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 127 |
+
" <tr>\n",
|
| 128 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 129 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 130 |
+
" </td>\n",
|
| 131 |
+
" <td>\n",
|
| 132 |
+
" <h2 style=\"color:#ff7800;\">Final reminders</h2>\n",
|
| 133 |
+
" <span style=\"color:#ff7800;\">1. If you're not confident about Environment Variables or Web Endpoints / APIs, please read Topics 3 and 5 in this <a href=\"../guides/04_technical_foundations.ipynb\">technical foundations guide</a>.<br/>\n",
|
| 134 |
+
" 2. If you want to use AIs other than OpenAI, like Gemini, DeepSeek or Ollama (free), please see the first section in this <a href=\"../guides/09_ai_apis_and_ollama.ipynb\">AI APIs guide</a>.<br/>\n",
|
| 135 |
+
" 3. If you ever get a Name Error in Python, you can always fix it immediately; see the last section of this <a href=\"../guides/06_python_foundations.ipynb\">Python Foundations guide</a> and follow both tutorials and exercises.<br/>\n",
|
| 136 |
+
" </span>\n",
|
| 137 |
+
" </td>\n",
|
| 138 |
+
" </tr>\n",
|
| 139 |
+
"</table>"
|
| 140 |
+
]
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"cell_type": "code",
|
| 144 |
+
"execution_count": null,
|
| 145 |
+
"metadata": {},
|
| 146 |
+
"outputs": [],
|
| 147 |
+
"source": [
|
| 148 |
+
"# Check the key - if you're not using OpenAI, check whichever key you're using! Ollama doesn't need a key.\n",
|
| 149 |
+
"\n",
|
| 150 |
+
"import os\n",
|
| 151 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 152 |
+
"\n",
|
| 153 |
+
"if openai_api_key:\n",
|
| 154 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 155 |
+
"else:\n",
|
| 156 |
+
" print(\"OpenAI API Key not set - please head to the troubleshooting guide in the setup folder\")\n",
|
| 157 |
+
" \n"
|
| 158 |
+
]
|
| 159 |
+
},
|
| 160 |
+
{
|
| 161 |
+
"cell_type": "code",
|
| 162 |
+
"execution_count": null,
|
| 163 |
+
"metadata": {},
|
| 164 |
+
"outputs": [],
|
| 165 |
+
"source": [
|
| 166 |
+
"# And now - the all important import statement\n",
|
| 167 |
+
"# If you get an import error - head over to troubleshooting in the Setup folder\n",
|
| 168 |
+
"# Even for other LLM providers like Gemini, you still use this OpenAI import - see Guide 9 for why\n",
|
| 169 |
+
"\n",
|
| 170 |
+
"from openai import OpenAI"
|
| 171 |
+
]
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"cell_type": "code",
|
| 175 |
+
"execution_count": null,
|
| 176 |
+
"metadata": {},
|
| 177 |
+
"outputs": [],
|
| 178 |
+
"source": [
|
| 179 |
+
"# And now we'll create an instance of the OpenAI class\n",
|
| 180 |
+
"# If you're not sure what it means to create an instance of a class - head over to the guides folder (guide 6)!\n",
|
| 181 |
+
"# If you get a NameError - head over to the guides folder (guide 6)to learn about NameErrors - always instantly fixable\n",
|
| 182 |
+
"# If you're not using OpenAI, you just need to slightly modify this - precise instructions are in the AI APIs guide (guide 9)\n",
|
| 183 |
+
"\n",
|
| 184 |
+
"openai = OpenAI()"
|
| 185 |
+
]
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"cell_type": "code",
|
| 189 |
+
"execution_count": null,
|
| 190 |
+
"metadata": {},
|
| 191 |
+
"outputs": [],
|
| 192 |
+
"source": [
|
| 193 |
+
"# Create a list of messages in the familiar OpenAI format\n",
|
| 194 |
+
"\n",
|
| 195 |
+
"messages = [{\"role\": \"user\", \"content\": \"What is 2+2?\"}]"
|
| 196 |
+
]
|
| 197 |
+
},
|
| 198 |
+
{
|
| 199 |
+
"cell_type": "code",
|
| 200 |
+
"execution_count": null,
|
| 201 |
+
"metadata": {},
|
| 202 |
+
"outputs": [],
|
| 203 |
+
"source": [
|
| 204 |
+
"# And now call it! Any problems, head to the troubleshooting guide\n",
|
| 205 |
+
"# This uses GPT 4.1 nano, the incredibly cheap model\n",
|
| 206 |
+
"# The APIs guide (guide 9) has exact instructions for using even cheaper or free alternatives to OpenAI\n",
|
| 207 |
+
"# If you get a NameError, head to the guides folder (guide 6) to learn about NameErrors - always instantly fixable\n",
|
| 208 |
+
"\n",
|
| 209 |
+
"response = openai.chat.completions.create(\n",
|
| 210 |
+
" model=\"gpt-4.1-nano\",\n",
|
| 211 |
+
" messages=messages\n",
|
| 212 |
+
")\n",
|
| 213 |
+
"\n",
|
| 214 |
+
"print(response.choices[0].message.content)\n"
|
| 215 |
+
]
|
| 216 |
+
},
|
| 217 |
+
{
|
| 218 |
+
"cell_type": "code",
|
| 219 |
+
"execution_count": null,
|
| 220 |
+
"metadata": {},
|
| 221 |
+
"outputs": [],
|
| 222 |
+
"source": [
|
| 223 |
+
"# And now - let's ask for a question:\n",
|
| 224 |
+
"\n",
|
| 225 |
+
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 226 |
+
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 227 |
+
]
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"cell_type": "code",
|
| 231 |
+
"execution_count": null,
|
| 232 |
+
"metadata": {},
|
| 233 |
+
"outputs": [],
|
| 234 |
+
"source": [
|
| 235 |
+
"# ask it - this uses GPT 4.1 mini, still cheap but more powerful than nano\n",
|
| 236 |
+
"\n",
|
| 237 |
+
"response = openai.chat.completions.create(\n",
|
| 238 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 239 |
+
" messages=messages\n",
|
| 240 |
+
")\n",
|
| 241 |
+
"\n",
|
| 242 |
+
"question = response.choices[0].message.content\n",
|
| 243 |
+
"\n",
|
| 244 |
+
"print(question)\n"
|
| 245 |
+
]
|
| 246 |
+
},
|
| 247 |
+
{
|
| 248 |
+
"cell_type": "code",
|
| 249 |
+
"execution_count": null,
|
| 250 |
+
"metadata": {},
|
| 251 |
+
"outputs": [],
|
| 252 |
+
"source": [
|
| 253 |
+
"# form a new messages list\n",
|
| 254 |
+
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 255 |
+
]
|
| 256 |
+
},
|
| 257 |
+
{
|
| 258 |
+
"cell_type": "code",
|
| 259 |
+
"execution_count": null,
|
| 260 |
+
"metadata": {},
|
| 261 |
+
"outputs": [],
|
| 262 |
+
"source": [
|
| 263 |
+
"# Ask it again\n",
|
| 264 |
+
"\n",
|
| 265 |
+
"response = openai.chat.completions.create(\n",
|
| 266 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 267 |
+
" messages=messages\n",
|
| 268 |
+
")\n",
|
| 269 |
+
"\n",
|
| 270 |
+
"answer = response.choices[0].message.content\n",
|
| 271 |
+
"print(answer)\n"
|
| 272 |
+
]
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"cell_type": "code",
|
| 276 |
+
"execution_count": null,
|
| 277 |
+
"metadata": {},
|
| 278 |
+
"outputs": [],
|
| 279 |
+
"source": [
|
| 280 |
+
"from IPython.display import Markdown, display\n",
|
| 281 |
+
"\n",
|
| 282 |
+
"display(Markdown(answer))\n",
|
| 283 |
+
"\n"
|
| 284 |
+
]
|
| 285 |
+
},
|
| 286 |
+
{
|
| 287 |
+
"cell_type": "markdown",
|
| 288 |
+
"metadata": {},
|
| 289 |
+
"source": [
|
| 290 |
+
"# Congratulations!\n",
|
| 291 |
+
"\n",
|
| 292 |
+
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 293 |
+
"\n",
|
| 294 |
+
"Next time things get more interesting..."
|
| 295 |
+
]
|
| 296 |
+
},
|
| 297 |
+
{
|
| 298 |
+
"cell_type": "markdown",
|
| 299 |
+
"metadata": {},
|
| 300 |
+
"source": [
|
| 301 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 302 |
+
" <tr>\n",
|
| 303 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 304 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 305 |
+
" </td>\n",
|
| 306 |
+
" <td>\n",
|
| 307 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 308 |
+
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 309 |
+
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 310 |
+
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 311 |
+
" Finally have 3 third LLM call propose the Agentic AI solution. <br/>\n",
|
| 312 |
+
" We will cover this at up-coming labs, so don't worry if you're unsure.. just give it a try!\n",
|
| 313 |
+
" </span>\n",
|
| 314 |
+
" </td>\n",
|
| 315 |
+
" </tr>\n",
|
| 316 |
+
"</table>"
|
| 317 |
+
]
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"cell_type": "code",
|
| 321 |
+
"execution_count": null,
|
| 322 |
+
"metadata": {},
|
| 323 |
+
"outputs": [],
|
| 324 |
+
"source": [
|
| 325 |
+
"# First create the messages:\n",
|
| 326 |
+
"question = \"Pick a business area that may have agentic AI opportunities\"\n",
|
| 327 |
+
"messages = [{\"role\": \"user\", \"content\": question}]\n",
|
| 328 |
+
"\n",
|
| 329 |
+
"# Then make the first call:\n",
|
| 330 |
+
"\n",
|
| 331 |
+
"response = openai.chat.completions.create(\n",
|
| 332 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 333 |
+
" messages=messages\n",
|
| 334 |
+
")\n",
|
| 335 |
+
"\n",
|
| 336 |
+
"# Then read the business idea:\n",
|
| 337 |
+
"\n",
|
| 338 |
+
"business_idea = response.choices[0].message.content\n",
|
| 339 |
+
"print(business_idea)\n",
|
| 340 |
+
"\n",
|
| 341 |
+
"# And repeat! In the next message, include the business idea within the message"
|
| 342 |
+
]
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"cell_type": "code",
|
| 346 |
+
"execution_count": null,
|
| 347 |
+
"metadata": {},
|
| 348 |
+
"outputs": [],
|
| 349 |
+
"source": [
|
| 350 |
+
"messages = [{\"role\": \"user\", \"content\": question + \"\\n\\n\" + business_idea},\n",
|
| 351 |
+
" {\"role\": \"assistant\", \"content\": \"What is the pain point in this industry?\" }]\n",
|
| 352 |
+
"\n",
|
| 353 |
+
"response = openai.chat.completions.create(\n",
|
| 354 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 355 |
+
" messages=messages\n",
|
| 356 |
+
")\n",
|
| 357 |
+
"\n",
|
| 358 |
+
"pain_point = response.choices[0].message.content\n",
|
| 359 |
+
"print(pain_point)"
|
| 360 |
+
]
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"cell_type": "code",
|
| 364 |
+
"execution_count": null,
|
| 365 |
+
"metadata": {},
|
| 366 |
+
"outputs": [],
|
| 367 |
+
"source": [
|
| 368 |
+
"messages = [{\"role\": \"user\", \"content\": question + \"\\n\\n\" + business_idea + \"\\n\\n\" + pain_point}, \n",
|
| 369 |
+
" {\"role\": \"assistant\", \"content\": \"What is the Agentic AI solution?\"}]\n",
|
| 370 |
+
"\n",
|
| 371 |
+
"response = openai.chat.completions.create(\n",
|
| 372 |
+
" model=\"gpt-4.1-mini\",\n",
|
| 373 |
+
" messages=messages\n",
|
| 374 |
+
")\n",
|
| 375 |
+
"\n",
|
| 376 |
+
"agentic_solution = response.choices[0].message.content\n",
|
| 377 |
+
"print(agentic_solution)\n"
|
| 378 |
+
]
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"cell_type": "markdown",
|
| 382 |
+
"metadata": {},
|
| 383 |
+
"source": []
|
| 384 |
+
}
|
| 385 |
+
],
|
| 386 |
+
"metadata": {
|
| 387 |
+
"kernelspec": {
|
| 388 |
+
"display_name": ".venv",
|
| 389 |
+
"language": "python",
|
| 390 |
+
"name": "python3"
|
| 391 |
+
},
|
| 392 |
+
"language_info": {
|
| 393 |
+
"codemirror_mode": {
|
| 394 |
+
"name": "ipython",
|
| 395 |
+
"version": 3
|
| 396 |
+
},
|
| 397 |
+
"file_extension": ".py",
|
| 398 |
+
"mimetype": "text/x-python",
|
| 399 |
+
"name": "python",
|
| 400 |
+
"nbconvert_exporter": "python",
|
| 401 |
+
"pygments_lexer": "ipython3",
|
| 402 |
+
"version": "3.12.11"
|
| 403 |
+
}
|
| 404 |
+
},
|
| 405 |
+
"nbformat": 4,
|
| 406 |
+
"nbformat_minor": 2
|
| 407 |
+
}
|
community_contributions/1_lab1_open_router.ipynb
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
+
]
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"cell_type": "markdown",
|
| 12 |
+
"metadata": {},
|
| 13 |
+
"source": [
|
| 14 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 15 |
+
" <tr>\n",
|
| 16 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 17 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 18 |
+
" </td>\n",
|
| 19 |
+
" <td>\n",
|
| 20 |
+
" <h2 style=\"color:#ff7800;\">Are you ready for action??</h2>\n",
|
| 21 |
+
" <span style=\"color:#ff7800;\">Have you completed all the setup steps in the <a href=\"../setup/\">setup</a> folder?<br/>\n",
|
| 22 |
+
" Have you checked out the guides in the <a href=\"../guides/01_intro.ipynb\">guides</a> folder?<br/>\n",
|
| 23 |
+
" Well in that case, you're ready!!\n",
|
| 24 |
+
" </span>\n",
|
| 25 |
+
" </td>\n",
|
| 26 |
+
" </tr>\n",
|
| 27 |
+
"</table>"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"cell_type": "markdown",
|
| 32 |
+
"metadata": {},
|
| 33 |
+
"source": [
|
| 34 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 35 |
+
" <tr>\n",
|
| 36 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 37 |
+
" <img src=\"../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 38 |
+
" </td>\n",
|
| 39 |
+
" <td>\n",
|
| 40 |
+
" <h2 style=\"color:#00bfff;\">This code is a live resource - keep an eye out for my updates</h2>\n",
|
| 41 |
+
" <span style=\"color:#00bfff;\">I push updates regularly. As people ask questions or have problems, I add more examples and improve explanations. As a result, the code below might not be identical to the videos, as I've added more steps and better comments. Consider this like an interactive book that accompanies the lectures.<br/><br/>\n",
|
| 42 |
+
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
|
| 43 |
+
" </span>\n",
|
| 44 |
+
" </td>\n",
|
| 45 |
+
" </tr>\n",
|
| 46 |
+
"</table>"
|
| 47 |
+
]
|
| 48 |
+
},
|
| 49 |
+
{
|
| 50 |
+
"cell_type": "markdown",
|
| 51 |
+
"metadata": {},
|
| 52 |
+
"source": [
|
| 53 |
+
"### And please do remember to contact me if I can help\n",
|
| 54 |
+
"\n",
|
| 55 |
+
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 56 |
+
"\n",
|
| 57 |
+
"\n",
|
| 58 |
+
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 59 |
+
"\n",
|
| 60 |
+
"Just to check you've already added the Python and Jupyter extensions to Cursor, if not already installed:\n",
|
| 61 |
+
"- Open extensions (View >> extensions)\n",
|
| 62 |
+
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 63 |
+
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 64 |
+
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 65 |
+
"\n",
|
| 66 |
+
"And then:\n",
|
| 67 |
+
"1. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 68 |
+
"2. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 69 |
+
"3. Enjoy!\n",
|
| 70 |
+
"\n",
|
| 71 |
+
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 72 |
+
"1. On Mac: From the Cursor menu, choose Settings >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`); \n",
|
| 73 |
+
"On Windows PC: From the File menu, choose Preferences >> VS Code Settings(NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 74 |
+
"2. In the Settings search bar, type \"venv\" \n",
|
| 75 |
+
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 76 |
+
"And then try again.\n",
|
| 77 |
+
"\n",
|
| 78 |
+
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interferring. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 79 |
+
"`conda deactivate` \n",
|
| 80 |
+
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 81 |
+
"`conda config --set auto_activate_base false` \n",
|
| 82 |
+
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 83 |
+
]
|
| 84 |
+
},
|
| 85 |
+
{
|
| 86 |
+
"cell_type": "code",
|
| 87 |
+
"execution_count": 76,
|
| 88 |
+
"metadata": {},
|
| 89 |
+
"outputs": [],
|
| 90 |
+
"source": [
|
| 91 |
+
"# First let's do an import\n",
|
| 92 |
+
"from dotenv import load_dotenv\n"
|
| 93 |
+
]
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"cell_type": "code",
|
| 97 |
+
"execution_count": null,
|
| 98 |
+
"metadata": {},
|
| 99 |
+
"outputs": [],
|
| 100 |
+
"source": [
|
| 101 |
+
"# Next it's time to load the API keys into environment variables\n",
|
| 102 |
+
"\n",
|
| 103 |
+
"load_dotenv(override=True)"
|
| 104 |
+
]
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"cell_type": "code",
|
| 108 |
+
"execution_count": null,
|
| 109 |
+
"metadata": {},
|
| 110 |
+
"outputs": [],
|
| 111 |
+
"source": [
|
| 112 |
+
"# Check the keys\n",
|
| 113 |
+
"\n",
|
| 114 |
+
"import os\n",
|
| 115 |
+
"open_router_api_key = os.getenv('OPEN_ROUTER_API_KEY')\n",
|
| 116 |
+
"\n",
|
| 117 |
+
"if open_router_api_key:\n",
|
| 118 |
+
" print(f\"Open router API Key exists and begins {open_router_api_key[:8]}\")\n",
|
| 119 |
+
"else:\n",
|
| 120 |
+
" print(\"Open router API Key not set - please head to the troubleshooting guide in the setup folder\")\n"
|
| 121 |
+
]
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"cell_type": "code",
|
| 125 |
+
"execution_count": 79,
|
| 126 |
+
"metadata": {},
|
| 127 |
+
"outputs": [],
|
| 128 |
+
"source": [
|
| 129 |
+
"from openai import OpenAI"
|
| 130 |
+
]
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"cell_type": "code",
|
| 134 |
+
"execution_count": 80,
|
| 135 |
+
"metadata": {},
|
| 136 |
+
"outputs": [],
|
| 137 |
+
"source": [
|
| 138 |
+
"# Initialize the client to point at OpenRouter instead of OpenAI\n",
|
| 139 |
+
"# You can use the exact same OpenAI Python package—just swap the base_url!\n",
|
| 140 |
+
"client = OpenAI(\n",
|
| 141 |
+
" base_url=\"https://openrouter.ai/api/v1\",\n",
|
| 142 |
+
" api_key=open_router_api_key\n",
|
| 143 |
+
")"
|
| 144 |
+
]
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"cell_type": "code",
|
| 148 |
+
"execution_count": 81,
|
| 149 |
+
"metadata": {},
|
| 150 |
+
"outputs": [],
|
| 151 |
+
"source": [
|
| 152 |
+
"messages = [{\"role\": \"user\", \"content\": \"What is 2+2?\"}]"
|
| 153 |
+
]
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"cell_type": "code",
|
| 157 |
+
"execution_count": null,
|
| 158 |
+
"metadata": {},
|
| 159 |
+
"outputs": [],
|
| 160 |
+
"source": [
|
| 161 |
+
"client = OpenAI(\n",
|
| 162 |
+
" base_url=\"https://openrouter.ai/api/v1\",\n",
|
| 163 |
+
" api_key=open_router_api_key\n",
|
| 164 |
+
")\n",
|
| 165 |
+
"\n",
|
| 166 |
+
"resp = client.chat.completions.create(\n",
|
| 167 |
+
" # Select a model from https://openrouter.ai/models and provide the model name here\n",
|
| 168 |
+
" model=\"meta-llama/llama-3.3-8b-instruct:free\",\n",
|
| 169 |
+
" messages=messages\n",
|
| 170 |
+
")\n",
|
| 171 |
+
"print(resp.choices[0].message.content)"
|
| 172 |
+
]
|
| 173 |
+
},
|
| 174 |
+
{
|
| 175 |
+
"cell_type": "code",
|
| 176 |
+
"execution_count": 83,
|
| 177 |
+
"metadata": {},
|
| 178 |
+
"outputs": [],
|
| 179 |
+
"source": [
|
| 180 |
+
"# And now - let's ask for a question:\n",
|
| 181 |
+
"\n",
|
| 182 |
+
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 183 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 184 |
+
]
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"cell_type": "code",
|
| 188 |
+
"execution_count": null,
|
| 189 |
+
"metadata": {},
|
| 190 |
+
"outputs": [],
|
| 191 |
+
"source": [
|
| 192 |
+
"response = client.chat.completions.create(\n",
|
| 193 |
+
" model=\"meta-llama/llama-3.3-8b-instruct:free\",\n",
|
| 194 |
+
" messages=messages\n",
|
| 195 |
+
")\n",
|
| 196 |
+
"\n",
|
| 197 |
+
"question = response.choices[0].message.content\n",
|
| 198 |
+
"\n",
|
| 199 |
+
"print(question)"
|
| 200 |
+
]
|
| 201 |
+
},
|
| 202 |
+
{
|
| 203 |
+
"cell_type": "code",
|
| 204 |
+
"execution_count": 85,
|
| 205 |
+
"metadata": {},
|
| 206 |
+
"outputs": [],
|
| 207 |
+
"source": [
|
| 208 |
+
"# form a new messages list\n",
|
| 209 |
+
"\n",
|
| 210 |
+
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 211 |
+
]
|
| 212 |
+
},
|
| 213 |
+
{
|
| 214 |
+
"cell_type": "code",
|
| 215 |
+
"execution_count": null,
|
| 216 |
+
"metadata": {},
|
| 217 |
+
"outputs": [],
|
| 218 |
+
"source": [
|
| 219 |
+
"# Ask it again\n",
|
| 220 |
+
"\n",
|
| 221 |
+
"response = client.chat.completions.create(\n",
|
| 222 |
+
" model=\"meta-llama/llama-3.3-8b-instruct:free\",\n",
|
| 223 |
+
" messages=messages\n",
|
| 224 |
+
")\n",
|
| 225 |
+
"\n",
|
| 226 |
+
"answer = response.choices[0].message.content\n",
|
| 227 |
+
"print(answer)"
|
| 228 |
+
]
|
| 229 |
+
},
|
| 230 |
+
{
|
| 231 |
+
"cell_type": "code",
|
| 232 |
+
"execution_count": null,
|
| 233 |
+
"metadata": {},
|
| 234 |
+
"outputs": [],
|
| 235 |
+
"source": [
|
| 236 |
+
"from IPython.display import Markdown, display\n",
|
| 237 |
+
"\n",
|
| 238 |
+
"display(Markdown(answer))\n",
|
| 239 |
+
"\n"
|
| 240 |
+
]
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"cell_type": "markdown",
|
| 244 |
+
"metadata": {},
|
| 245 |
+
"source": [
|
| 246 |
+
"# Congratulations!\n",
|
| 247 |
+
"\n",
|
| 248 |
+
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 249 |
+
"\n",
|
| 250 |
+
"Next time things get more interesting..."
|
| 251 |
+
]
|
| 252 |
+
},
|
| 253 |
+
{
|
| 254 |
+
"cell_type": "markdown",
|
| 255 |
+
"metadata": {},
|
| 256 |
+
"source": [
|
| 257 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 258 |
+
" <tr>\n",
|
| 259 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 260 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 261 |
+
" </td>\n",
|
| 262 |
+
" <td>\n",
|
| 263 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 264 |
+
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 265 |
+
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 266 |
+
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 267 |
+
" Finally have 3 third LLM call propose the Agentic AI solution.\n",
|
| 268 |
+
" </span>\n",
|
| 269 |
+
" </td>\n",
|
| 270 |
+
" </tr>\n",
|
| 271 |
+
"</table>"
|
| 272 |
+
]
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"cell_type": "code",
|
| 276 |
+
"execution_count": null,
|
| 277 |
+
"metadata": {},
|
| 278 |
+
"outputs": [],
|
| 279 |
+
"source": [
|
| 280 |
+
"# First create the messages:\n",
|
| 281 |
+
"\n",
|
| 282 |
+
"\n",
|
| 283 |
+
"messages = [\"Something here\"]\n",
|
| 284 |
+
"\n",
|
| 285 |
+
"# Then make the first call:\n",
|
| 286 |
+
"\n",
|
| 287 |
+
"response =\n",
|
| 288 |
+
"\n",
|
| 289 |
+
"# Then read the business idea:\n",
|
| 290 |
+
"\n",
|
| 291 |
+
"business_idea = response.\n",
|
| 292 |
+
"\n",
|
| 293 |
+
"# And repeat!"
|
| 294 |
+
]
|
| 295 |
+
},
|
| 296 |
+
{
|
| 297 |
+
"cell_type": "markdown",
|
| 298 |
+
"metadata": {},
|
| 299 |
+
"source": []
|
| 300 |
+
}
|
| 301 |
+
],
|
| 302 |
+
"metadata": {
|
| 303 |
+
"kernelspec": {
|
| 304 |
+
"display_name": ".venv",
|
| 305 |
+
"language": "python",
|
| 306 |
+
"name": "python3"
|
| 307 |
+
},
|
| 308 |
+
"language_info": {
|
| 309 |
+
"codemirror_mode": {
|
| 310 |
+
"name": "ipython",
|
| 311 |
+
"version": 3
|
| 312 |
+
},
|
| 313 |
+
"file_extension": ".py",
|
| 314 |
+
"mimetype": "text/x-python",
|
| 315 |
+
"name": "python",
|
| 316 |
+
"nbconvert_exporter": "python",
|
| 317 |
+
"pygments_lexer": "ipython3",
|
| 318 |
+
"version": "3.12.7"
|
| 319 |
+
}
|
| 320 |
+
},
|
| 321 |
+
"nbformat": 4,
|
| 322 |
+
"nbformat_minor": 2
|
| 323 |
+
}
|
community_contributions/1_lab2_Kaushik_Parallelization.ipynb
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"import os\n",
|
| 10 |
+
"import json\n",
|
| 11 |
+
"from dotenv import load_dotenv\n",
|
| 12 |
+
"from openai import OpenAI\n",
|
| 13 |
+
"from IPython.display import Markdown"
|
| 14 |
+
]
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"cell_type": "markdown",
|
| 18 |
+
"metadata": {},
|
| 19 |
+
"source": [
|
| 20 |
+
"### Refresh dot env"
|
| 21 |
+
]
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"cell_type": "code",
|
| 25 |
+
"execution_count": null,
|
| 26 |
+
"metadata": {},
|
| 27 |
+
"outputs": [],
|
| 28 |
+
"source": [
|
| 29 |
+
"load_dotenv(override=True)"
|
| 30 |
+
]
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"cell_type": "code",
|
| 34 |
+
"execution_count": 3,
|
| 35 |
+
"metadata": {},
|
| 36 |
+
"outputs": [],
|
| 37 |
+
"source": [
|
| 38 |
+
"open_api_key = os.getenv(\"OPENAI_API_KEY\")\n",
|
| 39 |
+
"google_api_key = os.getenv(\"GOOGLE_API_KEY\")"
|
| 40 |
+
]
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"cell_type": "markdown",
|
| 44 |
+
"metadata": {},
|
| 45 |
+
"source": [
|
| 46 |
+
"### Create initial query to get challenge recommendation"
|
| 47 |
+
]
|
| 48 |
+
},
|
| 49 |
+
{
|
| 50 |
+
"cell_type": "code",
|
| 51 |
+
"execution_count": 4,
|
| 52 |
+
"metadata": {},
|
| 53 |
+
"outputs": [],
|
| 54 |
+
"source": [
|
| 55 |
+
"query = 'Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. '\n",
|
| 56 |
+
"query += 'Answer only with the question, no explanation.'\n",
|
| 57 |
+
"\n",
|
| 58 |
+
"messages = [{'role':'user', 'content':query}]"
|
| 59 |
+
]
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"cell_type": "code",
|
| 63 |
+
"execution_count": null,
|
| 64 |
+
"metadata": {},
|
| 65 |
+
"outputs": [],
|
| 66 |
+
"source": [
|
| 67 |
+
"print(messages)"
|
| 68 |
+
]
|
| 69 |
+
},
|
| 70 |
+
{
|
| 71 |
+
"cell_type": "markdown",
|
| 72 |
+
"metadata": {},
|
| 73 |
+
"source": [
|
| 74 |
+
"### Call openai gpt-4o-mini "
|
| 75 |
+
]
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"cell_type": "code",
|
| 79 |
+
"execution_count": 6,
|
| 80 |
+
"metadata": {},
|
| 81 |
+
"outputs": [],
|
| 82 |
+
"source": [
|
| 83 |
+
"openai = OpenAI()\n",
|
| 84 |
+
"\n",
|
| 85 |
+
"response = openai.chat.completions.create(\n",
|
| 86 |
+
" messages=messages,\n",
|
| 87 |
+
" model='gpt-4o-mini'\n",
|
| 88 |
+
")\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"challange = response.choices[0].message.content\n"
|
| 91 |
+
]
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"cell_type": "code",
|
| 95 |
+
"execution_count": null,
|
| 96 |
+
"metadata": {},
|
| 97 |
+
"outputs": [],
|
| 98 |
+
"source": [
|
| 99 |
+
"print(challange)"
|
| 100 |
+
]
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"cell_type": "code",
|
| 104 |
+
"execution_count": 8,
|
| 105 |
+
"metadata": {},
|
| 106 |
+
"outputs": [],
|
| 107 |
+
"source": [
|
| 108 |
+
"competitors = []\n",
|
| 109 |
+
"answers = []"
|
| 110 |
+
]
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"cell_type": "markdown",
|
| 114 |
+
"metadata": {},
|
| 115 |
+
"source": [
|
| 116 |
+
"### Create messages with the challenge query"
|
| 117 |
+
]
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"cell_type": "code",
|
| 121 |
+
"execution_count": 9,
|
| 122 |
+
"metadata": {},
|
| 123 |
+
"outputs": [],
|
| 124 |
+
"source": [
|
| 125 |
+
"messages = [{'role':'user', 'content':challange}]"
|
| 126 |
+
]
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"cell_type": "code",
|
| 130 |
+
"execution_count": null,
|
| 131 |
+
"metadata": {},
|
| 132 |
+
"outputs": [],
|
| 133 |
+
"source": [
|
| 134 |
+
"print(messages)"
|
| 135 |
+
]
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"cell_type": "code",
|
| 139 |
+
"execution_count": null,
|
| 140 |
+
"metadata": {},
|
| 141 |
+
"outputs": [],
|
| 142 |
+
"source": [
|
| 143 |
+
"!ollama pull llama3.2"
|
| 144 |
+
]
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"cell_type": "code",
|
| 148 |
+
"execution_count": 12,
|
| 149 |
+
"metadata": {},
|
| 150 |
+
"outputs": [],
|
| 151 |
+
"source": [
|
| 152 |
+
"from threading import Thread"
|
| 153 |
+
]
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"cell_type": "code",
|
| 157 |
+
"execution_count": 13,
|
| 158 |
+
"metadata": {},
|
| 159 |
+
"outputs": [],
|
| 160 |
+
"source": [
|
| 161 |
+
"def gpt_mini_processor():\n",
|
| 162 |
+
" modleName = 'gpt-4o-mini'\n",
|
| 163 |
+
" competitors.append(modleName)\n",
|
| 164 |
+
" response_gpt = openai.chat.completions.create(\n",
|
| 165 |
+
" messages=messages,\n",
|
| 166 |
+
" model=modleName\n",
|
| 167 |
+
" )\n",
|
| 168 |
+
" answers.append(response_gpt.choices[0].message.content)\n",
|
| 169 |
+
"\n",
|
| 170 |
+
"def gemini_processor():\n",
|
| 171 |
+
" gemini = OpenAI(api_key=google_api_key, base_url='https://generativelanguage.googleapis.com/v1beta/openai/')\n",
|
| 172 |
+
" modleName = 'gemini-2.0-flash'\n",
|
| 173 |
+
" competitors.append(modleName)\n",
|
| 174 |
+
" response_gemini = gemini.chat.completions.create(\n",
|
| 175 |
+
" messages=messages,\n",
|
| 176 |
+
" model=modleName\n",
|
| 177 |
+
" )\n",
|
| 178 |
+
" answers.append(response_gemini.choices[0].message.content)\n",
|
| 179 |
+
"\n",
|
| 180 |
+
"def llama_processor():\n",
|
| 181 |
+
" ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 182 |
+
" modleName = 'llama3.2'\n",
|
| 183 |
+
" competitors.append(modleName)\n",
|
| 184 |
+
" response_llama = ollama.chat.completions.create(\n",
|
| 185 |
+
" messages=messages,\n",
|
| 186 |
+
" model=modleName\n",
|
| 187 |
+
" )\n",
|
| 188 |
+
" answers.append(response_llama.choices[0].message.content)"
|
| 189 |
+
]
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"cell_type": "markdown",
|
| 193 |
+
"metadata": {},
|
| 194 |
+
"source": [
|
| 195 |
+
"### Parallel execution of LLM calls"
|
| 196 |
+
]
|
| 197 |
+
},
|
| 198 |
+
{
|
| 199 |
+
"cell_type": "code",
|
| 200 |
+
"execution_count": 14,
|
| 201 |
+
"metadata": {},
|
| 202 |
+
"outputs": [],
|
| 203 |
+
"source": [
|
| 204 |
+
"thread1 = Thread(target=gpt_mini_processor)\n",
|
| 205 |
+
"thread2 = Thread(target=gemini_processor)\n",
|
| 206 |
+
"thread3 = Thread(target=llama_processor)\n",
|
| 207 |
+
"\n",
|
| 208 |
+
"thread1.start()\n",
|
| 209 |
+
"thread2.start()\n",
|
| 210 |
+
"thread3.start()\n",
|
| 211 |
+
"\n",
|
| 212 |
+
"thread1.join()\n",
|
| 213 |
+
"thread2.join()\n",
|
| 214 |
+
"thread3.join()"
|
| 215 |
+
]
|
| 216 |
+
},
|
| 217 |
+
{
|
| 218 |
+
"cell_type": "code",
|
| 219 |
+
"execution_count": null,
|
| 220 |
+
"metadata": {},
|
| 221 |
+
"outputs": [],
|
| 222 |
+
"source": [
|
| 223 |
+
"print(competitors)\n",
|
| 224 |
+
"print(answers)"
|
| 225 |
+
]
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"cell_type": "code",
|
| 229 |
+
"execution_count": null,
|
| 230 |
+
"metadata": {},
|
| 231 |
+
"outputs": [],
|
| 232 |
+
"source": [
|
| 233 |
+
"for competitor, answer in zip(competitors, answers):\n",
|
| 234 |
+
" print(f'Competitor:{competitor}\\n\\n{answer}')"
|
| 235 |
+
]
|
| 236 |
+
},
|
| 237 |
+
{
|
| 238 |
+
"cell_type": "code",
|
| 239 |
+
"execution_count": 17,
|
| 240 |
+
"metadata": {},
|
| 241 |
+
"outputs": [],
|
| 242 |
+
"source": [
|
| 243 |
+
"together = ''\n",
|
| 244 |
+
"for index, answer in enumerate(answers):\n",
|
| 245 |
+
" together += f'# Response from competitor {index + 1}\\n\\n'\n",
|
| 246 |
+
" together += answer + '\\n\\n'"
|
| 247 |
+
]
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"cell_type": "code",
|
| 251 |
+
"execution_count": null,
|
| 252 |
+
"metadata": {},
|
| 253 |
+
"outputs": [],
|
| 254 |
+
"source": [
|
| 255 |
+
"print(together)"
|
| 256 |
+
]
|
| 257 |
+
},
|
| 258 |
+
{
|
| 259 |
+
"cell_type": "markdown",
|
| 260 |
+
"metadata": {},
|
| 261 |
+
"source": [
|
| 262 |
+
"### Prompt to judge the LLM results"
|
| 263 |
+
]
|
| 264 |
+
},
|
| 265 |
+
{
|
| 266 |
+
"cell_type": "code",
|
| 267 |
+
"execution_count": 19,
|
| 268 |
+
"metadata": {},
|
| 269 |
+
"outputs": [],
|
| 270 |
+
"source": [
|
| 271 |
+
"to_judge = f'''You are judging a competition between {len(competitors)} competitors.\n",
|
| 272 |
+
"Each model has been given this question:\n",
|
| 273 |
+
"\n",
|
| 274 |
+
"{challange}\n",
|
| 275 |
+
"\n",
|
| 276 |
+
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 277 |
+
"Respond with JSON, and only JSON, with the following format:\n",
|
| 278 |
+
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 279 |
+
"\n",
|
| 280 |
+
"Here are the responses from each competitor:\n",
|
| 281 |
+
"\n",
|
| 282 |
+
"{together}\n",
|
| 283 |
+
"\n",
|
| 284 |
+
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n",
|
| 285 |
+
"\n",
|
| 286 |
+
"'''"
|
| 287 |
+
]
|
| 288 |
+
},
|
| 289 |
+
{
|
| 290 |
+
"cell_type": "code",
|
| 291 |
+
"execution_count": 20,
|
| 292 |
+
"metadata": {},
|
| 293 |
+
"outputs": [],
|
| 294 |
+
"source": [
|
| 295 |
+
"to_judge_message = [{'role':'user', 'content':to_judge}]"
|
| 296 |
+
]
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"cell_type": "markdown",
|
| 300 |
+
"metadata": {},
|
| 301 |
+
"source": [
|
| 302 |
+
"### Execute o3-mini to analyze the LLM results"
|
| 303 |
+
]
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"cell_type": "code",
|
| 307 |
+
"execution_count": null,
|
| 308 |
+
"metadata": {},
|
| 309 |
+
"outputs": [],
|
| 310 |
+
"source": [
|
| 311 |
+
"openai = OpenAI()\n",
|
| 312 |
+
"response = openai.chat.completions.create(\n",
|
| 313 |
+
" messages=to_judge_message,\n",
|
| 314 |
+
" model='o3-mini'\n",
|
| 315 |
+
")\n",
|
| 316 |
+
"result = response.choices[0].message.content\n",
|
| 317 |
+
"print(result)"
|
| 318 |
+
]
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"cell_type": "code",
|
| 322 |
+
"execution_count": null,
|
| 323 |
+
"metadata": {},
|
| 324 |
+
"outputs": [],
|
| 325 |
+
"source": [
|
| 326 |
+
"results_dict = json.loads(result)\n",
|
| 327 |
+
"ranks = results_dict[\"results\"]\n",
|
| 328 |
+
"for index, result in enumerate(ranks):\n",
|
| 329 |
+
" competitor = competitors[int(result)-1]\n",
|
| 330 |
+
" print(f\"Rank {index+1}: {competitor}\")"
|
| 331 |
+
]
|
| 332 |
+
}
|
| 333 |
+
],
|
| 334 |
+
"metadata": {
|
| 335 |
+
"kernelspec": {
|
| 336 |
+
"display_name": ".venv",
|
| 337 |
+
"language": "python",
|
| 338 |
+
"name": "python3"
|
| 339 |
+
},
|
| 340 |
+
"language_info": {
|
| 341 |
+
"codemirror_mode": {
|
| 342 |
+
"name": "ipython",
|
| 343 |
+
"version": 3
|
| 344 |
+
},
|
| 345 |
+
"file_extension": ".py",
|
| 346 |
+
"mimetype": "text/x-python",
|
| 347 |
+
"name": "python",
|
| 348 |
+
"nbconvert_exporter": "python",
|
| 349 |
+
"pygments_lexer": "ipython3",
|
| 350 |
+
"version": "3.12.10"
|
| 351 |
+
}
|
| 352 |
+
},
|
| 353 |
+
"nbformat": 4,
|
| 354 |
+
"nbformat_minor": 2
|
| 355 |
+
}
|
community_contributions/1_lab2_Routing_Workflow.ipynb
ADDED
|
@@ -0,0 +1,514 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Judging and Routing — Optimizing Resource Usage by Evaluating Problem Complexity"
|
| 8 |
+
]
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"cell_type": "markdown",
|
| 12 |
+
"metadata": {},
|
| 13 |
+
"source": [
|
| 14 |
+
"In the original Lab 2, we explored the **Orchestrator–Worker pattern**, where a planner sent the same question to multiple agents, and a judge assessed their responses to evaluate agent intelligence.\n",
|
| 15 |
+
"\n",
|
| 16 |
+
"In this notebook, we extend that design by adding multiple judges and a routing component to optimize model usage based on task complexity. "
|
| 17 |
+
]
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"cell_type": "markdown",
|
| 21 |
+
"metadata": {},
|
| 22 |
+
"source": [
|
| 23 |
+
"## Imports and Environment Setup"
|
| 24 |
+
]
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"cell_type": "code",
|
| 28 |
+
"execution_count": 1,
|
| 29 |
+
"metadata": {},
|
| 30 |
+
"outputs": [],
|
| 31 |
+
"source": [
|
| 32 |
+
"import os\n",
|
| 33 |
+
"import json\n",
|
| 34 |
+
"from dotenv import load_dotenv\n",
|
| 35 |
+
"from openai import OpenAI\n",
|
| 36 |
+
"from anthropic import Anthropic\n",
|
| 37 |
+
"from IPython.display import Markdown, display"
|
| 38 |
+
]
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"cell_type": "code",
|
| 42 |
+
"execution_count": null,
|
| 43 |
+
"metadata": {},
|
| 44 |
+
"outputs": [],
|
| 45 |
+
"source": [
|
| 46 |
+
"load_dotenv(override=True)\n",
|
| 47 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 48 |
+
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 49 |
+
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 50 |
+
"if openai_api_key and google_api_key and deepseek_api_key:\n",
|
| 51 |
+
" print(\"All keys were loaded successfully\")"
|
| 52 |
+
]
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"cell_type": "code",
|
| 56 |
+
"execution_count": null,
|
| 57 |
+
"metadata": {},
|
| 58 |
+
"outputs": [],
|
| 59 |
+
"source": [
|
| 60 |
+
"!ollama pull llama3.2\n",
|
| 61 |
+
"!ollama pull mistral"
|
| 62 |
+
]
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"cell_type": "markdown",
|
| 66 |
+
"metadata": {},
|
| 67 |
+
"source": [
|
| 68 |
+
"## Creating Models"
|
| 69 |
+
]
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"cell_type": "markdown",
|
| 73 |
+
"metadata": {},
|
| 74 |
+
"source": [
|
| 75 |
+
"The notebook uses instances of GPT, Gemini and DeepSeek APIs, along with two local models served via Ollama: ```llama3.2``` and ```mistral```."
|
| 76 |
+
]
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"cell_type": "code",
|
| 80 |
+
"execution_count": 4,
|
| 81 |
+
"metadata": {},
|
| 82 |
+
"outputs": [],
|
| 83 |
+
"source": [
|
| 84 |
+
"model_specs = {\n",
|
| 85 |
+
" \"gpt-4o-mini\" : None,\n",
|
| 86 |
+
" \"gemini-2.0-flash\": {\n",
|
| 87 |
+
" \"api_key\" : google_api_key,\n",
|
| 88 |
+
" \"url\" : \"https://generativelanguage.googleapis.com/v1beta/openai/\"\n",
|
| 89 |
+
" },\n",
|
| 90 |
+
" \"deepseek-chat\" : {\n",
|
| 91 |
+
" \"api_key\" : deepseek_api_key,\n",
|
| 92 |
+
" \"url\" : \"https://api.deepseek.com/v1\"\n",
|
| 93 |
+
" },\n",
|
| 94 |
+
" \"llama3.2\" : {\n",
|
| 95 |
+
" \"api_key\" : \"ollama\",\n",
|
| 96 |
+
" \"url\" : \"http://localhost:11434/v1\"\n",
|
| 97 |
+
" },\n",
|
| 98 |
+
" \"mistral\" : {\n",
|
| 99 |
+
" \"api_key\" : \"ollama\",\n",
|
| 100 |
+
" \"url\" : \"http://localhost:11434/v1\"\n",
|
| 101 |
+
" }\n",
|
| 102 |
+
"}\n",
|
| 103 |
+
"\n",
|
| 104 |
+
"def create_model(model_name):\n",
|
| 105 |
+
" spec = model_specs[model_name]\n",
|
| 106 |
+
" if spec is None:\n",
|
| 107 |
+
" return OpenAI()\n",
|
| 108 |
+
" \n",
|
| 109 |
+
" return OpenAI(api_key=spec[\"api_key\"], base_url=spec[\"url\"])"
|
| 110 |
+
]
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"cell_type": "code",
|
| 114 |
+
"execution_count": 5,
|
| 115 |
+
"metadata": {},
|
| 116 |
+
"outputs": [],
|
| 117 |
+
"source": [
|
| 118 |
+
"orchestrator_model = \"gemini-2.0-flash\"\n",
|
| 119 |
+
"generator = create_model(orchestrator_model)\n",
|
| 120 |
+
"router = create_model(orchestrator_model)\n",
|
| 121 |
+
"\n",
|
| 122 |
+
"qa_models = {\n",
|
| 123 |
+
" model_name : create_model(model_name) \n",
|
| 124 |
+
" for model_name in model_specs.keys()\n",
|
| 125 |
+
"}\n",
|
| 126 |
+
"\n",
|
| 127 |
+
"judges = {\n",
|
| 128 |
+
" model_name : create_model(model_name) \n",
|
| 129 |
+
" for model_name, specs in model_specs.items() \n",
|
| 130 |
+
" if not(specs) or specs[\"api_key\"] != \"ollama\"\n",
|
| 131 |
+
"}"
|
| 132 |
+
]
|
| 133 |
+
},
|
| 134 |
+
{
|
| 135 |
+
"cell_type": "markdown",
|
| 136 |
+
"metadata": {},
|
| 137 |
+
"source": [
|
| 138 |
+
"## Orchestrator-Worker Workflow"
|
| 139 |
+
]
|
| 140 |
+
},
|
| 141 |
+
{
|
| 142 |
+
"cell_type": "markdown",
|
| 143 |
+
"metadata": {},
|
| 144 |
+
"source": [
|
| 145 |
+
"First, we generate a question to evaluate the intelligence of each LLM."
|
| 146 |
+
]
|
| 147 |
+
},
|
| 148 |
+
{
|
| 149 |
+
"cell_type": "code",
|
| 150 |
+
"execution_count": null,
|
| 151 |
+
"metadata": {},
|
| 152 |
+
"outputs": [],
|
| 153 |
+
"source": [
|
| 154 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs \"\n",
|
| 155 |
+
"request += \"to evaluate and rank them based on their intelligence. \" \n",
|
| 156 |
+
"request += \"Answer **only** with the question, no explanation or preamble.\"\n",
|
| 157 |
+
"\n",
|
| 158 |
+
"messages = [{\"role\": \"user\", \"content\": request}]\n",
|
| 159 |
+
"messages"
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"cell_type": "code",
|
| 164 |
+
"execution_count": 7,
|
| 165 |
+
"metadata": {},
|
| 166 |
+
"outputs": [],
|
| 167 |
+
"source": [
|
| 168 |
+
"response = generator.chat.completions.create(\n",
|
| 169 |
+
" model=orchestrator_model,\n",
|
| 170 |
+
" messages=messages,\n",
|
| 171 |
+
")\n",
|
| 172 |
+
"eval_question = response.choices[0].message.content"
|
| 173 |
+
]
|
| 174 |
+
},
|
| 175 |
+
{
|
| 176 |
+
"cell_type": "code",
|
| 177 |
+
"execution_count": null,
|
| 178 |
+
"metadata": {},
|
| 179 |
+
"outputs": [],
|
| 180 |
+
"source": [
|
| 181 |
+
"display(Markdown(eval_question))"
|
| 182 |
+
]
|
| 183 |
+
},
|
| 184 |
+
{
|
| 185 |
+
"cell_type": "markdown",
|
| 186 |
+
"metadata": {},
|
| 187 |
+
"source": [
|
| 188 |
+
"### Task Parallelization"
|
| 189 |
+
]
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"cell_type": "markdown",
|
| 193 |
+
"metadata": {},
|
| 194 |
+
"source": [
|
| 195 |
+
"Now, having the question and all the models instantiated, it's time to see what each model has to say about the complex task it was given."
|
| 196 |
+
]
|
| 197 |
+
},
|
| 198 |
+
{
|
| 199 |
+
"cell_type": "code",
|
| 200 |
+
"execution_count": null,
|
| 201 |
+
"metadata": {},
|
| 202 |
+
"outputs": [],
|
| 203 |
+
"source": [
|
| 204 |
+
"question = [{\"role\": \"user\", \"content\": eval_question}]\n",
|
| 205 |
+
"answers = []\n",
|
| 206 |
+
"competitors = []\n",
|
| 207 |
+
"\n",
|
| 208 |
+
"for name, model in qa_models.items():\n",
|
| 209 |
+
" response = model.chat.completions.create(model=name, messages=question)\n",
|
| 210 |
+
" answer = response.choices[0].message.content\n",
|
| 211 |
+
" competitors.append(name)\n",
|
| 212 |
+
" answers.append(answer)\n",
|
| 213 |
+
"\n",
|
| 214 |
+
"answers"
|
| 215 |
+
]
|
| 216 |
+
},
|
| 217 |
+
{
|
| 218 |
+
"cell_type": "code",
|
| 219 |
+
"execution_count": null,
|
| 220 |
+
"metadata": {},
|
| 221 |
+
"outputs": [],
|
| 222 |
+
"source": [
|
| 223 |
+
"report = \"# Answer report for each of the 5 models\\n\\n\"\n",
|
| 224 |
+
"report += \"\\n\\n\".join([f\"## **Model: {model}**\\n\\n{answer}\" for model, answer in zip(competitors, answers)])\n",
|
| 225 |
+
"display(Markdown(report))"
|
| 226 |
+
]
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"cell_type": "markdown",
|
| 230 |
+
"metadata": {},
|
| 231 |
+
"source": [
|
| 232 |
+
"### Synthesizer/Judge"
|
| 233 |
+
]
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"cell_type": "markdown",
|
| 237 |
+
"metadata": {},
|
| 238 |
+
"source": [
|
| 239 |
+
"The Judge Agents rank the LLM responses based on coherence and relevance to the evaluation prompt. Judges vote, and the final LLM ranking is based on the aggregated ranking of all three judges."
|
| 240 |
+
]
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"cell_type": "code",
|
| 244 |
+
"execution_count": null,
|
| 245 |
+
"metadata": {},
|
| 246 |
+
"outputs": [],
|
| 247 |
+
"source": [
|
| 248 |
+
"together = \"\"\n",
|
| 249 |
+
"for index, answer in enumerate(answers):\n",
|
| 250 |
+
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 251 |
+
" together += answer + \"\\n\\n\"\n",
|
| 252 |
+
"\n",
|
| 253 |
+
"together"
|
| 254 |
+
]
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"cell_type": "code",
|
| 258 |
+
"execution_count": 12,
|
| 259 |
+
"metadata": {},
|
| 260 |
+
"outputs": [],
|
| 261 |
+
"source": [
|
| 262 |
+
"judge_prompt = f\"\"\"\n",
|
| 263 |
+
" You are judging a competition between {len(competitors)} LLM competitors.\n",
|
| 264 |
+
" Each model has been given this nuanced question to evaluate their intelligence:\n",
|
| 265 |
+
"\n",
|
| 266 |
+
" {eval_question}\n",
|
| 267 |
+
"\n",
|
| 268 |
+
" Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 269 |
+
" Respond with JSON, and only JSON, with the following format:\n",
|
| 270 |
+
" {{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 271 |
+
" With 'best competitor number being ONLY the number', for instance:\n",
|
| 272 |
+
" {{\"results\": [\"5\", \"2\", \"4\", ...]}}\n",
|
| 273 |
+
" Here are the responses from each competitor:\n",
|
| 274 |
+
"\n",
|
| 275 |
+
" {together}\n",
|
| 276 |
+
"\n",
|
| 277 |
+
" Now respond with the JSON with the ranked order of the competitors, nothing else. Do NOT include MARKDOWN FORMATTING or CODE BLOCKS. ONLY the JSON\n",
|
| 278 |
+
" \"\"\"\n",
|
| 279 |
+
"\n",
|
| 280 |
+
"judge_messages = [{\"role\": \"user\", \"content\": judge_prompt}]"
|
| 281 |
+
]
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"cell_type": "code",
|
| 285 |
+
"execution_count": null,
|
| 286 |
+
"metadata": {},
|
| 287 |
+
"outputs": [],
|
| 288 |
+
"source": [
|
| 289 |
+
"from collections import defaultdict\n",
|
| 290 |
+
"import re\n",
|
| 291 |
+
"\n",
|
| 292 |
+
"N = len(competitors)\n",
|
| 293 |
+
"scores = defaultdict(int)\n",
|
| 294 |
+
"for judge_name, judge in judges.items():\n",
|
| 295 |
+
" response = judge.chat.completions.create(\n",
|
| 296 |
+
" model=judge_name,\n",
|
| 297 |
+
" messages=judge_messages,\n",
|
| 298 |
+
" )\n",
|
| 299 |
+
" response = response.choices[0].message.content\n",
|
| 300 |
+
" response_json = re.findall(r'\\{.*?\\}', response)[0]\n",
|
| 301 |
+
" results = json.loads(response_json)[\"results\"]\n",
|
| 302 |
+
" ranks = [int(result) for result in results]\n",
|
| 303 |
+
" print(f\"Judge {judge_name} ranking:\")\n",
|
| 304 |
+
" for i, c in enumerate(ranks):\n",
|
| 305 |
+
" model_name = competitors[c - 1]\n",
|
| 306 |
+
" print(f\"#{i+1} : {model_name}\")\n",
|
| 307 |
+
" scores[c - 1] += (N - i)\n",
|
| 308 |
+
" print()"
|
| 309 |
+
]
|
| 310 |
+
},
|
| 311 |
+
{
|
| 312 |
+
"cell_type": "code",
|
| 313 |
+
"execution_count": null,
|
| 314 |
+
"metadata": {},
|
| 315 |
+
"outputs": [],
|
| 316 |
+
"source": [
|
| 317 |
+
"sorted_indices = sorted(scores, key=scores.get)\n",
|
| 318 |
+
"\n",
|
| 319 |
+
"# Convert to model names\n",
|
| 320 |
+
"ranked_model_names = [competitors[i] for i in sorted_indices]\n",
|
| 321 |
+
"\n",
|
| 322 |
+
"print(\"Final ranking from best to worst:\")\n",
|
| 323 |
+
"for i, name in enumerate(ranked_model_names[::-1], 1):\n",
|
| 324 |
+
" print(f\"#{i}: {name}\")"
|
| 325 |
+
]
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"cell_type": "markdown",
|
| 329 |
+
"metadata": {},
|
| 330 |
+
"source": [
|
| 331 |
+
"## Routing Workflow"
|
| 332 |
+
]
|
| 333 |
+
},
|
| 334 |
+
{
|
| 335 |
+
"cell_type": "markdown",
|
| 336 |
+
"metadata": {},
|
| 337 |
+
"source": [
|
| 338 |
+
"We now define a routing agent responsible for classifying task complexity and delegating the prompt to the most appropriate model."
|
| 339 |
+
]
|
| 340 |
+
},
|
| 341 |
+
{
|
| 342 |
+
"cell_type": "code",
|
| 343 |
+
"execution_count": 15,
|
| 344 |
+
"metadata": {},
|
| 345 |
+
"outputs": [],
|
| 346 |
+
"source": [
|
| 347 |
+
"def classify_question_complexity(question: str, routing_agent, routing_model) -> int:\n",
|
| 348 |
+
" \"\"\"\n",
|
| 349 |
+
" Ask an LLM to classify the question complexity from 1 (easy) to 5 (very hard).\n",
|
| 350 |
+
" \"\"\"\n",
|
| 351 |
+
" prompt = f\"\"\"\n",
|
| 352 |
+
" You are a classifier responsible for assigning a complexity level to user questions, based on how difficult they would be for a language model to answer.\n",
|
| 353 |
+
"\n",
|
| 354 |
+
" Please read the question below and assign a complexity score from 1 to 5:\n",
|
| 355 |
+
"\n",
|
| 356 |
+
" - Level 1: Very simple factual or definitional question (e.g., “What is the capital of France?”)\n",
|
| 357 |
+
" - Level 2: Slightly more involved, requiring basic reasoning or comparison\n",
|
| 358 |
+
" - Level 3: Moderate complexity, requiring synthesis, context understanding, or multi-part answers\n",
|
| 359 |
+
" - Level 4: High complexity, requiring abstract thinking, ethical judgment, or creative generation\n",
|
| 360 |
+
" - Level 5: Extremely challenging, requiring deep reasoning, philosophical reflection, or long-term multi-step inference\n",
|
| 361 |
+
"\n",
|
| 362 |
+
" Respond ONLY with a single integer between 1 and 5 that best reflects the complexity of the question.\n",
|
| 363 |
+
"\n",
|
| 364 |
+
" Question:\n",
|
| 365 |
+
" {question}\n",
|
| 366 |
+
" \"\"\"\n",
|
| 367 |
+
"\n",
|
| 368 |
+
" response = routing_agent.chat.completions.create(\n",
|
| 369 |
+
" model=routing_model,\n",
|
| 370 |
+
" messages=[{\"role\": \"user\", \"content\": prompt}]\n",
|
| 371 |
+
" )\n",
|
| 372 |
+
" try:\n",
|
| 373 |
+
" return int(response.choices[0].message.content.strip())\n",
|
| 374 |
+
" except Exception:\n",
|
| 375 |
+
" return 3 # default to medium complexity on error\n",
|
| 376 |
+
" \n",
|
| 377 |
+
"def route_question_to_model(question: str, models_by_rank, classifier_model=router, model_name=orchestrator_model):\n",
|
| 378 |
+
" level = classify_question_complexity(question, classifier_model, model_name)\n",
|
| 379 |
+
" selected_model_name = models_by_rank[level - 1]\n",
|
| 380 |
+
" return selected_model_name"
|
| 381 |
+
]
|
| 382 |
+
},
|
| 383 |
+
{
|
| 384 |
+
"cell_type": "code",
|
| 385 |
+
"execution_count": 16,
|
| 386 |
+
"metadata": {},
|
| 387 |
+
"outputs": [],
|
| 388 |
+
"source": [
|
| 389 |
+
"difficulty_prompts = [\n",
|
| 390 |
+
" \"Generate a very basic, factual question that a small or entry-level language model could answer easily. It should require no reasoning, just direct knowledge lookup.\",\n",
|
| 391 |
+
" \"Generate a slightly involved question that requires basic reasoning, comparison, or combining two known facts. Still within the grasp of small models but not purely factual.\",\n",
|
| 392 |
+
" \"Generate a moderately challenging question that requires some synthesis of ideas, multi-step reasoning, or contextual understanding. A mid-tier model should be able to answer it with effort.\",\n",
|
| 393 |
+
" \"Generate a difficult question involving abstract thinking, open-ended reasoning, or ethical tradeoffs. The question should challenge large models to produce thoughtful and coherent responses.\",\n",
|
| 394 |
+
" \"Generate an extremely complex and nuanced question that tests the limits of current language models. It should require deep reasoning, long-term planning, philosophy, or advanced multi-domain knowledge.\"\n",
|
| 395 |
+
"]\n",
|
| 396 |
+
"def generate_question(level, generator=generator, generator_model=orchestrator_model):\n",
|
| 397 |
+
" prompt = (\n",
|
| 398 |
+
" f\"{difficulty_prompts[level - 1]}\\n\"\n",
|
| 399 |
+
" \"Answer only with the question, no explanation.\"\n",
|
| 400 |
+
" )\n",
|
| 401 |
+
" messages = [{\"role\": \"user\", \"content\": prompt}]\n",
|
| 402 |
+
" response = generator.chat.completions.create(\n",
|
| 403 |
+
" model=generator_model, # or your planner model\n",
|
| 404 |
+
" messages=messages\n",
|
| 405 |
+
" )\n",
|
| 406 |
+
" \n",
|
| 407 |
+
" return response.choices[0].message.content\n",
|
| 408 |
+
"\n"
|
| 409 |
+
]
|
| 410 |
+
},
|
| 411 |
+
{
|
| 412 |
+
"cell_type": "markdown",
|
| 413 |
+
"metadata": {},
|
| 414 |
+
"source": [
|
| 415 |
+
"### Testing Routing Workflow"
|
| 416 |
+
]
|
| 417 |
+
},
|
| 418 |
+
{
|
| 419 |
+
"cell_type": "markdown",
|
| 420 |
+
"metadata": {},
|
| 421 |
+
"source": [
|
| 422 |
+
"Finally, to test the routing workflow, we create a function that accepts a task complexity level and triggers the full routing process.\n",
|
| 423 |
+
"\n",
|
| 424 |
+
"*Note: A level-N prompt isn't always assigned to the Nth-most capable model due to the classifier's subjective decisions.*"
|
| 425 |
+
]
|
| 426 |
+
},
|
| 427 |
+
{
|
| 428 |
+
"cell_type": "code",
|
| 429 |
+
"execution_count": 17,
|
| 430 |
+
"metadata": {},
|
| 431 |
+
"outputs": [],
|
| 432 |
+
"source": [
|
| 433 |
+
"def test_generation_routing(level):\n",
|
| 434 |
+
" question = generate_question(level=level)\n",
|
| 435 |
+
" answer_model = route_question_to_model(question, ranked_model_names)\n",
|
| 436 |
+
" messages = [{\"role\": \"user\", \"content\": question}]\n",
|
| 437 |
+
"\n",
|
| 438 |
+
" response =qa_models[answer_model].chat.completions.create(\n",
|
| 439 |
+
" model=answer_model, # or your planner model\n",
|
| 440 |
+
" messages=messages\n",
|
| 441 |
+
" )\n",
|
| 442 |
+
" print(f\"Question : {question}\")\n",
|
| 443 |
+
" print(f\"Routed to {answer_model}\")\n",
|
| 444 |
+
" display(Markdown(response.choices[0].message.content))"
|
| 445 |
+
]
|
| 446 |
+
},
|
| 447 |
+
{
|
| 448 |
+
"cell_type": "code",
|
| 449 |
+
"execution_count": null,
|
| 450 |
+
"metadata": {},
|
| 451 |
+
"outputs": [],
|
| 452 |
+
"source": [
|
| 453 |
+
"test_generation_routing(level=1)"
|
| 454 |
+
]
|
| 455 |
+
},
|
| 456 |
+
{
|
| 457 |
+
"cell_type": "code",
|
| 458 |
+
"execution_count": null,
|
| 459 |
+
"metadata": {},
|
| 460 |
+
"outputs": [],
|
| 461 |
+
"source": [
|
| 462 |
+
"test_generation_routing(level=2)"
|
| 463 |
+
]
|
| 464 |
+
},
|
| 465 |
+
{
|
| 466 |
+
"cell_type": "code",
|
| 467 |
+
"execution_count": null,
|
| 468 |
+
"metadata": {},
|
| 469 |
+
"outputs": [],
|
| 470 |
+
"source": [
|
| 471 |
+
"test_generation_routing(level=3)"
|
| 472 |
+
]
|
| 473 |
+
},
|
| 474 |
+
{
|
| 475 |
+
"cell_type": "code",
|
| 476 |
+
"execution_count": null,
|
| 477 |
+
"metadata": {},
|
| 478 |
+
"outputs": [],
|
| 479 |
+
"source": [
|
| 480 |
+
"test_generation_routing(level=4)"
|
| 481 |
+
]
|
| 482 |
+
},
|
| 483 |
+
{
|
| 484 |
+
"cell_type": "code",
|
| 485 |
+
"execution_count": null,
|
| 486 |
+
"metadata": {},
|
| 487 |
+
"outputs": [],
|
| 488 |
+
"source": [
|
| 489 |
+
"test_generation_routing(level=5)"
|
| 490 |
+
]
|
| 491 |
+
}
|
| 492 |
+
],
|
| 493 |
+
"metadata": {
|
| 494 |
+
"kernelspec": {
|
| 495 |
+
"display_name": ".venv",
|
| 496 |
+
"language": "python",
|
| 497 |
+
"name": "python3"
|
| 498 |
+
},
|
| 499 |
+
"language_info": {
|
| 500 |
+
"codemirror_mode": {
|
| 501 |
+
"name": "ipython",
|
| 502 |
+
"version": 3
|
| 503 |
+
},
|
| 504 |
+
"file_extension": ".py",
|
| 505 |
+
"mimetype": "text/x-python",
|
| 506 |
+
"name": "python",
|
| 507 |
+
"nbconvert_exporter": "python",
|
| 508 |
+
"pygments_lexer": "ipython3",
|
| 509 |
+
"version": "3.12.11"
|
| 510 |
+
}
|
| 511 |
+
},
|
| 512 |
+
"nbformat": 4,
|
| 513 |
+
"nbformat_minor": 2
|
| 514 |
+
}
|
community_contributions/2_lab2-Evaluator-AnnpaS18.ipynb
ADDED
|
@@ -0,0 +1,474 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "markdown",
|
| 14 |
+
"metadata": {},
|
| 15 |
+
"source": [
|
| 16 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
+
" <tr>\n",
|
| 18 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
+
" </td>\n",
|
| 21 |
+
" <td>\n",
|
| 22 |
+
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
+
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a GitHub account, use it to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
+
" </span>\n",
|
| 25 |
+
" </td>\n",
|
| 26 |
+
" </tr>\n",
|
| 27 |
+
"</table>"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"cell_type": "code",
|
| 32 |
+
"execution_count": 1,
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"outputs": [],
|
| 35 |
+
"source": [
|
| 36 |
+
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 37 |
+
"\n",
|
| 38 |
+
"import os\n",
|
| 39 |
+
"import json\n",
|
| 40 |
+
"from dotenv import load_dotenv\n",
|
| 41 |
+
"from openai import OpenAI\n",
|
| 42 |
+
"from anthropic import Anthropic\n",
|
| 43 |
+
"from IPython.display import Markdown, display"
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"cell_type": "code",
|
| 48 |
+
"execution_count": null,
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"outputs": [],
|
| 51 |
+
"source": [
|
| 52 |
+
"# Always remember to do this!\n",
|
| 53 |
+
"load_dotenv(override=True)"
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "code",
|
| 58 |
+
"execution_count": null,
|
| 59 |
+
"metadata": {},
|
| 60 |
+
"outputs": [],
|
| 61 |
+
"source": [
|
| 62 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 63 |
+
"\n",
|
| 64 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 65 |
+
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 66 |
+
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 67 |
+
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 68 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 69 |
+
"\n",
|
| 70 |
+
"if openai_api_key:\n",
|
| 71 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 72 |
+
"else:\n",
|
| 73 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 74 |
+
" \n",
|
| 75 |
+
"if anthropic_api_key:\n",
|
| 76 |
+
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 77 |
+
"else:\n",
|
| 78 |
+
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 79 |
+
"\n",
|
| 80 |
+
"if google_api_key:\n",
|
| 81 |
+
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 82 |
+
"else:\n",
|
| 83 |
+
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 84 |
+
"\n",
|
| 85 |
+
"if deepseek_api_key:\n",
|
| 86 |
+
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 87 |
+
"else:\n",
|
| 88 |
+
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"if groq_api_key:\n",
|
| 91 |
+
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 92 |
+
"else:\n",
|
| 93 |
+
" print(\"Groq API Key not set (and this is optional)\")"
|
| 94 |
+
]
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"cell_type": "code",
|
| 98 |
+
"execution_count": 4,
|
| 99 |
+
"metadata": {},
|
| 100 |
+
"outputs": [],
|
| 101 |
+
"source": [
|
| 102 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 103 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 104 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 105 |
+
]
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"cell_type": "code",
|
| 109 |
+
"execution_count": null,
|
| 110 |
+
"metadata": {},
|
| 111 |
+
"outputs": [],
|
| 112 |
+
"source": [
|
| 113 |
+
"messages"
|
| 114 |
+
]
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"cell_type": "code",
|
| 118 |
+
"execution_count": null,
|
| 119 |
+
"metadata": {},
|
| 120 |
+
"outputs": [],
|
| 121 |
+
"source": [
|
| 122 |
+
"openai = OpenAI()\n",
|
| 123 |
+
"response = openai.chat.completions.create(\n",
|
| 124 |
+
" model=\"gpt-4o-mini\",\n",
|
| 125 |
+
" messages=messages,\n",
|
| 126 |
+
")\n",
|
| 127 |
+
"question = response.choices[0].message.content\n",
|
| 128 |
+
"print(question)\n"
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"cell_type": "code",
|
| 133 |
+
"execution_count": 7,
|
| 134 |
+
"metadata": {},
|
| 135 |
+
"outputs": [],
|
| 136 |
+
"source": [
|
| 137 |
+
"competitors = []\n",
|
| 138 |
+
"answers = []\n",
|
| 139 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 140 |
+
]
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"cell_type": "code",
|
| 144 |
+
"execution_count": null,
|
| 145 |
+
"metadata": {},
|
| 146 |
+
"outputs": [],
|
| 147 |
+
"source": [
|
| 148 |
+
"# The API we know well\n",
|
| 149 |
+
"\n",
|
| 150 |
+
"model_name = \"gpt-4o-mini\"\n",
|
| 151 |
+
"\n",
|
| 152 |
+
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 153 |
+
"answer = response.choices[0].message.content\n",
|
| 154 |
+
"\n",
|
| 155 |
+
"display(Markdown(answer))\n",
|
| 156 |
+
"competitors.append(model_name)\n",
|
| 157 |
+
"answers.append(answer)"
|
| 158 |
+
]
|
| 159 |
+
},
|
| 160 |
+
{
|
| 161 |
+
"cell_type": "code",
|
| 162 |
+
"execution_count": null,
|
| 163 |
+
"metadata": {},
|
| 164 |
+
"outputs": [],
|
| 165 |
+
"source": [
|
| 166 |
+
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 167 |
+
"\n",
|
| 168 |
+
"model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 169 |
+
"\n",
|
| 170 |
+
"claude = Anthropic()\n",
|
| 171 |
+
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 172 |
+
"answer = response.content[0].text\n",
|
| 173 |
+
"\n",
|
| 174 |
+
"display(Markdown(answer))\n",
|
| 175 |
+
"competitors.append(model_name)\n",
|
| 176 |
+
"answers.append(answer)"
|
| 177 |
+
]
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"cell_type": "code",
|
| 181 |
+
"execution_count": null,
|
| 182 |
+
"metadata": {},
|
| 183 |
+
"outputs": [],
|
| 184 |
+
"source": [
|
| 185 |
+
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 186 |
+
"model_name = \"gemini-2.0-flash\"\n",
|
| 187 |
+
"\n",
|
| 188 |
+
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 189 |
+
"answer = response.choices[0].message.content\n",
|
| 190 |
+
"\n",
|
| 191 |
+
"display(Markdown(answer))\n",
|
| 192 |
+
"competitors.append(model_name)\n",
|
| 193 |
+
"answers.append(answer)"
|
| 194 |
+
]
|
| 195 |
+
},
|
| 196 |
+
{
|
| 197 |
+
"cell_type": "code",
|
| 198 |
+
"execution_count": null,
|
| 199 |
+
"metadata": {},
|
| 200 |
+
"outputs": [],
|
| 201 |
+
"source": [
|
| 202 |
+
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 203 |
+
"model_name = \"deepseek-chat\"\n",
|
| 204 |
+
"\n",
|
| 205 |
+
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 206 |
+
"answer = response.choices[0].message.content\n",
|
| 207 |
+
"\n",
|
| 208 |
+
"display(Markdown(answer))\n",
|
| 209 |
+
"competitors.append(model_name)\n",
|
| 210 |
+
"answers.append(answer)"
|
| 211 |
+
]
|
| 212 |
+
},
|
| 213 |
+
{
|
| 214 |
+
"cell_type": "code",
|
| 215 |
+
"execution_count": null,
|
| 216 |
+
"metadata": {},
|
| 217 |
+
"outputs": [],
|
| 218 |
+
"source": [
|
| 219 |
+
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 220 |
+
"model_name = \"llama-3.3-70b-versatile\"\n",
|
| 221 |
+
"\n",
|
| 222 |
+
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 223 |
+
"answer = response.choices[0].message.content\n",
|
| 224 |
+
"\n",
|
| 225 |
+
"display(Markdown(answer))\n",
|
| 226 |
+
"competitors.append(model_name)\n",
|
| 227 |
+
"answers.append(answer)\n"
|
| 228 |
+
]
|
| 229 |
+
},
|
| 230 |
+
{
|
| 231 |
+
"cell_type": "markdown",
|
| 232 |
+
"metadata": {},
|
| 233 |
+
"source": [
|
| 234 |
+
"## For the next cell, we will use Ollama\n",
|
| 235 |
+
"\n",
|
| 236 |
+
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 237 |
+
"and runs models locally using high performance C++ code.\n",
|
| 238 |
+
"\n",
|
| 239 |
+
"If you don't have Ollama, install it by visiting https://ollama.com, pressing Download, and following the instructions.\n",
|
| 240 |
+
"\n",
|
| 241 |
+
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 242 |
+
"\n",
|
| 243 |
+
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 244 |
+
"\n",
|
| 245 |
+
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 246 |
+
"\n",
|
| 247 |
+
"`ollama pull <model_name>` downloads a model locally \n",
|
| 248 |
+
"`ollama ls` lists all the models you've downloaded \n",
|
| 249 |
+
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 250 |
+
]
|
| 251 |
+
},
|
| 252 |
+
{
|
| 253 |
+
"cell_type": "markdown",
|
| 254 |
+
"metadata": {},
|
| 255 |
+
"source": [
|
| 256 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 257 |
+
" <tr>\n",
|
| 258 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 259 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 260 |
+
" </td>\n",
|
| 261 |
+
" <td>\n",
|
| 262 |
+
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 263 |
+
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See <a href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 264 |
+
" </span>\n",
|
| 265 |
+
" </td>\n",
|
| 266 |
+
" </tr>\n",
|
| 267 |
+
"</table>"
|
| 268 |
+
]
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"cell_type": "code",
|
| 272 |
+
"execution_count": null,
|
| 273 |
+
"metadata": {},
|
| 274 |
+
"outputs": [],
|
| 275 |
+
"source": [
|
| 276 |
+
"!ollama pull llama3.2"
|
| 277 |
+
]
|
| 278 |
+
},
|
| 279 |
+
{
|
| 280 |
+
"cell_type": "code",
|
| 281 |
+
"execution_count": null,
|
| 282 |
+
"metadata": {},
|
| 283 |
+
"outputs": [],
|
| 284 |
+
"source": [
|
| 285 |
+
"ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 286 |
+
"model_name = \"llama3.2\"\n",
|
| 287 |
+
"\n",
|
| 288 |
+
"response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 289 |
+
"answer = response.choices[0].message.content\n",
|
| 290 |
+
"\n",
|
| 291 |
+
"display(Markdown(answer))\n",
|
| 292 |
+
"competitors.append(model_name)\n",
|
| 293 |
+
"answers.append(answer)"
|
| 294 |
+
]
|
| 295 |
+
},
|
| 296 |
+
{
|
| 297 |
+
"cell_type": "code",
|
| 298 |
+
"execution_count": null,
|
| 299 |
+
"metadata": {},
|
| 300 |
+
"outputs": [],
|
| 301 |
+
"source": [
|
| 302 |
+
"# So where are we?\n",
|
| 303 |
+
"\n",
|
| 304 |
+
"print(competitors)\n",
|
| 305 |
+
"print(answers)\n"
|
| 306 |
+
]
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"cell_type": "code",
|
| 310 |
+
"execution_count": null,
|
| 311 |
+
"metadata": {},
|
| 312 |
+
"outputs": [],
|
| 313 |
+
"source": [
|
| 314 |
+
"# It's nice to know how to use \"zip\"\n",
|
| 315 |
+
"for competitor, answer in zip(competitors, answers):\n",
|
| 316 |
+
" print(f\"Competitor: {competitor}\\n\\n{answer}\")\n"
|
| 317 |
+
]
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"cell_type": "code",
|
| 321 |
+
"execution_count": 20,
|
| 322 |
+
"metadata": {},
|
| 323 |
+
"outputs": [],
|
| 324 |
+
"source": [
|
| 325 |
+
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 326 |
+
"\n",
|
| 327 |
+
"together = \"\"\n",
|
| 328 |
+
"for index, answer in enumerate(answers):\n",
|
| 329 |
+
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 330 |
+
" together += answer + \"\\n\\n\""
|
| 331 |
+
]
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"cell_type": "code",
|
| 335 |
+
"execution_count": null,
|
| 336 |
+
"metadata": {},
|
| 337 |
+
"outputs": [],
|
| 338 |
+
"source": [
|
| 339 |
+
"print(together)"
|
| 340 |
+
]
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"cell_type": "code",
|
| 344 |
+
"execution_count": 22,
|
| 345 |
+
"metadata": {},
|
| 346 |
+
"outputs": [],
|
| 347 |
+
"source": [
|
| 348 |
+
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 349 |
+
"Each model has been given this question:\n",
|
| 350 |
+
"\n",
|
| 351 |
+
"{question}\n",
|
| 352 |
+
"\n",
|
| 353 |
+
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 354 |
+
"Respond with JSON, and only JSON, with the following format:\n",
|
| 355 |
+
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 356 |
+
"\n",
|
| 357 |
+
"Here are the responses from each competitor:\n",
|
| 358 |
+
"\n",
|
| 359 |
+
"{together}\n",
|
| 360 |
+
"\n",
|
| 361 |
+
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 362 |
+
]
|
| 363 |
+
},
|
| 364 |
+
{
|
| 365 |
+
"cell_type": "code",
|
| 366 |
+
"execution_count": null,
|
| 367 |
+
"metadata": {},
|
| 368 |
+
"outputs": [],
|
| 369 |
+
"source": [
|
| 370 |
+
"print(judge)"
|
| 371 |
+
]
|
| 372 |
+
},
|
| 373 |
+
{
|
| 374 |
+
"cell_type": "code",
|
| 375 |
+
"execution_count": 29,
|
| 376 |
+
"metadata": {},
|
| 377 |
+
"outputs": [],
|
| 378 |
+
"source": [
|
| 379 |
+
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 380 |
+
]
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"cell_type": "code",
|
| 384 |
+
"execution_count": null,
|
| 385 |
+
"metadata": {},
|
| 386 |
+
"outputs": [],
|
| 387 |
+
"source": [
|
| 388 |
+
"# Judgement time!\n",
|
| 389 |
+
"\n",
|
| 390 |
+
"openai = OpenAI()\n",
|
| 391 |
+
"response = openai.chat.completions.create(\n",
|
| 392 |
+
" model=\"o3-mini\",\n",
|
| 393 |
+
" messages=judge_messages,\n",
|
| 394 |
+
")\n",
|
| 395 |
+
"results = response.choices[0].message.content\n",
|
| 396 |
+
"print(results)\n"
|
| 397 |
+
]
|
| 398 |
+
},
|
| 399 |
+
{
|
| 400 |
+
"cell_type": "code",
|
| 401 |
+
"execution_count": null,
|
| 402 |
+
"metadata": {},
|
| 403 |
+
"outputs": [],
|
| 404 |
+
"source": [
|
| 405 |
+
"# OK let's turn this into results!\n",
|
| 406 |
+
"\n",
|
| 407 |
+
"results_dict = json.loads(results)\n",
|
| 408 |
+
"ranks = results_dict[\"results\"]\n",
|
| 409 |
+
"for index, result in enumerate(ranks):\n",
|
| 410 |
+
" competitor = competitors[int(result)-1]\n",
|
| 411 |
+
" print(f\"Rank {index+1}: {competitor}\")"
|
| 412 |
+
]
|
| 413 |
+
},
|
| 414 |
+
{
|
| 415 |
+
"cell_type": "markdown",
|
| 416 |
+
"metadata": {},
|
| 417 |
+
"source": [
|
| 418 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 419 |
+
" <tr>\n",
|
| 420 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 421 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 422 |
+
" </td>\n",
|
| 423 |
+
" <td>\n",
|
| 424 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 425 |
+
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 426 |
+
" </span>\n",
|
| 427 |
+
" </td>\n",
|
| 428 |
+
" </tr>\n",
|
| 429 |
+
"</table>"
|
| 430 |
+
]
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"cell_type": "markdown",
|
| 434 |
+
"metadata": {},
|
| 435 |
+
"source": [
|
| 436 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 437 |
+
" <tr>\n",
|
| 438 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 439 |
+
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 440 |
+
" </td>\n",
|
| 441 |
+
" <td>\n",
|
| 442 |
+
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 443 |
+
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 444 |
+
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 445 |
+
" to business projects where accuracy is critical.\n",
|
| 446 |
+
" </span>\n",
|
| 447 |
+
" </td>\n",
|
| 448 |
+
" </tr>\n",
|
| 449 |
+
"</table>"
|
| 450 |
+
]
|
| 451 |
+
}
|
| 452 |
+
],
|
| 453 |
+
"metadata": {
|
| 454 |
+
"kernelspec": {
|
| 455 |
+
"display_name": ".venv",
|
| 456 |
+
"language": "python",
|
| 457 |
+
"name": "python3"
|
| 458 |
+
},
|
| 459 |
+
"language_info": {
|
| 460 |
+
"codemirror_mode": {
|
| 461 |
+
"name": "ipython",
|
| 462 |
+
"version": 3
|
| 463 |
+
},
|
| 464 |
+
"file_extension": ".py",
|
| 465 |
+
"mimetype": "text/x-python",
|
| 466 |
+
"name": "python",
|
| 467 |
+
"nbconvert_exporter": "python",
|
| 468 |
+
"pygments_lexer": "ipython3",
|
| 469 |
+
"version": "3.12.9"
|
| 470 |
+
}
|
| 471 |
+
},
|
| 472 |
+
"nbformat": 4,
|
| 473 |
+
"nbformat_minor": 2
|
| 474 |
+
}
|
community_contributions/2_lab2-judge-prompt-changed.ipynb
ADDED
|
@@ -0,0 +1,476 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "markdown",
|
| 14 |
+
"metadata": {},
|
| 15 |
+
"source": [
|
| 16 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
+
" <tr>\n",
|
| 18 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
+
" </td>\n",
|
| 21 |
+
" <td>\n",
|
| 22 |
+
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
+
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a GitHub account, use it to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
+
" </span>\n",
|
| 25 |
+
" </td>\n",
|
| 26 |
+
" </tr>\n",
|
| 27 |
+
"</table>"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"cell_type": "code",
|
| 32 |
+
"execution_count": 1,
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"outputs": [],
|
| 35 |
+
"source": [
|
| 36 |
+
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 37 |
+
"\n",
|
| 38 |
+
"import os\n",
|
| 39 |
+
"import json\n",
|
| 40 |
+
"from dotenv import load_dotenv\n",
|
| 41 |
+
"from openai import OpenAI\n",
|
| 42 |
+
"from anthropic import Anthropic\n",
|
| 43 |
+
"from IPython.display import Markdown, display"
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"cell_type": "code",
|
| 48 |
+
"execution_count": null,
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"outputs": [],
|
| 51 |
+
"source": [
|
| 52 |
+
"# Always remember to do this!\n",
|
| 53 |
+
"load_dotenv(override=True)"
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "code",
|
| 58 |
+
"execution_count": null,
|
| 59 |
+
"metadata": {},
|
| 60 |
+
"outputs": [],
|
| 61 |
+
"source": [
|
| 62 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 63 |
+
"\n",
|
| 64 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 65 |
+
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 66 |
+
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 67 |
+
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 68 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 69 |
+
"\n",
|
| 70 |
+
"if openai_api_key:\n",
|
| 71 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 72 |
+
"else:\n",
|
| 73 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 74 |
+
" \n",
|
| 75 |
+
"if anthropic_api_key:\n",
|
| 76 |
+
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 77 |
+
"else:\n",
|
| 78 |
+
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 79 |
+
"\n",
|
| 80 |
+
"if google_api_key:\n",
|
| 81 |
+
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 82 |
+
"else:\n",
|
| 83 |
+
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 84 |
+
"\n",
|
| 85 |
+
"if deepseek_api_key:\n",
|
| 86 |
+
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 87 |
+
"else:\n",
|
| 88 |
+
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"if groq_api_key:\n",
|
| 91 |
+
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 92 |
+
"else:\n",
|
| 93 |
+
" print(\"Groq API Key not set (and this is optional)\")"
|
| 94 |
+
]
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"cell_type": "code",
|
| 98 |
+
"execution_count": 4,
|
| 99 |
+
"metadata": {},
|
| 100 |
+
"outputs": [],
|
| 101 |
+
"source": [
|
| 102 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 103 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 104 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 105 |
+
]
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"cell_type": "code",
|
| 109 |
+
"execution_count": null,
|
| 110 |
+
"metadata": {},
|
| 111 |
+
"outputs": [],
|
| 112 |
+
"source": [
|
| 113 |
+
"messages"
|
| 114 |
+
]
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"cell_type": "code",
|
| 118 |
+
"execution_count": null,
|
| 119 |
+
"metadata": {},
|
| 120 |
+
"outputs": [],
|
| 121 |
+
"source": [
|
| 122 |
+
"openai = OpenAI()\n",
|
| 123 |
+
"response = openai.chat.completions.create(\n",
|
| 124 |
+
" model=\"gpt-4o-mini\",\n",
|
| 125 |
+
" messages=messages,\n",
|
| 126 |
+
")\n",
|
| 127 |
+
"question = response.choices[0].message.content\n",
|
| 128 |
+
"print(question)\n"
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"cell_type": "code",
|
| 133 |
+
"execution_count": 7,
|
| 134 |
+
"metadata": {},
|
| 135 |
+
"outputs": [],
|
| 136 |
+
"source": [
|
| 137 |
+
"competitors = []\n",
|
| 138 |
+
"answers = []\n",
|
| 139 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 140 |
+
]
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"cell_type": "code",
|
| 144 |
+
"execution_count": null,
|
| 145 |
+
"metadata": {},
|
| 146 |
+
"outputs": [],
|
| 147 |
+
"source": [
|
| 148 |
+
"# The API we know well\n",
|
| 149 |
+
"\n",
|
| 150 |
+
"model_name = \"gpt-4o-mini\"\n",
|
| 151 |
+
"\n",
|
| 152 |
+
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 153 |
+
"answer = response.choices[0].message.content\n",
|
| 154 |
+
"\n",
|
| 155 |
+
"display(Markdown(answer))\n",
|
| 156 |
+
"competitors.append(model_name)\n",
|
| 157 |
+
"answers.append(answer)"
|
| 158 |
+
]
|
| 159 |
+
},
|
| 160 |
+
{
|
| 161 |
+
"cell_type": "code",
|
| 162 |
+
"execution_count": null,
|
| 163 |
+
"metadata": {},
|
| 164 |
+
"outputs": [],
|
| 165 |
+
"source": [
|
| 166 |
+
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 167 |
+
"\n",
|
| 168 |
+
"model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 169 |
+
"\n",
|
| 170 |
+
"claude = Anthropic()\n",
|
| 171 |
+
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 172 |
+
"answer = response.content[0].text\n",
|
| 173 |
+
"\n",
|
| 174 |
+
"display(Markdown(answer))\n",
|
| 175 |
+
"competitors.append(model_name)\n",
|
| 176 |
+
"answers.append(answer)"
|
| 177 |
+
]
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"cell_type": "code",
|
| 181 |
+
"execution_count": null,
|
| 182 |
+
"metadata": {},
|
| 183 |
+
"outputs": [],
|
| 184 |
+
"source": [
|
| 185 |
+
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 186 |
+
"model_name = \"gemini-2.0-flash\"\n",
|
| 187 |
+
"\n",
|
| 188 |
+
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 189 |
+
"answer = response.choices[0].message.content\n",
|
| 190 |
+
"\n",
|
| 191 |
+
"display(Markdown(answer))\n",
|
| 192 |
+
"competitors.append(model_name)\n",
|
| 193 |
+
"answers.append(answer)"
|
| 194 |
+
]
|
| 195 |
+
},
|
| 196 |
+
{
|
| 197 |
+
"cell_type": "code",
|
| 198 |
+
"execution_count": null,
|
| 199 |
+
"metadata": {},
|
| 200 |
+
"outputs": [],
|
| 201 |
+
"source": [
|
| 202 |
+
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 203 |
+
"model_name = \"deepseek-chat\"\n",
|
| 204 |
+
"\n",
|
| 205 |
+
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 206 |
+
"answer = response.choices[0].message.content\n",
|
| 207 |
+
"\n",
|
| 208 |
+
"display(Markdown(answer))\n",
|
| 209 |
+
"competitors.append(model_name)\n",
|
| 210 |
+
"answers.append(answer)"
|
| 211 |
+
]
|
| 212 |
+
},
|
| 213 |
+
{
|
| 214 |
+
"cell_type": "code",
|
| 215 |
+
"execution_count": null,
|
| 216 |
+
"metadata": {},
|
| 217 |
+
"outputs": [],
|
| 218 |
+
"source": [
|
| 219 |
+
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 220 |
+
"model_name = \"llama-3.3-70b-versatile\"\n",
|
| 221 |
+
"\n",
|
| 222 |
+
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 223 |
+
"answer = response.choices[0].message.content\n",
|
| 224 |
+
"\n",
|
| 225 |
+
"display(Markdown(answer))\n",
|
| 226 |
+
"competitors.append(model_name)\n",
|
| 227 |
+
"answers.append(answer)\n"
|
| 228 |
+
]
|
| 229 |
+
},
|
| 230 |
+
{
|
| 231 |
+
"cell_type": "markdown",
|
| 232 |
+
"metadata": {},
|
| 233 |
+
"source": [
|
| 234 |
+
"## For the next cell, we will use Ollama\n",
|
| 235 |
+
"\n",
|
| 236 |
+
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 237 |
+
"and runs models locally using high performance C++ code.\n",
|
| 238 |
+
"\n",
|
| 239 |
+
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 240 |
+
"\n",
|
| 241 |
+
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 242 |
+
"\n",
|
| 243 |
+
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 244 |
+
"\n",
|
| 245 |
+
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 246 |
+
"\n",
|
| 247 |
+
"`ollama pull <model_name>` downloads a model locally \n",
|
| 248 |
+
"`ollama ls` lists all the models you've downloaded \n",
|
| 249 |
+
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 250 |
+
]
|
| 251 |
+
},
|
| 252 |
+
{
|
| 253 |
+
"cell_type": "markdown",
|
| 254 |
+
"metadata": {},
|
| 255 |
+
"source": [
|
| 256 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 257 |
+
" <tr>\n",
|
| 258 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 259 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 260 |
+
" </td>\n",
|
| 261 |
+
" <td>\n",
|
| 262 |
+
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 263 |
+
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See the <A href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 264 |
+
" </span>\n",
|
| 265 |
+
" </td>\n",
|
| 266 |
+
" </tr>\n",
|
| 267 |
+
"</table>"
|
| 268 |
+
]
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"cell_type": "code",
|
| 272 |
+
"execution_count": null,
|
| 273 |
+
"metadata": {},
|
| 274 |
+
"outputs": [],
|
| 275 |
+
"source": [
|
| 276 |
+
"!ollama pull llama3.2"
|
| 277 |
+
]
|
| 278 |
+
},
|
| 279 |
+
{
|
| 280 |
+
"cell_type": "code",
|
| 281 |
+
"execution_count": null,
|
| 282 |
+
"metadata": {},
|
| 283 |
+
"outputs": [],
|
| 284 |
+
"source": [
|
| 285 |
+
"ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 286 |
+
"model_name = \"llama3.2\"\n",
|
| 287 |
+
"\n",
|
| 288 |
+
"response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 289 |
+
"answer = response.choices[0].message.content\n",
|
| 290 |
+
"\n",
|
| 291 |
+
"display(Markdown(answer))\n",
|
| 292 |
+
"competitors.append(model_name)\n",
|
| 293 |
+
"answers.append(answer)"
|
| 294 |
+
]
|
| 295 |
+
},
|
| 296 |
+
{
|
| 297 |
+
"cell_type": "code",
|
| 298 |
+
"execution_count": null,
|
| 299 |
+
"metadata": {},
|
| 300 |
+
"outputs": [],
|
| 301 |
+
"source": [
|
| 302 |
+
"# So where are we?\n",
|
| 303 |
+
"\n",
|
| 304 |
+
"print(competitors)\n",
|
| 305 |
+
"print(answers)\n"
|
| 306 |
+
]
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"cell_type": "code",
|
| 310 |
+
"execution_count": null,
|
| 311 |
+
"metadata": {},
|
| 312 |
+
"outputs": [],
|
| 313 |
+
"source": [
|
| 314 |
+
"# It's nice to know how to use \"zip\"\n",
|
| 315 |
+
"for competitor, answer in zip(competitors, answers):\n",
|
| 316 |
+
" print(f\"Competitor: {competitor}\\n\\n{answer}\")\n"
|
| 317 |
+
]
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"cell_type": "code",
|
| 321 |
+
"execution_count": 20,
|
| 322 |
+
"metadata": {},
|
| 323 |
+
"outputs": [],
|
| 324 |
+
"source": [
|
| 325 |
+
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 326 |
+
"\n",
|
| 327 |
+
"together = \"\"\n",
|
| 328 |
+
"for index, answer in enumerate(answers):\n",
|
| 329 |
+
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 330 |
+
" together += answer + \"\\n\\n\""
|
| 331 |
+
]
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"cell_type": "code",
|
| 335 |
+
"execution_count": null,
|
| 336 |
+
"metadata": {},
|
| 337 |
+
"outputs": [],
|
| 338 |
+
"source": [
|
| 339 |
+
"print(together)"
|
| 340 |
+
]
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"cell_type": "code",
|
| 344 |
+
"execution_count": null,
|
| 345 |
+
"metadata": {},
|
| 346 |
+
"outputs": [],
|
| 347 |
+
"source": [
|
| 348 |
+
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 349 |
+
"Each model has been given this question:\n",
|
| 350 |
+
"\n",
|
| 351 |
+
"{question}\n",
|
| 352 |
+
"\n",
|
| 353 |
+
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 354 |
+
"Respond with JSON, and only JSON, with the following format:\n",
|
| 355 |
+
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 356 |
+
"Answer only the number for example\n",
|
| 357 |
+
"{{\"results\": [\"1\", \"2\", \"3\", ...]}}\n",
|
| 358 |
+
"\n",
|
| 359 |
+
"Here are the responses from each competitor:\n",
|
| 360 |
+
"\n",
|
| 361 |
+
"{together}\n",
|
| 362 |
+
"\n",
|
| 363 |
+
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 364 |
+
]
|
| 365 |
+
},
|
| 366 |
+
{
|
| 367 |
+
"cell_type": "code",
|
| 368 |
+
"execution_count": null,
|
| 369 |
+
"metadata": {},
|
| 370 |
+
"outputs": [],
|
| 371 |
+
"source": [
|
| 372 |
+
"print(judge)"
|
| 373 |
+
]
|
| 374 |
+
},
|
| 375 |
+
{
|
| 376 |
+
"cell_type": "code",
|
| 377 |
+
"execution_count": 29,
|
| 378 |
+
"metadata": {},
|
| 379 |
+
"outputs": [],
|
| 380 |
+
"source": [
|
| 381 |
+
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 382 |
+
]
|
| 383 |
+
},
|
| 384 |
+
{
|
| 385 |
+
"cell_type": "code",
|
| 386 |
+
"execution_count": null,
|
| 387 |
+
"metadata": {},
|
| 388 |
+
"outputs": [],
|
| 389 |
+
"source": [
|
| 390 |
+
"# Judgement time!\n",
|
| 391 |
+
"\n",
|
| 392 |
+
"openai = OpenAI()\n",
|
| 393 |
+
"response = openai.chat.completions.create(\n",
|
| 394 |
+
" model=\"o3-mini\",\n",
|
| 395 |
+
" messages=judge_messages,\n",
|
| 396 |
+
")\n",
|
| 397 |
+
"results = response.choices[0].message.content\n",
|
| 398 |
+
"print(results)\n"
|
| 399 |
+
]
|
| 400 |
+
},
|
| 401 |
+
{
|
| 402 |
+
"cell_type": "code",
|
| 403 |
+
"execution_count": null,
|
| 404 |
+
"metadata": {},
|
| 405 |
+
"outputs": [],
|
| 406 |
+
"source": [
|
| 407 |
+
"# OK let's turn this into results!\n",
|
| 408 |
+
"\n",
|
| 409 |
+
"results_dict = json.loads(results)\n",
|
| 410 |
+
"ranks = results_dict[\"results\"]\n",
|
| 411 |
+
"for index, result in enumerate(ranks):\n",
|
| 412 |
+
" competitor = competitors[int(result)-1]\n",
|
| 413 |
+
" print(f\"Rank {index+1}: {competitor}\")"
|
| 414 |
+
]
|
| 415 |
+
},
|
| 416 |
+
{
|
| 417 |
+
"cell_type": "markdown",
|
| 418 |
+
"metadata": {},
|
| 419 |
+
"source": [
|
| 420 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 421 |
+
" <tr>\n",
|
| 422 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 423 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 424 |
+
" </td>\n",
|
| 425 |
+
" <td>\n",
|
| 426 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 427 |
+
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 428 |
+
" </span>\n",
|
| 429 |
+
" </td>\n",
|
| 430 |
+
" </tr>\n",
|
| 431 |
+
"</table>"
|
| 432 |
+
]
|
| 433 |
+
},
|
| 434 |
+
{
|
| 435 |
+
"cell_type": "markdown",
|
| 436 |
+
"metadata": {},
|
| 437 |
+
"source": [
|
| 438 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 439 |
+
" <tr>\n",
|
| 440 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 441 |
+
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 442 |
+
" </td>\n",
|
| 443 |
+
" <td>\n",
|
| 444 |
+
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 445 |
+
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 446 |
+
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 447 |
+
" to business projects where accuracy is critical.\n",
|
| 448 |
+
" </span>\n",
|
| 449 |
+
" </td>\n",
|
| 450 |
+
" </tr>\n",
|
| 451 |
+
"</table>"
|
| 452 |
+
]
|
| 453 |
+
}
|
| 454 |
+
],
|
| 455 |
+
"metadata": {
|
| 456 |
+
"kernelspec": {
|
| 457 |
+
"display_name": ".venv",
|
| 458 |
+
"language": "python",
|
| 459 |
+
"name": "python3"
|
| 460 |
+
},
|
| 461 |
+
"language_info": {
|
| 462 |
+
"codemirror_mode": {
|
| 463 |
+
"name": "ipython",
|
| 464 |
+
"version": 3
|
| 465 |
+
},
|
| 466 |
+
"file_extension": ".py",
|
| 467 |
+
"mimetype": "text/x-python",
|
| 468 |
+
"name": "python",
|
| 469 |
+
"nbconvert_exporter": "python",
|
| 470 |
+
"pygments_lexer": "ipython3",
|
| 471 |
+
"version": "3.12.9"
|
| 472 |
+
}
|
| 473 |
+
},
|
| 474 |
+
"nbformat": 4,
|
| 475 |
+
"nbformat_minor": 2
|
| 476 |
+
}
|
community_contributions/2_lab2-parallelization.ipynb
ADDED
|
@@ -0,0 +1,440 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Changes I've made with this lab.\n",
|
| 10 |
+
"1) Modified the original question to instead generate a range of questions, 12 of them. These questions will be used to evaluate each LLM's reasoning, knowledge, creativity, and ability to handle nuanced scenarios.\n",
|
| 11 |
+
"2) I've changed this lab to run the queries in parallel. Thanks GPT for helping with the code to do that. :)\n",
|
| 12 |
+
"3) Instead of having one LLM rate all the responses, I have all of the LLM's rate each others work and then use a Borda Count to asign points to determine the winner."
|
| 13 |
+
]
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"cell_type": "code",
|
| 17 |
+
"execution_count": null,
|
| 18 |
+
"metadata": {},
|
| 19 |
+
"outputs": [],
|
| 20 |
+
"source": [
|
| 21 |
+
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 22 |
+
"\n",
|
| 23 |
+
"import os\n",
|
| 24 |
+
"import json\n",
|
| 25 |
+
"from dotenv import load_dotenv\n",
|
| 26 |
+
"from openai import OpenAI\n",
|
| 27 |
+
"from anthropic import Anthropic\n",
|
| 28 |
+
"from IPython.display import Markdown, display"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "code",
|
| 33 |
+
"execution_count": null,
|
| 34 |
+
"metadata": {},
|
| 35 |
+
"outputs": [],
|
| 36 |
+
"source": [
|
| 37 |
+
"# Always remember to do this!\n",
|
| 38 |
+
"load_dotenv(override=True)"
|
| 39 |
+
]
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"cell_type": "code",
|
| 43 |
+
"execution_count": null,
|
| 44 |
+
"metadata": {},
|
| 45 |
+
"outputs": [],
|
| 46 |
+
"source": [
|
| 47 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 48 |
+
"\n",
|
| 49 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 50 |
+
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 51 |
+
"gemini_api_key = os.getenv('GEMINI_API_KEY')\n",
|
| 52 |
+
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 53 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 54 |
+
"\n",
|
| 55 |
+
"if openai_api_key:\n",
|
| 56 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 57 |
+
"else:\n",
|
| 58 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 59 |
+
" \n",
|
| 60 |
+
"if anthropic_api_key:\n",
|
| 61 |
+
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 62 |
+
"else:\n",
|
| 63 |
+
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 64 |
+
"\n",
|
| 65 |
+
"if gemini_api_key:\n",
|
| 66 |
+
" print(f\"Gemini API Key exists and begins {gemini_api_key[:2]}\")\n",
|
| 67 |
+
"else:\n",
|
| 68 |
+
" print(\"Gemini API Key not set (and this is optional)\")\n",
|
| 69 |
+
"\n",
|
| 70 |
+
"if deepseek_api_key:\n",
|
| 71 |
+
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 72 |
+
"else:\n",
|
| 73 |
+
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 74 |
+
"\n",
|
| 75 |
+
"if groq_api_key:\n",
|
| 76 |
+
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 77 |
+
"else:\n",
|
| 78 |
+
" print(\"Groq API Key not set (and this is optional)\")"
|
| 79 |
+
]
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"cell_type": "code",
|
| 83 |
+
"execution_count": null,
|
| 84 |
+
"metadata": {},
|
| 85 |
+
"outputs": [],
|
| 86 |
+
"source": [
|
| 87 |
+
"request = \"\"\"You are being evaluated for your reasoning, knowledge, creativity, and ability to handle nuanced scenarios. \n",
|
| 88 |
+
"Generate 12 questions that cover the following categories:\n",
|
| 89 |
+
"- Logical reasoning and problem solving\n",
|
| 90 |
+
"- Creative writing and storytelling\n",
|
| 91 |
+
"- Factual accuracy and knowledge recall\n",
|
| 92 |
+
"- Following instructions with strict constraints\n",
|
| 93 |
+
"- Multi-step planning and organization\n",
|
| 94 |
+
"- Ethical dilemmas and debatable issues\n",
|
| 95 |
+
"- Philosophical or abstract reasoning\n",
|
| 96 |
+
"- Summarization and explanation at different levels\n",
|
| 97 |
+
"- Translation and multilingual ability\n",
|
| 98 |
+
"- Roleplay or adaptive communication style\n",
|
| 99 |
+
"\n",
|
| 100 |
+
"Number each question from 1 to 12. \n",
|
| 101 |
+
"The result should be a balanced benchmark question set that fully tests an LLM’s capabilities.\n",
|
| 102 |
+
"\n",
|
| 103 |
+
"Important: Output only clean plain text. \n",
|
| 104 |
+
"Do not use any markup, formatting symbols, quotation marks, brackets, lists, or special characters \n",
|
| 105 |
+
"that could cause misinterpretation. Only provide plain text questions, one per line, numbered 1 to 20.\n",
|
| 106 |
+
"\"\"\"\n",
|
| 107 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 108 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 109 |
+
]
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"cell_type": "code",
|
| 113 |
+
"execution_count": null,
|
| 114 |
+
"metadata": {},
|
| 115 |
+
"outputs": [],
|
| 116 |
+
"source": [
|
| 117 |
+
"# Generate the questions.\n",
|
| 118 |
+
"openai = OpenAI()\n",
|
| 119 |
+
"response = openai.chat.completions.create(\n",
|
| 120 |
+
" model=\"gpt-4o-mini\",\n",
|
| 121 |
+
" messages=messages,\n",
|
| 122 |
+
")\n",
|
| 123 |
+
"question = response.choices[0].message.content\n",
|
| 124 |
+
"\n",
|
| 125 |
+
"display(Markdown(question))"
|
| 126 |
+
]
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"cell_type": "code",
|
| 130 |
+
"execution_count": null,
|
| 131 |
+
"metadata": {},
|
| 132 |
+
"outputs": [],
|
| 133 |
+
"source": [
|
| 134 |
+
"competitors = []\n",
|
| 135 |
+
"answers = []\n",
|
| 136 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 137 |
+
]
|
| 138 |
+
},
|
| 139 |
+
{
|
| 140 |
+
"cell_type": "code",
|
| 141 |
+
"execution_count": null,
|
| 142 |
+
"metadata": {},
|
| 143 |
+
"outputs": [],
|
| 144 |
+
"source": [
|
| 145 |
+
"# Ask the LLM's in Parallel\n",
|
| 146 |
+
"\n",
|
| 147 |
+
"import asyncio\n",
|
| 148 |
+
"\n",
|
| 149 |
+
"clients = {\n",
|
| 150 |
+
" \"openai\": OpenAI(),\n",
|
| 151 |
+
" \"claude\": Anthropic(),\n",
|
| 152 |
+
" \"gemini\": OpenAI(api_key=gemini_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\"),\n",
|
| 153 |
+
" \"deepseek\": OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\"),\n",
|
| 154 |
+
" \"groq\": OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\"),\n",
|
| 155 |
+
"}\n",
|
| 156 |
+
"\n",
|
| 157 |
+
"# Get the answers from the LLM\n",
|
| 158 |
+
"async def call_llm(model_name, messages):\n",
|
| 159 |
+
" try:\n",
|
| 160 |
+
" if \"claude\" in model_name:\n",
|
| 161 |
+
" response = await asyncio.to_thread(\n",
|
| 162 |
+
" clients[\"claude\"].messages.create,\n",
|
| 163 |
+
" model=model_name,\n",
|
| 164 |
+
" messages=messages,\n",
|
| 165 |
+
" max_tokens=3000,\n",
|
| 166 |
+
" )\n",
|
| 167 |
+
" answer = \"\".join([c.text for c in response.content if c.type == \"text\"])\n",
|
| 168 |
+
" \n",
|
| 169 |
+
" elif \"gpt-4o-mini\" in model_name:\n",
|
| 170 |
+
" response = await asyncio.to_thread(\n",
|
| 171 |
+
" clients[\"openai\"].chat.completions.create,\n",
|
| 172 |
+
" model=model_name,\n",
|
| 173 |
+
" messages=messages,\n",
|
| 174 |
+
" )\n",
|
| 175 |
+
" answer = response.choices[0].message.content\n",
|
| 176 |
+
"\n",
|
| 177 |
+
" elif \"gemini\" in model_name:\n",
|
| 178 |
+
" response = await asyncio.to_thread(\n",
|
| 179 |
+
" clients[\"gemini\"].chat.completions.create,\n",
|
| 180 |
+
" model=model_name,\n",
|
| 181 |
+
" messages=messages,\n",
|
| 182 |
+
" )\n",
|
| 183 |
+
" answer = response.choices[0].message.content\n",
|
| 184 |
+
"\n",
|
| 185 |
+
" elif \"deepseek\" in model_name:\n",
|
| 186 |
+
" response = await asyncio.to_thread(\n",
|
| 187 |
+
" clients[\"deepseek\"].chat.completions.create,\n",
|
| 188 |
+
" model=model_name,\n",
|
| 189 |
+
" messages=messages,\n",
|
| 190 |
+
" )\n",
|
| 191 |
+
" answer = response.choices[0].message.content\n",
|
| 192 |
+
"\n",
|
| 193 |
+
" elif \"llama\" in model_name:\n",
|
| 194 |
+
" response = await asyncio.to_thread(\n",
|
| 195 |
+
" clients[\"groq\"].chat.completions.create,\n",
|
| 196 |
+
" model=model_name,\n",
|
| 197 |
+
" messages=messages,\n",
|
| 198 |
+
" )\n",
|
| 199 |
+
" answer = response.choices[0].message.content\n",
|
| 200 |
+
"\n",
|
| 201 |
+
" return model_name, answer \n",
|
| 202 |
+
"\n",
|
| 203 |
+
" except Exception as e:\n",
|
| 204 |
+
" print (f\"❌ Error: {str(e)}\")\n",
|
| 205 |
+
" return model_name, \"I was not able to generate answers for any of the questions.\"\n",
|
| 206 |
+
"\n",
|
| 207 |
+
"\n",
|
| 208 |
+
"# send out the calls to the LLM to ask teh questions.\n",
|
| 209 |
+
"async def ask_questions_in_parallel(messages):\n",
|
| 210 |
+
" competitor_models = [\n",
|
| 211 |
+
" \"gpt-4o-mini\",\n",
|
| 212 |
+
" \"claude-3-7-sonnet-latest\",\n",
|
| 213 |
+
" \"gemini-2.0-flash\",\n",
|
| 214 |
+
" \"deepseek-chat\",\n",
|
| 215 |
+
" \"llama-3.3-70b-versatile\"\n",
|
| 216 |
+
" ]\n",
|
| 217 |
+
"\n",
|
| 218 |
+
" # create tasks to call the LLM's in parallel\n",
|
| 219 |
+
" tasks = [call_llm(model, messages) for model in competitor_models]\n",
|
| 220 |
+
"\n",
|
| 221 |
+
" answers = []\n",
|
| 222 |
+
" competitors = []\n",
|
| 223 |
+
"\n",
|
| 224 |
+
" # When we have an answer, we can process it. No waiting.\n",
|
| 225 |
+
" for task in asyncio.as_completed(tasks):\n",
|
| 226 |
+
" model_name, answer = await task\n",
|
| 227 |
+
" competitors.append(model_name)\n",
|
| 228 |
+
" answers.append(answer)\n",
|
| 229 |
+
" print(f\"\\n✅ Got response from {model_name}\")\n",
|
| 230 |
+
"\n",
|
| 231 |
+
" return competitors, answers"
|
| 232 |
+
]
|
| 233 |
+
},
|
| 234 |
+
{
|
| 235 |
+
"cell_type": "code",
|
| 236 |
+
"execution_count": null,
|
| 237 |
+
"metadata": {},
|
| 238 |
+
"outputs": [],
|
| 239 |
+
"source": [
|
| 240 |
+
"# Fire off the ask to all the LLM's at once. Parallelization...\n",
|
| 241 |
+
"competitors, answers = await ask_questions_in_parallel(messages)"
|
| 242 |
+
]
|
| 243 |
+
},
|
| 244 |
+
{
|
| 245 |
+
"cell_type": "code",
|
| 246 |
+
"execution_count": null,
|
| 247 |
+
"metadata": {},
|
| 248 |
+
"outputs": [],
|
| 249 |
+
"source": [
|
| 250 |
+
"#Look at the results\n",
|
| 251 |
+
"print (len(answers))\n",
|
| 252 |
+
"print (len(competitors))\n",
|
| 253 |
+
"print (competitors)"
|
| 254 |
+
]
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"cell_type": "code",
|
| 258 |
+
"execution_count": null,
|
| 259 |
+
"metadata": {},
|
| 260 |
+
"outputs": [],
|
| 261 |
+
"source": [
|
| 262 |
+
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 263 |
+
"\n",
|
| 264 |
+
"together = \"\"\n",
|
| 265 |
+
"for index, answer in enumerate(answers):\n",
|
| 266 |
+
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 267 |
+
" together += answer + \"\\n\\n\""
|
| 268 |
+
]
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"cell_type": "code",
|
| 272 |
+
"execution_count": null,
|
| 273 |
+
"metadata": {},
|
| 274 |
+
"outputs": [],
|
| 275 |
+
"source": [
|
| 276 |
+
"print(together)"
|
| 277 |
+
]
|
| 278 |
+
},
|
| 279 |
+
{
|
| 280 |
+
"cell_type": "code",
|
| 281 |
+
"execution_count": null,
|
| 282 |
+
"metadata": {},
|
| 283 |
+
"outputs": [],
|
| 284 |
+
"source": [
|
| 285 |
+
"\n",
|
| 286 |
+
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 287 |
+
"Each model has been given the folowing questions:\n",
|
| 288 |
+
"\n",
|
| 289 |
+
"{question}\n",
|
| 290 |
+
"\n",
|
| 291 |
+
"Your task is to evaluate the overall strength of the arguments presented by each competitor. \n",
|
| 292 |
+
"Consider the following factors:\n",
|
| 293 |
+
"- Clarity: how clearly the ideas are communicated\n",
|
| 294 |
+
"- Relevance: how directly the response addresses the question\n",
|
| 295 |
+
"- Depth: the level of reasoning, insight, or supporting evidence provided\n",
|
| 296 |
+
"- Persuasiveness: how compelling or convincing the response is overall\n",
|
| 297 |
+
"Respond with JSON, and only JSON.\n",
|
| 298 |
+
"The output must be a single JSON array of competitor names, ordered from best to worst.\n",
|
| 299 |
+
"Do not include any keys, labels, or extra text.\n",
|
| 300 |
+
"\n",
|
| 301 |
+
"Example format:\n",
|
| 302 |
+
"[\"1\", \"3\", \"5\", \"2\", \"4\"]\n",
|
| 303 |
+
"\n",
|
| 304 |
+
"Here are the responses from each competitor:\n",
|
| 305 |
+
"\n",
|
| 306 |
+
"{together}\n",
|
| 307 |
+
"\n",
|
| 308 |
+
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\n",
|
| 309 |
+
"Do not deviate from the json format as described above. Do not include the term ranking in the final json\"\"\"\n"
|
| 310 |
+
]
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"cell_type": "code",
|
| 314 |
+
"execution_count": null,
|
| 315 |
+
"metadata": {},
|
| 316 |
+
"outputs": [],
|
| 317 |
+
"source": [
|
| 318 |
+
"print(judge)"
|
| 319 |
+
]
|
| 320 |
+
},
|
| 321 |
+
{
|
| 322 |
+
"cell_type": "code",
|
| 323 |
+
"execution_count": null,
|
| 324 |
+
"metadata": {},
|
| 325 |
+
"outputs": [],
|
| 326 |
+
"source": [
|
| 327 |
+
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 328 |
+
]
|
| 329 |
+
},
|
| 330 |
+
{
|
| 331 |
+
"cell_type": "code",
|
| 332 |
+
"execution_count": null,
|
| 333 |
+
"metadata": {},
|
| 334 |
+
"outputs": [],
|
| 335 |
+
"source": [
|
| 336 |
+
"# Have each LLM rate all of the results.\n",
|
| 337 |
+
"results = dict()\n",
|
| 338 |
+
"LLM_result = ''\n",
|
| 339 |
+
"\n",
|
| 340 |
+
"competitors, answers = await ask_questions_in_parallel(judge_messages)\n",
|
| 341 |
+
"\n",
|
| 342 |
+
"results = dict()\n",
|
| 343 |
+
"for index, each_competitor in enumerate(competitors):\n",
|
| 344 |
+
" results[each_competitor] = answers[index].strip()"
|
| 345 |
+
]
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"cell_type": "code",
|
| 349 |
+
"execution_count": null,
|
| 350 |
+
"metadata": {},
|
| 351 |
+
"outputs": [],
|
| 352 |
+
"source": [
|
| 353 |
+
"# See the results\n",
|
| 354 |
+
"print (len(answers))\n",
|
| 355 |
+
"results = dict()\n",
|
| 356 |
+
"for index, each_competitor in enumerate(competitors):\n",
|
| 357 |
+
" results[each_competitor] = answers[index]\n",
|
| 358 |
+
"\n",
|
| 359 |
+
"print (results)"
|
| 360 |
+
]
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"cell_type": "code",
|
| 364 |
+
"execution_count": null,
|
| 365 |
+
"metadata": {},
|
| 366 |
+
"outputs": [],
|
| 367 |
+
"source": [
|
| 368 |
+
"# Let's convert these rankings into scores. Borda count (1st gets n-1 points, 2nd gets n-2, etc.).\n",
|
| 369 |
+
"number_of_competitors = len(competitors)\n",
|
| 370 |
+
"scores = {}\n",
|
| 371 |
+
"\n",
|
| 372 |
+
"for rankings in results.values():\n",
|
| 373 |
+
" print(rankings)"
|
| 374 |
+
]
|
| 375 |
+
},
|
| 376 |
+
{
|
| 377 |
+
"cell_type": "code",
|
| 378 |
+
"execution_count": null,
|
| 379 |
+
"metadata": {},
|
| 380 |
+
"outputs": [],
|
| 381 |
+
"source": [
|
| 382 |
+
"# # Borda count points (1st gets n-1, 2nd gets n-2, etc.)\n",
|
| 383 |
+
"num_competitors = len(competitors)\n",
|
| 384 |
+
"\n",
|
| 385 |
+
"competitor_dict = dict()\n",
|
| 386 |
+
"for index, each_competitor in enumerate(competitors):\n",
|
| 387 |
+
" competitor_dict[each_competitor] = index + 1\n",
|
| 388 |
+
"\n",
|
| 389 |
+
"borda_scores_dict = dict()\n",
|
| 390 |
+
"for each_competitor in competitors:\n",
|
| 391 |
+
" if each_competitor not in borda_scores_dict:\n",
|
| 392 |
+
" borda_scores_dict[each_competitor] = 0\n",
|
| 393 |
+
"\n",
|
| 394 |
+
"for voter_llm, ranking_str in results.items():\n",
|
| 395 |
+
" ranking_indices = json.loads(ranking_str)\n",
|
| 396 |
+
" ranking_indices = [int(x) for x in ranking_indices]\n",
|
| 397 |
+
"\n",
|
| 398 |
+
" # For each position in the ranking, award points\n",
|
| 399 |
+
" for position, competitor_index in enumerate(ranking_indices):\n",
|
| 400 |
+
" competitor_name = competitors[competitor_index - 1]\n",
|
| 401 |
+
"\n",
|
| 402 |
+
" # Borda count points (1st gets n-1, 2nd gets n-2, etc.)\n",
|
| 403 |
+
" points = num_competitors - 1 - position \n",
|
| 404 |
+
" borda_scores_dict[competitor_name] += points\n",
|
| 405 |
+
" \n",
|
| 406 |
+
"sorted_results = sorted(borda_scores_dict.items(), key=lambda x: x[1], reverse=True)\n",
|
| 407 |
+
"\n",
|
| 408 |
+
"print(f\"{'Rank':<4} {'LLM':<30} {'Points':<3}\")\n",
|
| 409 |
+
"print(\"-\" * 50)\n",
|
| 410 |
+
"\n",
|
| 411 |
+
"for rank, (llm, points) in enumerate(sorted_results, 1):\n",
|
| 412 |
+
" print(f\"{rank:<4} {llm:<30} {points:<8}\")\n",
|
| 413 |
+
"\n",
|
| 414 |
+
"print(\"\\nQuestions asked:\")\n",
|
| 415 |
+
"print(question)"
|
| 416 |
+
]
|
| 417 |
+
}
|
| 418 |
+
],
|
| 419 |
+
"metadata": {
|
| 420 |
+
"kernelspec": {
|
| 421 |
+
"display_name": ".venv",
|
| 422 |
+
"language": "python",
|
| 423 |
+
"name": "python3"
|
| 424 |
+
},
|
| 425 |
+
"language_info": {
|
| 426 |
+
"codemirror_mode": {
|
| 427 |
+
"name": "ipython",
|
| 428 |
+
"version": 3
|
| 429 |
+
},
|
| 430 |
+
"file_extension": ".py",
|
| 431 |
+
"mimetype": "text/x-python",
|
| 432 |
+
"name": "python",
|
| 433 |
+
"nbconvert_exporter": "python",
|
| 434 |
+
"pygments_lexer": "ipython3",
|
| 435 |
+
"version": "3.12.2"
|
| 436 |
+
}
|
| 437 |
+
},
|
| 438 |
+
"nbformat": 4,
|
| 439 |
+
"nbformat_minor": 2
|
| 440 |
+
}
|
community_contributions/2_lab2.ipynb
ADDED
|
@@ -0,0 +1,517 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "markdown",
|
| 14 |
+
"metadata": {},
|
| 15 |
+
"source": [
|
| 16 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
+
" <tr>\n",
|
| 18 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
+
" </td>\n",
|
| 21 |
+
" <td>\n",
|
| 22 |
+
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
+
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
+
" </span>\n",
|
| 25 |
+
" </td>\n",
|
| 26 |
+
" </tr>\n",
|
| 27 |
+
"</table>"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"cell_type": "code",
|
| 32 |
+
"execution_count": null,
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"outputs": [],
|
| 35 |
+
"source": [
|
| 36 |
+
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 37 |
+
"\n",
|
| 38 |
+
"import os #allows the code to interact with the operating system\n",
|
| 39 |
+
"import json #imports Python's JSON library\n",
|
| 40 |
+
"from dotenv import load_dotenv #allows the code to load the .env file. A .env file must be explicitly loaded\n",
|
| 41 |
+
"from openai import OpenAI\n",
|
| 42 |
+
"from anthropic import Anthropic\n",
|
| 43 |
+
"from IPython.display import Markdown, display"
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"cell_type": "code",
|
| 48 |
+
"execution_count": 2,
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"outputs": [
|
| 51 |
+
{
|
| 52 |
+
"data": {
|
| 53 |
+
"text/plain": [
|
| 54 |
+
"True"
|
| 55 |
+
]
|
| 56 |
+
},
|
| 57 |
+
"execution_count": 2,
|
| 58 |
+
"metadata": {},
|
| 59 |
+
"output_type": "execute_result"
|
| 60 |
+
}
|
| 61 |
+
],
|
| 62 |
+
"source": [
|
| 63 |
+
"# Always remember to do this!\n",
|
| 64 |
+
"load_dotenv(override=True) #prioritizes the local .env file and will replace existing env variables"
|
| 65 |
+
]
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"cell_type": "code",
|
| 69 |
+
"execution_count": 3,
|
| 70 |
+
"metadata": {},
|
| 71 |
+
"outputs": [
|
| 72 |
+
{
|
| 73 |
+
"name": "stdout",
|
| 74 |
+
"output_type": "stream",
|
| 75 |
+
"text": [
|
| 76 |
+
"OpenAI API Key exists and begins sk-proj-\n",
|
| 77 |
+
"Anthropic API Key not set (and this is optional)\n",
|
| 78 |
+
"Google API Key not set (and this is optional)\n",
|
| 79 |
+
"DeepSeek API Key not set (and this is optional)\n",
|
| 80 |
+
"Groq API Key not set (and this is optional)\n"
|
| 81 |
+
]
|
| 82 |
+
}
|
| 83 |
+
],
|
| 84 |
+
"source": [
|
| 85 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 86 |
+
"\n",
|
| 87 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 88 |
+
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 89 |
+
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 90 |
+
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 91 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 92 |
+
"\n",
|
| 93 |
+
"if openai_api_key:\n",
|
| 94 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 95 |
+
"else:\n",
|
| 96 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 97 |
+
" \n",
|
| 98 |
+
"if anthropic_api_key:\n",
|
| 99 |
+
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 100 |
+
"else:\n",
|
| 101 |
+
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 102 |
+
"\n",
|
| 103 |
+
"if google_api_key:\n",
|
| 104 |
+
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 105 |
+
"else:\n",
|
| 106 |
+
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 107 |
+
"\n",
|
| 108 |
+
"if deepseek_api_key:\n",
|
| 109 |
+
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 110 |
+
"else:\n",
|
| 111 |
+
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 112 |
+
"\n",
|
| 113 |
+
"if groq_api_key:\n",
|
| 114 |
+
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 115 |
+
"else:\n",
|
| 116 |
+
" print(\"Groq API Key not set (and this is optional)\")"
|
| 117 |
+
]
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"cell_type": "code",
|
| 121 |
+
"execution_count": null,
|
| 122 |
+
"metadata": {},
|
| 123 |
+
"outputs": [],
|
| 124 |
+
"source": [
|
| 125 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 126 |
+
"request += \"Answer only with the question, no explanation. I want the question to be related to the cruelty of life\"\n",
|
| 127 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 128 |
+
]
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"cell_type": "code",
|
| 132 |
+
"execution_count": 5,
|
| 133 |
+
"metadata": {},
|
| 134 |
+
"outputs": [
|
| 135 |
+
{
|
| 136 |
+
"data": {
|
| 137 |
+
"text/plain": [
|
| 138 |
+
"[{'role': 'user',\n",
|
| 139 |
+
" 'content': 'Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. Answer only with the question, no explanation.'}]"
|
| 140 |
+
]
|
| 141 |
+
},
|
| 142 |
+
"execution_count": 5,
|
| 143 |
+
"metadata": {},
|
| 144 |
+
"output_type": "execute_result"
|
| 145 |
+
}
|
| 146 |
+
],
|
| 147 |
+
"source": [
|
| 148 |
+
"messages"
|
| 149 |
+
]
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"cell_type": "code",
|
| 153 |
+
"execution_count": 7,
|
| 154 |
+
"metadata": {},
|
| 155 |
+
"outputs": [
|
| 156 |
+
{
|
| 157 |
+
"name": "stdout",
|
| 158 |
+
"output_type": "stream",
|
| 159 |
+
"text": [
|
| 160 |
+
"In a scenario where two intelligent agents with differing ethical frameworks encounter a moral dilemma involving a choice between the greater good and individual rights, how should they navigate their decision-making process, and what factors should they consider to justify their final actions?\n"
|
| 161 |
+
]
|
| 162 |
+
}
|
| 163 |
+
],
|
| 164 |
+
"source": [
|
| 165 |
+
"openai = OpenAI()\n",
|
| 166 |
+
"response = openai.chat.completions.create(\n",
|
| 167 |
+
" model=\"gpt-4o-mini\",\n",
|
| 168 |
+
" messages=messages,\n",
|
| 169 |
+
")\n",
|
| 170 |
+
"question = response.choices[0].message.content\n",
|
| 171 |
+
"print(question)\n"
|
| 172 |
+
]
|
| 173 |
+
},
|
| 174 |
+
{
|
| 175 |
+
"cell_type": "code",
|
| 176 |
+
"execution_count": 7,
|
| 177 |
+
"metadata": {},
|
| 178 |
+
"outputs": [],
|
| 179 |
+
"source": [
|
| 180 |
+
"competitors = []\n",
|
| 181 |
+
"answers = []\n",
|
| 182 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 183 |
+
]
|
| 184 |
+
},
|
| 185 |
+
{
|
| 186 |
+
"cell_type": "code",
|
| 187 |
+
"execution_count": null,
|
| 188 |
+
"metadata": {},
|
| 189 |
+
"outputs": [],
|
| 190 |
+
"source": [
|
| 191 |
+
"# The API we know well\n",
|
| 192 |
+
"\n",
|
| 193 |
+
"model_name = \"gpt-4o-mini\"\n",
|
| 194 |
+
"\n",
|
| 195 |
+
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 196 |
+
"answer = response.choices[0].message.content\n",
|
| 197 |
+
"\n",
|
| 198 |
+
"display(Markdown(answer))\n",
|
| 199 |
+
"competitors.append(model_name)\n",
|
| 200 |
+
"answers.append(answer)"
|
| 201 |
+
]
|
| 202 |
+
},
|
| 203 |
+
{
|
| 204 |
+
"cell_type": "code",
|
| 205 |
+
"execution_count": null,
|
| 206 |
+
"metadata": {},
|
| 207 |
+
"outputs": [],
|
| 208 |
+
"source": [
|
| 209 |
+
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 210 |
+
"\n",
|
| 211 |
+
"model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 212 |
+
"\n",
|
| 213 |
+
"claude = Anthropic()\n",
|
| 214 |
+
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 215 |
+
"answer = response.content[0].text\n",
|
| 216 |
+
"\n",
|
| 217 |
+
"display(Markdown(answer))\n",
|
| 218 |
+
"competitors.append(model_name)\n",
|
| 219 |
+
"answers.append(answer)"
|
| 220 |
+
]
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"cell_type": "code",
|
| 224 |
+
"execution_count": null,
|
| 225 |
+
"metadata": {},
|
| 226 |
+
"outputs": [],
|
| 227 |
+
"source": [
|
| 228 |
+
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 229 |
+
"model_name = \"gemini-2.0-flash\"\n",
|
| 230 |
+
"\n",
|
| 231 |
+
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 232 |
+
"answer = response.choices[0].message.content\n",
|
| 233 |
+
"\n",
|
| 234 |
+
"display(Markdown(answer))\n",
|
| 235 |
+
"competitors.append(model_name)\n",
|
| 236 |
+
"answers.append(answer)"
|
| 237 |
+
]
|
| 238 |
+
},
|
| 239 |
+
{
|
| 240 |
+
"cell_type": "code",
|
| 241 |
+
"execution_count": null,
|
| 242 |
+
"metadata": {},
|
| 243 |
+
"outputs": [],
|
| 244 |
+
"source": [
|
| 245 |
+
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 246 |
+
"model_name = \"deepseek-chat\"\n",
|
| 247 |
+
"\n",
|
| 248 |
+
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 249 |
+
"answer = response.choices[0].message.content\n",
|
| 250 |
+
"\n",
|
| 251 |
+
"display(Markdown(answer))\n",
|
| 252 |
+
"competitors.append(model_name)\n",
|
| 253 |
+
"answers.append(answer)"
|
| 254 |
+
]
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"cell_type": "code",
|
| 258 |
+
"execution_count": null,
|
| 259 |
+
"metadata": {},
|
| 260 |
+
"outputs": [],
|
| 261 |
+
"source": [
|
| 262 |
+
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 263 |
+
"model_name = \"llama-3.3-70b-versatile\"\n",
|
| 264 |
+
"\n",
|
| 265 |
+
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 266 |
+
"answer = response.choices[0].message.content\n",
|
| 267 |
+
"\n",
|
| 268 |
+
"display(Markdown(answer))\n",
|
| 269 |
+
"competitors.append(model_name)\n",
|
| 270 |
+
"answers.append(answer)\n"
|
| 271 |
+
]
|
| 272 |
+
},
|
| 273 |
+
{
|
| 274 |
+
"cell_type": "markdown",
|
| 275 |
+
"metadata": {},
|
| 276 |
+
"source": [
|
| 277 |
+
"## For the next cell, we will use Ollama\n",
|
| 278 |
+
"\n",
|
| 279 |
+
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 280 |
+
"and runs models locally using high performance C++ code.\n",
|
| 281 |
+
"\n",
|
| 282 |
+
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 283 |
+
"\n",
|
| 284 |
+
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 285 |
+
"\n",
|
| 286 |
+
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 287 |
+
"\n",
|
| 288 |
+
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 289 |
+
"\n",
|
| 290 |
+
"`ollama pull <model_name>` downloads a model locally \n",
|
| 291 |
+
"`ollama ls` lists all the models you've downloaded \n",
|
| 292 |
+
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 293 |
+
]
|
| 294 |
+
},
|
| 295 |
+
{
|
| 296 |
+
"cell_type": "markdown",
|
| 297 |
+
"metadata": {},
|
| 298 |
+
"source": [
|
| 299 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 300 |
+
" <tr>\n",
|
| 301 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 302 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 303 |
+
" </td>\n",
|
| 304 |
+
" <td>\n",
|
| 305 |
+
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 306 |
+
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See <a href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 307 |
+
" </span>\n",
|
| 308 |
+
" </td>\n",
|
| 309 |
+
" </tr>\n",
|
| 310 |
+
"</table>"
|
| 311 |
+
]
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"cell_type": "code",
|
| 315 |
+
"execution_count": null,
|
| 316 |
+
"metadata": {},
|
| 317 |
+
"outputs": [],
|
| 318 |
+
"source": [
|
| 319 |
+
"!ollama pull llama3.2"
|
| 320 |
+
]
|
| 321 |
+
},
|
| 322 |
+
{
|
| 323 |
+
"cell_type": "code",
|
| 324 |
+
"execution_count": null,
|
| 325 |
+
"metadata": {},
|
| 326 |
+
"outputs": [],
|
| 327 |
+
"source": [
|
| 328 |
+
"ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 329 |
+
"model_name = \"llama3.2\"\n",
|
| 330 |
+
"\n",
|
| 331 |
+
"response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 332 |
+
"answer = response.choices[0].message.content\n",
|
| 333 |
+
"\n",
|
| 334 |
+
"display(Markdown(answer))\n",
|
| 335 |
+
"competitors.append(model_name)\n",
|
| 336 |
+
"answers.append(answer)"
|
| 337 |
+
]
|
| 338 |
+
},
|
| 339 |
+
{
|
| 340 |
+
"cell_type": "code",
|
| 341 |
+
"execution_count": null,
|
| 342 |
+
"metadata": {},
|
| 343 |
+
"outputs": [],
|
| 344 |
+
"source": [
|
| 345 |
+
"# So where are we?\n",
|
| 346 |
+
"\n",
|
| 347 |
+
"print(competitors)\n",
|
| 348 |
+
"print(answers)\n"
|
| 349 |
+
]
|
| 350 |
+
},
|
| 351 |
+
{
|
| 352 |
+
"cell_type": "code",
|
| 353 |
+
"execution_count": null,
|
| 354 |
+
"metadata": {},
|
| 355 |
+
"outputs": [],
|
| 356 |
+
"source": [
|
| 357 |
+
"# It's nice to know how to use \"zip\"\n",
|
| 358 |
+
"for competitor, answer in zip(competitors, answers):\n",
|
| 359 |
+
" print(f\"Competitor: {competitor}\\n\\n{answer}\")\n"
|
| 360 |
+
]
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"cell_type": "code",
|
| 364 |
+
"execution_count": 20,
|
| 365 |
+
"metadata": {},
|
| 366 |
+
"outputs": [],
|
| 367 |
+
"source": [
|
| 368 |
+
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 369 |
+
"\n",
|
| 370 |
+
"together = \"\"\n",
|
| 371 |
+
"for index, answer in enumerate(answers):\n",
|
| 372 |
+
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 373 |
+
" together += answer + \"\\n\\n\""
|
| 374 |
+
]
|
| 375 |
+
},
|
| 376 |
+
{
|
| 377 |
+
"cell_type": "code",
|
| 378 |
+
"execution_count": null,
|
| 379 |
+
"metadata": {},
|
| 380 |
+
"outputs": [],
|
| 381 |
+
"source": [
|
| 382 |
+
"print(together)"
|
| 383 |
+
]
|
| 384 |
+
},
|
| 385 |
+
{
|
| 386 |
+
"cell_type": "code",
|
| 387 |
+
"execution_count": 22,
|
| 388 |
+
"metadata": {},
|
| 389 |
+
"outputs": [],
|
| 390 |
+
"source": [
|
| 391 |
+
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 392 |
+
"Each model has been given this question:\n",
|
| 393 |
+
"\n",
|
| 394 |
+
"{question}\n",
|
| 395 |
+
"\n",
|
| 396 |
+
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 397 |
+
"Respond with JSON, and only JSON, with the following format:\n",
|
| 398 |
+
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 399 |
+
"\n",
|
| 400 |
+
"Here are the responses from each competitor:\n",
|
| 401 |
+
"\n",
|
| 402 |
+
"{together}\n",
|
| 403 |
+
"\n",
|
| 404 |
+
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 405 |
+
]
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"cell_type": "code",
|
| 409 |
+
"execution_count": null,
|
| 410 |
+
"metadata": {},
|
| 411 |
+
"outputs": [],
|
| 412 |
+
"source": [
|
| 413 |
+
"print(judge)"
|
| 414 |
+
]
|
| 415 |
+
},
|
| 416 |
+
{
|
| 417 |
+
"cell_type": "code",
|
| 418 |
+
"execution_count": 29,
|
| 419 |
+
"metadata": {},
|
| 420 |
+
"outputs": [],
|
| 421 |
+
"source": [
|
| 422 |
+
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 423 |
+
]
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"cell_type": "code",
|
| 427 |
+
"execution_count": null,
|
| 428 |
+
"metadata": {},
|
| 429 |
+
"outputs": [],
|
| 430 |
+
"source": [
|
| 431 |
+
"# Judgement time!\n",
|
| 432 |
+
"\n",
|
| 433 |
+
"openai = OpenAI()\n",
|
| 434 |
+
"response = openai.chat.completions.create(\n",
|
| 435 |
+
" model=\"o3-mini\",\n",
|
| 436 |
+
" messages=judge_messages,\n",
|
| 437 |
+
")\n",
|
| 438 |
+
"results = response.choices[0].message.content\n",
|
| 439 |
+
"print(results)\n"
|
| 440 |
+
]
|
| 441 |
+
},
|
| 442 |
+
{
|
| 443 |
+
"cell_type": "code",
|
| 444 |
+
"execution_count": null,
|
| 445 |
+
"metadata": {},
|
| 446 |
+
"outputs": [],
|
| 447 |
+
"source": [
|
| 448 |
+
"# OK let's turn this into results!\n",
|
| 449 |
+
"\n",
|
| 450 |
+
"results_dict = json.loads(results)\n",
|
| 451 |
+
"ranks = results_dict[\"results\"]\n",
|
| 452 |
+
"for index, result in enumerate(ranks):\n",
|
| 453 |
+
" competitor = competitors[int(result)-1]\n",
|
| 454 |
+
" print(f\"Rank {index+1}: {competitor}\")"
|
| 455 |
+
]
|
| 456 |
+
},
|
| 457 |
+
{
|
| 458 |
+
"cell_type": "markdown",
|
| 459 |
+
"metadata": {},
|
| 460 |
+
"source": [
|
| 461 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 462 |
+
" <tr>\n",
|
| 463 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 464 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 465 |
+
" </td>\n",
|
| 466 |
+
" <td>\n",
|
| 467 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 468 |
+
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 469 |
+
" </span>\n",
|
| 470 |
+
" </td>\n",
|
| 471 |
+
" </tr>\n",
|
| 472 |
+
"</table>"
|
| 473 |
+
]
|
| 474 |
+
},
|
| 475 |
+
{
|
| 476 |
+
"cell_type": "markdown",
|
| 477 |
+
"metadata": {},
|
| 478 |
+
"source": [
|
| 479 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 480 |
+
" <tr>\n",
|
| 481 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 482 |
+
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 483 |
+
" </td>\n",
|
| 484 |
+
" <td>\n",
|
| 485 |
+
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 486 |
+
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 487 |
+
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 488 |
+
" to business projects where accuracy is critical.\n",
|
| 489 |
+
" </span>\n",
|
| 490 |
+
" </td>\n",
|
| 491 |
+
" </tr>\n",
|
| 492 |
+
"</table>"
|
| 493 |
+
]
|
| 494 |
+
}
|
| 495 |
+
],
|
| 496 |
+
"metadata": {
|
| 497 |
+
"kernelspec": {
|
| 498 |
+
"display_name": ".venv",
|
| 499 |
+
"language": "python",
|
| 500 |
+
"name": "python3"
|
| 501 |
+
},
|
| 502 |
+
"language_info": {
|
| 503 |
+
"codemirror_mode": {
|
| 504 |
+
"name": "ipython",
|
| 505 |
+
"version": 3
|
| 506 |
+
},
|
| 507 |
+
"file_extension": ".py",
|
| 508 |
+
"mimetype": "text/x-python",
|
| 509 |
+
"name": "python",
|
| 510 |
+
"nbconvert_exporter": "python",
|
| 511 |
+
"pygments_lexer": "ipython3",
|
| 512 |
+
"version": "3.12.12"
|
| 513 |
+
}
|
| 514 |
+
},
|
| 515 |
+
"nbformat": 4,
|
| 516 |
+
"nbformat_minor": 2
|
| 517 |
+
}
|
community_contributions/2_lab2_Execution_measurement.py
ADDED
|
@@ -0,0 +1,401 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import asyncio
|
| 4 |
+
import concurrent.futures
|
| 5 |
+
import time
|
| 6 |
+
from typing import Dict, List, Tuple, Optional
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
from openai import OpenAI
|
| 9 |
+
|
| 10 |
+
# Load settings from the local .env file. override=True makes values from
# .env replace variables that already exist in the process environment.
load_dotenv(override=True)

# Shared OpenAI client, constructed with no explicit arguments.
openai = OpenAI()

# Module-level accumulators. NOTE(review): the code that fills these lies
# outside this chunk - presumably model names, their answers, and the
# concatenated answers text; confirm against the rest of the file.
competitors = []
answers = []
together = ""

# API keys read from the environment; os.getenv returns None when unset.
openai_api_key = os.getenv('OPENAI_API_KEY')
anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')
google_api_key = os.getenv('GOOGLE_API_KEY')
deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')
groq_api_key = os.getenv('GROQ_API_KEY')

# Per-provider connection settings: model name, API key and base URL.
# NOTE(review): base_url=None for 'openai' presumably means "use the
# client's default endpoint" - confirm where this dict is consumed.
models_dict = {
    'openai': {
        'model': 'gpt-4o-mini',
        'api_key': openai_api_key,
        'base_url': None,
    },
    'gemini': {
        'model': 'gemini-2.0-flash',
        'api_key': google_api_key,
        'base_url': 'https://generativelanguage.googleapis.com/v1beta/openai/',
    },
    'groq': {
        'model': 'llama-3.3-70b-versatile',
        'api_key': groq_api_key,
        'base_url': 'https://api.groq.com/openai/v1',
    },
    'ollama': {
        'model': 'llama3.2',
        'api_key': 'ollama',
        'base_url': 'http://localhost:11434/v1',
    },
}
|
| 44 |
+
|
| 45 |
+
def key_checker():
    """Print, for each provider, whether its API key is set.

    When a key is present only a short prefix of it is printed, to help
    with debugging without showing the whole key.
    """
    # (label, key value, prefix length to show, message when missing)
    key_report = (
        ("OpenAI", openai_api_key, 8, "OpenAI API Key not set"),
        ("Anthropic", anthropic_api_key, 7, "Anthropic API Key not set (and this is optional)"),
        ("Google", google_api_key, 2, "Google API Key not set (and this is optional)"),
        ("DeepSeek", deepseek_api_key, 3, "DeepSeek API Key not set (and this is optional)"),
        ("Groq", groq_api_key, 4, "Groq API Key not set (and this is optional)"),
    )

    for label, key, shown, missing_message in key_report:
        if key:
            print(f"{label} API Key exists and begins {key[:shown]}")
        else:
            print(missing_message)
|
| 71 |
+
|
| 72 |
+
def question_prompt_generator():
    """Build the chat messages that ask an LLM to invent the competition question.

    Returns:
        A one-element list holding a single user message.
    """
    prompt_text = (
        "Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. "
        "Answer only with the question, no explanation."
    )
    return [{"role": "user", "content": prompt_text}]
|
| 77 |
+
|
| 78 |
+
def generate_competition_question():
    """
    Ask the default model for a challenging competition question.

    Returns:
        A tuple (question, question_messages): the question text and a
        one-message chat list carrying that text as the user content.
    """
    print("Generating competition question...")
    question = llm_caller(question_prompt_generator())
    print(f"Question: \n{question}")
    return question, [{"role": "user", "content": question}]
|
| 89 |
+
|
| 90 |
+
def llm_caller(messages):
    """Send *messages* to gpt-4o-mini through the module-level client.

    Returns:
        The text content of the first choice in the response.
    """
    completion = openai.chat.completions.create(model="gpt-4o-mini", messages=messages)
    first_choice = completion.choices[0]
    return first_choice.message.content
|
| 96 |
+
|
| 97 |
+
def llm_caller_with_model(messages, model_name, api_key, base_url):
    """Call *model_name* through a freshly built OpenAI-compatible client.

    Args:
        messages: Chat messages to send.
        model_name: Model identifier passed to the completion call.
        api_key: API key for the client.
        base_url: Endpoint for the client; when falsy the client is built
            without a base_url.

    Returns:
        The text of the first choice, or None if the client could not be
        created. Errors from the completion call itself propagate.
    """
    client_options = {"api_key": api_key}
    if base_url:
        client_options["base_url"] = base_url

    try:
        client = OpenAI(**client_options)
    except Exception as e:
        print(f"Error creating OpenAI client: {e}")
        return None

    completion = client.chat.completions.create(model=model_name, messages=messages)
    return completion.choices[0].message.content
|
| 115 |
+
|
| 116 |
+
def get_single_model_answer(provider: str, details: Dict, question_messages: List[Dict]) -> Tuple[str, Optional[str]]:
    """
    Query one model and report the outcome as a (provider, answer) pair.

    The answer is None when the call raised. Exceptions are caught here so
    the function can be submitted to a ThreadPoolExecutor safely.
    """
    print(f"Calling model {provider}...")
    outcome = (provider, None)
    try:
        reply = llm_caller_with_model(
            question_messages,
            details['model'],
            details['api_key'],
            details['base_url'],
        )
        print(f"Model {provider} was successfully called!")
        outcome = (provider, reply)
    except Exception as e:
        print(f"Model {provider} failed to call: {e}")
    return outcome
|
| 129 |
+
|
| 130 |
+
def get_models_answers(question_messages):
    """
    Sequential version - kept for backward compatibility.

    Calls every model in models_dict one after another and records each
    successful (provider, answer) pair in the module-level competitors and
    answers lists.

    Args:
        question_messages: Chat messages sent to every model.
    """
    # Start from a clean slate, as the parallel and async variants do, so
    # repeated calls never mix stale answers into a new round.
    competitors.clear()
    answers.clear()

    for provider, details in models_dict.items():
        # The shared helper reports every failure as a None answer, including
        # the case where the client could not be created.
        provider, answer = get_single_model_answer(provider, details, question_messages)
        if answer is None:
            continue
        competitors.append(provider)
        answers.append(answer)
|
| 144 |
+
|
| 145 |
+
def get_models_answers_parallel(question_messages, max_workers: int = 4):
    """
    Parallel version - queries every model at once on a thread pool.

    Successful (provider, answer) pairs are stored in the module-level
    competitors and answers lists in completion order.
    """
    print("Starting parallel execution of all models...")

    # Drop results left over from an earlier run
    competitors.clear()
    answers.clear()

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Queue one task per configured model
        pending = [
            pool.submit(get_single_model_answer, name, config, question_messages)
            for name, config in models_dict.items()
        ]

        # Harvest each result as soon as its task finishes
        for finished in concurrent.futures.as_completed(pending):
            name, reply = finished.result()
            if reply is None:  # failed call, nothing to record
                continue
            competitors.append(name)
            answers.append(reply)

    print(f"Parallel execution completed. {len(competitors)} models responded successfully.")
|
| 171 |
+
|
| 172 |
+
async def get_single_model_answer_async(provider: str, details: Dict, question_messages: List[Dict]) -> Tuple[str, Optional[str]]:
    """
    Async version of a single model call.

    The blocking llm_caller_with_model call is run on the event loop's
    default executor so several models can be awaited concurrently.

    Args:
        provider: Name of the provider, used for logging and in the result.
        details: Mapping with 'model', 'api_key' and 'base_url' entries.
        question_messages: Chat messages to send to the model.

    Returns:
        (provider, answer), where answer is None if the call raised.
    """
    print(f"Calling model {provider} (async)...")
    try:
        # get_running_loop() is the supported way to obtain the loop from
        # inside a coroutine; get_event_loop() is deprecated for this use.
        loop = asyncio.get_running_loop()
        answer = await loop.run_in_executor(
            None,
            llm_caller_with_model,
            question_messages,
            details['model'],
            details['api_key'],
            details['base_url']
        )
        print(f"Model {provider} was successfully called!")
        return provider, answer
    except Exception as e:
        print(f"Model {provider} failed to call: {e}")
        return provider, None
|
| 193 |
+
|
| 194 |
+
async def get_models_answers_async(question_messages):
    """
    Async version - calls all models concurrently using asyncio.

    Successful (provider, answer) pairs are stored in the module-level
    competitors and answers lists, in models_dict order.

    Args:
        question_messages: Chat messages sent to every model.
    """
    print("Starting async execution of all models...")

    # Clear previous results
    competitors.clear()
    answers.clear()

    # Create tasks for all models
    tasks = [
        get_single_model_answer_async(provider, details, question_messages)
        for provider, details in models_dict.items()
    ]

    # Wait for all tasks to complete
    results = await asyncio.gather(*tasks, return_exceptions=True)

    # Process results
    for result in results:
        # With return_exceptions=True gather can also hand back
        # CancelledError, which derives from BaseException rather than
        # Exception; checking BaseException keeps it out of the unpack below.
        if isinstance(result, BaseException):
            print(f"Task failed with exception: {result}")
            continue
        provider, answer = result
        if answer is not None:  # Only add successful calls
            competitors.append(provider)
            answers.append(answer)

    print(f"Async execution completed. {len(competitors)} models responded successfully.")
|
| 224 |
+
|
| 225 |
+
def together_maker(answers):
    """Concatenate all answers into one text block for the judge prompt.

    Each answer is preceded by a "# Response from competitor N" heading,
    numbered from 1, and followed by a blank line.

    Args:
        answers: Sequence of answer strings, in competitor order.

    Returns:
        The combined text; an empty string when there are no answers.
    """
    # join() builds the result in one pass instead of repeated += copies.
    return "".join(
        f"# Response from competitor {number}\n\n" + answer + "\n\n"
        for number, answer in enumerate(answers, start=1)
    )
|
| 231 |
+
|
| 232 |
+
def judge_prompt_generator(competitors, question, together):
    """Compose the prompt that asks the judge model to rank the answers.

    Args:
        competitors: Providers that answered; only their count is used.
        question: The question every competitor was asked.
        together: All answers combined into one text block.

    Returns:
        The full judge prompt as a single string.
    """
    prompt_lines = [
        f"You are judging a competition between {len(competitors)} competitors.",
        "Each model has been given this question:",
        "",
        f"{question}",
        "",
        "Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.",
        "Respond with JSON, and only JSON, with the following format:",
        '{"results": ["best competitor number", "second best competitor number", "third best competitor number", ...]}',
        "",
        "Here are the responses from each competitor:",
        "",
        f"{together}",
        "",
        "Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.",
    ]
    return "\n".join(prompt_lines)
|
| 248 |
+
|
| 249 |
+
def judge_caller(judge_prompt, competitors):
    """Ask the judge model to rank the competitors and print the ranking.

    Args:
        judge_prompt: Prompt text produced by judge_prompt_generator.
        competitors: Provider names, ordered as their answers were numbered.

    Returns:
        The list found under "results" in the judge's JSON reply.

    Raises:
        RuntimeError: If the judge call returned no response.
        json.JSONDecodeError: If the reply is not valid JSON.
        KeyError: If the reply has no "results" entry.
    """
    print("Calling judge...")
    judge_messages = [{"role": "user", "content": judge_prompt}]
    results = llm_caller_with_model(judge_messages, "o3-mini", openai_api_key, None)
    if results is None:
        # llm_caller_with_model returns None when the client cannot be built.
        raise RuntimeError("Judge call failed: no response was returned")

    # Tolerate a reply wrapped in a markdown code fence even though the
    # prompt asks for bare JSON.
    payload = results.strip()
    if payload.startswith("```"):
        payload = payload.strip("`").strip()
        if payload.lower().startswith("json"):
            payload = payload[4:]

    results_dict = json.loads(payload)
    ranks = results_dict["results"]

    for index, result in enumerate(ranks):
        try:
            position = int(result)
        except (TypeError, ValueError):
            position = 0
        # Reject 0 and negatives explicitly: competitors[-1] would otherwise
        # silently report the wrong model.
        if not 1 <= position <= len(competitors):
            print(f"Rank {index+1}: invalid competitor number {result!r}")
            continue
        competitor = competitors[position - 1]
        print(f"Rank {index+1}: {competitor}")
    return ranks
|
| 259 |
+
|
| 260 |
+
def compare_execution_methods(question_messages, runs_per_method=1):
    """
    Compare performance of different execution methods.

    Runs the sequential, parallel and async strategies runs_per_method times
    each, timing every run, then prints a per-method report, a summary and
    the speedup of each method relative to sequential.

    Args:
        question_messages: Chat messages sent to every model.
        runs_per_method: Number of timed runs per method; must be at least 1.

    Returns:
        Dict mapping method name to a dict with 'times', 'avg_time' and
        'successful_models'. 'successful_models' reflects the last run only,
        because results are cleared before every run.

    Raises:
        ValueError: If runs_per_method is less than 1.
    """
    if runs_per_method < 1:
        # Without this the average below would divide by zero.
        raise ValueError("runs_per_method must be at least 1")

    # Method name -> zero-argument callable that performs one full run.
    runners = {
        'sequential': lambda: get_models_answers(question_messages),
        'parallel': lambda: get_models_answers_parallel(question_messages, max_workers=4),
        'async': lambda: asyncio.run(get_models_answers_async(question_messages)),
    }
    results = {}

    for method, run_method in runners.items():
        print(f"\n{'='*50}")
        print(f"Testing {method} execution method")
        print(f"{'='*50}")

        method_times = []

        for run in range(runs_per_method):
            print(f"\nRun {run + 1}/{runs_per_method}")

            # Clear previous results
            competitors.clear()
            answers.clear()

            # perf_counter is monotonic, so the measured duration cannot be
            # skewed by system clock adjustments the way time.time() can.
            start_time = time.perf_counter()
            run_method()
            execution_time = time.perf_counter() - start_time

            method_times.append(execution_time)
            print(f"Run {run + 1} completed in {execution_time:.2f} seconds")

        avg_time = sum(method_times) / len(method_times)
        results[method] = {
            'times': method_times,
            'avg_time': avg_time,
            'successful_models': len(competitors)
        }

        print(f"\n{method.upper()} Results:")
        print(f" Average time: {avg_time:.2f} seconds")
        print(f" Successful models: {len(competitors)}")
        print(f" All times: {[f'{t:.2f}s' for t in method_times]}")

    # Print comparison summary
    print(f"\n{'='*60}")
    print("PERFORMANCE COMPARISON SUMMARY")
    print(f"{'='*60}")

    for method, data in results.items():
        print(f"{method.upper():>12}: {data['avg_time']:>6.2f}s avg, {data['successful_models']} models")

    # Calculate speedup
    seq_time = results['sequential']['avg_time']
    print("\nSpeedup vs Sequential:")
    for method, data in results.items():
        if method == 'sequential':
            continue
        if data['avg_time'] > 0:
            speedup = seq_time / data['avg_time']
            print(f" {method.upper()}: {speedup:.2f}x faster")
        else:
            # A zero average would make the ratio undefined.
            print(f" {method.upper()}: too fast to measure")

    return results
|
| 324 |
+
|
| 325 |
+
def run_llm_competition(question_messages, execution_method, question):
    """
    Run the LLM competition with the specified execution method.

    Collects answers from every model using the chosen strategy, then asks
    the judge to rank them.

    Args:
        question_messages: Chat messages sent to every model.
        execution_method: One of 'sequential', 'parallel' or 'async'.
        question: The question text, embedded in the judge prompt.

    Returns:
        Seconds spent collecting the answers (judging is not included).

    Raises:
        ValueError: If execution_method is not a known method.
    """
    print(f"\nUsing {execution_method} execution method...")

    # Make sure no answers from an earlier round reach the judge.
    competitors.clear()
    answers.clear()

    # perf_counter is monotonic and intended for measuring durations.
    start_time = time.perf_counter()

    if execution_method == 'sequential':
        get_models_answers(question_messages)
    elif execution_method == 'parallel':
        get_models_answers_parallel(question_messages, max_workers=4)
    elif execution_method == 'async':
        asyncio.run(get_models_answers_async(question_messages))
    else:
        raise ValueError(f"Unknown execution method: {execution_method}")

    execution_time = time.perf_counter() - start_time
    print(f"Execution completed in {execution_time:.2f} seconds")

    if not answers:
        # Nothing to rank; skip the judge call instead of sending an empty prompt.
        print("No models responded successfully; skipping judging.")
        return execution_time

    together = together_maker(answers)
    judge_prompt = judge_prompt_generator(competitors, question, together)
    judge_caller(judge_prompt, competitors)

    return execution_time
|
| 349 |
+
|
| 350 |
+
# Interactive execution method selection
|
| 351 |
+
def get_execution_method():
    """
    Prompt the user to select an execution method.

    Keeps asking until a valid menu entry (1-4) is entered.

    Returns:
        'sequential', 'parallel', 'async' or 'compare'.

    Raises:
        SystemExit: With code 0 if the user interrupts the prompt (Ctrl-C)
            or the input stream ends.
    """
    menu_choices = {
        '1': 'sequential',
        '2': 'parallel',
        '3': 'async',
        '4': 'compare',
    }

    print("\n" + "="*60)
    print("EXECUTION METHOD SELECTION")
    print("="*60)
    print("Choose how to execute the LLM calls:")
    print("1. Sequential - Call models one after another (original method)")
    print("2. Parallel - Call all models simultaneously (recommended)")
    print("3. Async - Use async/await for maximum performance")
    print("4. Compare - Run all methods and compare performance")
    print("="*60)

    while True:
        try:
            choice = input("Enter your choice (1-4): ").strip()
        except (KeyboardInterrupt, EOFError):
            print("\nExiting...")
            # The builtin exit() is installed by the site module and may be
            # missing; raising SystemExit works everywhere.
            raise SystemExit(0) from None

        if choice in menu_choices:
            return menu_choices[choice]
        print("Invalid choice. Please enter 1, 2, 3, or 4.")
|
| 386 |
+
|
| 387 |
+
def main():
    """Run the interactive LLM competition from start to finish.

    Reports which API keys are set, asks the user for an execution method,
    generates the competition question, then either compares all execution
    methods or runs a single judged competition.
    """
    key_checker()

    # Get user's execution method choice
    execution_method = get_execution_method()
    # Generate the competition question and get the question messages
    question, question_messages = generate_competition_question()

    if execution_method == 'compare':
        print("\nRunning performance comparison...")
        compare_execution_methods(question_messages, runs_per_method=1)
    else:
        run_llm_competition(question_messages, execution_method, question)


# Only start the interactive program when executed as a script, so that
# importing this module does not prompt for input or call any API.
if __name__ == "__main__":
    main()
|
community_contributions/2_lab2_ReAct_Pattern.ipynb
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "markdown",
|
| 14 |
+
"metadata": {},
|
| 15 |
+
"source": [
|
| 16 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
+
" <tr>\n",
|
| 18 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
+
" </td>\n",
|
| 21 |
+
" <td>\n",
|
| 22 |
+
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
+
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
+
" </span>\n",
|
| 25 |
+
" </td>\n",
|
| 26 |
+
" </tr>\n",
|
| 27 |
+
"</table>"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"cell_type": "markdown",
|
| 32 |
+
"metadata": {},
|
| 33 |
+
"source": [
|
| 34 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 35 |
+
" <tr>\n",
|
| 36 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 37 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 38 |
+
" </td>\n",
|
| 39 |
+
" <td>\n",
|
| 40 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 41 |
+
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 42 |
+
" </span>\n",
|
| 43 |
+
" </td>\n",
|
| 44 |
+
" </tr>\n",
|
| 45 |
+
"</table>"
|
| 46 |
+
]
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"cell_type": "markdown",
|
| 50 |
+
"metadata": {},
|
| 51 |
+
"source": [
|
| 52 |
+
"# ReAct Pattern"
|
| 53 |
+
]
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"cell_type": "code",
|
| 57 |
+
"execution_count": 26,
|
| 58 |
+
"metadata": {},
|
| 59 |
+
"outputs": [],
|
| 60 |
+
"source": [
|
| 61 |
+
"import openai\n",
|
| 62 |
+
"import os\n",
|
| 63 |
+
"from dotenv import load_dotenv\n",
|
| 64 |
+
"import io\n",
|
| 65 |
+
"from anthropic import Anthropic\n",
|
| 66 |
+
"from IPython.display import Markdown, display"
|
| 67 |
+
]
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"cell_type": "code",
|
| 71 |
+
"execution_count": null,
|
| 72 |
+
"metadata": {},
|
| 73 |
+
"outputs": [],
|
| 74 |
+
"source": [
|
| 75 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 76 |
+
"\n",
|
| 77 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 78 |
+
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 79 |
+
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 80 |
+
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 81 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 82 |
+
"\n",
|
| 83 |
+
"if openai_api_key:\n",
|
| 84 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 85 |
+
"else:\n",
|
| 86 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 87 |
+
" \n",
|
| 88 |
+
"if anthropic_api_key:\n",
|
| 89 |
+
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 90 |
+
"else:\n",
|
| 91 |
+
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 92 |
+
"\n",
|
| 93 |
+
"if google_api_key:\n",
|
| 94 |
+
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 95 |
+
"else:\n",
|
| 96 |
+
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 97 |
+
"\n",
|
| 98 |
+
"if deepseek_api_key:\n",
|
| 99 |
+
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 100 |
+
"else:\n",
|
| 101 |
+
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 102 |
+
"\n",
|
| 103 |
+
"if groq_api_key:\n",
|
| 104 |
+
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 105 |
+
"else:\n",
|
| 106 |
+
" print(\"Groq API Key not set (and this is optional)\")"
|
| 107 |
+
]
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"cell_type": "code",
|
| 111 |
+
"execution_count": 50,
|
| 112 |
+
"metadata": {},
|
| 113 |
+
"outputs": [],
|
| 114 |
+
"source": [
|
| 115 |
+
"\n",
|
| 116 |
+
"from openai import OpenAI\n",
|
| 117 |
+
"\n",
|
| 118 |
+
"openai = OpenAI()\n",
|
| 119 |
+
"\n",
|
| 120 |
+
"# Request prompt\n",
|
| 121 |
+
"request = (\n",
|
| 122 |
+
" \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 123 |
+
" \"Answer only with the question, no explanation.\"\n",
|
| 124 |
+
")\n",
|
| 125 |
+
"\n",
|
| 126 |
+
"\n",
|
| 127 |
+
"\n",
|
| 128 |
+
"def generate_question(prompt: str) -> str:\n",
|
| 129 |
+
" response = openai.chat.completions.create(\n",
|
| 130 |
+
" model='gpt-4o-mini',\n",
|
| 131 |
+
" messages=[{'role': 'user', 'content': prompt}]\n",
|
| 132 |
+
" )\n",
|
| 133 |
+
" question = response.choices[0].message.content\n",
|
| 134 |
+
" return question\n",
|
| 135 |
+
"\n",
|
| 136 |
+
"def react_agent_decide_model(question: str) -> str:\n",
|
| 137 |
+
" prompt = f\"\"\"\n",
|
| 138 |
+
" You are an intelligent AI assistant tasked with evaluating which language model is most suitable to answer a given question.\n",
|
| 139 |
+
"\n",
|
| 140 |
+
" Available models:\n",
|
| 141 |
+
" - OpenAI: excels at reasoning and factual answers.\n",
|
| 142 |
+
" - Claude: better for philosophical, nuanced, and ethical topics.\n",
|
| 143 |
+
" - Gemini: good for concise and structured summaries.\n",
|
| 144 |
+
" - Groq: good for creative or exploratory tasks.\n",
|
| 145 |
+
" - DeepSeek: strong at coding, technical reasoning, and multilingual responses.\n",
|
| 146 |
+
"\n",
|
| 147 |
+
" Here is the question to answer:\n",
|
| 148 |
+
" \"{question}\"\n",
|
| 149 |
+
"\n",
|
| 150 |
+
" ### Thought:\n",
|
| 151 |
+
" Which model is best suited to answer this question, and why?\n",
|
| 152 |
+
"\n",
|
| 153 |
+
" ### Action:\n",
|
| 154 |
+
" Respond with only the model name you choose (e.g., \"Claude\").\n",
|
| 155 |
+
" \"\"\"\n",
|
| 156 |
+
"\n",
|
| 157 |
+
" response = openai.chat.completions.create(\n",
|
| 158 |
+
" model=\"o3-mini\",\n",
|
| 159 |
+
" messages=[{\"role\": \"user\", \"content\": prompt}]\n",
|
| 160 |
+
" )\n",
|
| 161 |
+
" model = response.choices[0].message.content.strip()\n",
|
| 162 |
+
" return model\n",
|
| 163 |
+
"\n",
|
| 164 |
+
"def generate_answer_openai(prompt):\n",
|
| 165 |
+
" answer = openai.chat.completions.create(\n",
|
| 166 |
+
" model='gpt-4o-mini',\n",
|
| 167 |
+
" messages=[{'role': 'user', 'content': prompt}]\n",
|
| 168 |
+
" ).choices[0].message.content\n",
|
| 169 |
+
" return answer\n",
|
| 170 |
+
"\n",
|
| 171 |
+
"def generate_answer_anthropic(prompt):\n",
|
| 172 |
+
" anthropic = Anthropic(api_key=anthropic_api_key)\n",
|
| 173 |
+
" model_name = \"claude-3-5-sonnet-20240620\"\n",
|
| 174 |
+
" answer = anthropic.messages.create(\n",
|
| 175 |
+
" model=model_name,\n",
|
| 176 |
+
" messages=[{'role': 'user', 'content': prompt}],\n",
|
| 177 |
+
" max_tokens=1000\n",
|
| 178 |
+
" ).content[0].text\n",
|
| 179 |
+
" return answer\n",
|
| 180 |
+
"\n",
|
| 181 |
+
"def generate_answer_deepseek(prompt):\n",
|
| 182 |
+
" deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 183 |
+
" model_name = \"deepseek-chat\" \n",
|
| 184 |
+
" answer = deepseek.chat.completions.create(\n",
|
| 185 |
+
" model=model_name,\n",
|
| 186 |
+
" messages=[{'role': 'user', 'content': prompt}],\n",
|
| 187 |
+
" base_url='https://api.deepseek.com/v1'\n",
|
| 188 |
+
" ).choices[0].message.content\n",
|
| 189 |
+
" return answer\n",
|
| 190 |
+
"\n",
|
| 191 |
+
"def generate_answer_gemini(prompt):\n",
|
| 192 |
+
" gemini=OpenAI(base_url='https://generativelanguage.googleapis.com/v1beta/openai/',api_key=google_api_key)\n",
|
| 193 |
+
" model_name = \"gemini-2.0-flash\"\n",
|
| 194 |
+
" answer = gemini.chat.completions.create(\n",
|
| 195 |
+
" model=model_name,\n",
|
| 196 |
+
" messages=[{'role': 'user', 'content': prompt}],\n",
|
| 197 |
+
" ).choices[0].message.content\n",
|
| 198 |
+
" return answer\n",
|
| 199 |
+
"\n",
|
| 200 |
+
"def generate_answer_groq(prompt):\n",
|
| 201 |
+
" groq=OpenAI(base_url='https://api.groq.com/openai/v1',api_key=groq_api_key)\n",
|
| 202 |
+
" model_name=\"llama3-70b-8192\"\n",
|
| 203 |
+
" answer = groq.chat.completions.create(\n",
|
| 204 |
+
" model=model_name,\n",
|
| 205 |
+
" messages=[{'role': 'user', 'content': prompt}],\n",
|
| 206 |
+
" base_url=\"https://api.groq.com/openai/v1\"\n",
|
| 207 |
+
" ).choices[0].message.content\n",
|
| 208 |
+
" return answer\n",
|
| 209 |
+
"\n",
|
| 210 |
+
"def main():\n",
|
| 211 |
+
" print(\"Generating question...\")\n",
|
| 212 |
+
" question = generate_question(request)\n",
|
| 213 |
+
" print(f\"\\n🧠 Question: {question}\\n\")\n",
|
| 214 |
+
" selected_model = react_agent_decide_model(question)\n",
|
| 215 |
+
" print(f\"\\n🔹 {selected_model}:\\n\")\n",
|
| 216 |
+
" \n",
|
| 217 |
+
" if selected_model.lower() == \"openai\":\n",
|
| 218 |
+
" answer = generate_answer_openai(question)\n",
|
| 219 |
+
" elif selected_model.lower() == \"deepseek\":\n",
|
| 220 |
+
" answer = generate_answer_deepseek(question)\n",
|
| 221 |
+
" elif selected_model.lower() == \"gemini\":\n",
|
| 222 |
+
" answer = generate_answer_gemini(question)\n",
|
| 223 |
+
" elif selected_model.lower() == \"groq\":\n",
|
| 224 |
+
" answer = generate_answer_groq(question)\n",
|
| 225 |
+
" elif selected_model.lower() == \"claude\":\n",
|
| 226 |
+
" answer = generate_answer_anthropic(question)\n",
|
| 227 |
+
" print(f\"\\n🔹 {selected_model}:\\n{answer}\\n\")\n",
|
| 228 |
+
" \n"
|
| 229 |
+
]
|
| 230 |
+
},
|
| 231 |
+
{
|
| 232 |
+
"cell_type": "code",
|
| 233 |
+
"execution_count": null,
|
| 234 |
+
"metadata": {},
|
| 235 |
+
"outputs": [],
|
| 236 |
+
"source": [
|
| 237 |
+
"main()"
|
| 238 |
+
]
|
| 239 |
+
},
|
| 240 |
+
{
|
| 241 |
+
"cell_type": "code",
|
| 242 |
+
"execution_count": null,
|
| 243 |
+
"metadata": {},
|
| 244 |
+
"outputs": [],
|
| 245 |
+
"source": []
|
| 246 |
+
},
|
| 247 |
+
{
|
| 248 |
+
"cell_type": "markdown",
|
| 249 |
+
"metadata": {},
|
| 250 |
+
"source": [
|
| 251 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 252 |
+
" <tr>\n",
|
| 253 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 254 |
+
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 255 |
+
" </td>\n",
|
| 256 |
+
" <td>\n",
|
| 257 |
+
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 258 |
+
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 259 |
+
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 260 |
+
" to business projects where accuracy is critical.\n",
|
| 261 |
+
" </span>\n",
|
| 262 |
+
" </td>\n",
|
| 263 |
+
" </tr>\n",
|
| 264 |
+
"</table>"
|
| 265 |
+
]
|
| 266 |
+
}
|
| 267 |
+
],
|
| 268 |
+
"metadata": {
|
| 269 |
+
"kernelspec": {
|
| 270 |
+
"display_name": ".venv",
|
| 271 |
+
"language": "python",
|
| 272 |
+
"name": "python3"
|
| 273 |
+
},
|
| 274 |
+
"language_info": {
|
| 275 |
+
"codemirror_mode": {
|
| 276 |
+
"name": "ipython",
|
| 277 |
+
"version": 3
|
| 278 |
+
},
|
| 279 |
+
"file_extension": ".py",
|
| 280 |
+
"mimetype": "text/x-python",
|
| 281 |
+
"name": "python",
|
| 282 |
+
"nbconvert_exporter": "python",
|
| 283 |
+
"pygments_lexer": "ipython3",
|
| 284 |
+
"version": "3.12.4"
|
| 285 |
+
}
|
| 286 |
+
},
|
| 287 |
+
"nbformat": 4,
|
| 288 |
+
"nbformat_minor": 2
|
| 289 |
+
}
|
community_contributions/2_lab2_akash_parallelization.ipynb
ADDED
|
@@ -0,0 +1,295 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "code",
|
| 14 |
+
"execution_count": null,
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"outputs": [],
|
| 17 |
+
"source": [
|
| 18 |
+
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 19 |
+
"\n",
|
| 20 |
+
"import os\n",
|
| 21 |
+
"import json\n",
|
| 22 |
+
"from dotenv import load_dotenv\n",
|
| 23 |
+
"from openai import OpenAI, AsyncOpenAI\n",
|
| 24 |
+
"from IPython.display import Markdown, display\n",
|
| 25 |
+
"import asyncio\n",
|
| 26 |
+
"from functools import partial"
|
| 27 |
+
]
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"cell_type": "code",
|
| 31 |
+
"execution_count": null,
|
| 32 |
+
"metadata": {},
|
| 33 |
+
"outputs": [],
|
| 34 |
+
"source": [
|
| 35 |
+
"# Always remember to do this!\n",
|
| 36 |
+
"load_dotenv(override=True)"
|
| 37 |
+
]
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"cell_type": "code",
|
| 41 |
+
"execution_count": null,
|
| 42 |
+
"metadata": {},
|
| 43 |
+
"outputs": [],
|
| 44 |
+
"source": [
|
| 45 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 46 |
+
"\n",
|
| 47 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 48 |
+
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 49 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 50 |
+
"\n",
|
| 51 |
+
"if openai_api_key:\n",
|
| 52 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 53 |
+
"else:\n",
|
| 54 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 55 |
+
"\n",
|
| 56 |
+
"\n",
|
| 57 |
+
"if google_api_key:\n",
|
| 58 |
+
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 59 |
+
"else:\n",
|
| 60 |
+
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 61 |
+
"\n",
|
| 62 |
+
"if groq_api_key:\n",
|
| 63 |
+
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 64 |
+
"else:\n",
|
| 65 |
+
" print(\"Groq API Key not set (and this is optional)\")"
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"cell_type": "code",
|
| 70 |
+
"execution_count": null,
|
| 71 |
+
"metadata": {},
|
| 72 |
+
"outputs": [],
|
| 73 |
+
"source": [
|
| 74 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 75 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 76 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 77 |
+
]
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"cell_type": "code",
|
| 81 |
+
"execution_count": null,
|
| 82 |
+
"metadata": {},
|
| 83 |
+
"outputs": [],
|
| 84 |
+
"source": [
|
| 85 |
+
"openai = AsyncOpenAI()\n",
|
| 86 |
+
"response = await openai.chat.completions.create(\n",
|
| 87 |
+
" model=\"gpt-4o-mini\",\n",
|
| 88 |
+
" messages=messages,\n",
|
| 89 |
+
")\n",
|
| 90 |
+
"question = response.choices[0].message.content\n",
|
| 91 |
+
"print(question)\n"
|
| 92 |
+
]
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"cell_type": "code",
|
| 96 |
+
"execution_count": null,
|
| 97 |
+
"metadata": {},
|
| 98 |
+
"outputs": [],
|
| 99 |
+
"source": [
|
| 100 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 101 |
+
]
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"cell_type": "code",
|
| 105 |
+
"execution_count": null,
|
| 106 |
+
"metadata": {},
|
| 107 |
+
"outputs": [],
|
| 108 |
+
"source": [
|
| 109 |
+
"from dataclasses import dataclass\n",
|
| 110 |
+
"\n",
|
| 111 |
+
"@dataclass\n",
|
| 112 |
+
"class LLMResource:\n",
|
| 113 |
+
" api_key: str\n",
|
| 114 |
+
" model: str\n",
|
| 115 |
+
" url: str = None # optional; defaults to None\n",
|
| 116 |
+
"\n",
|
| 117 |
+
"llm_resources = [\n",
|
| 118 |
+
" LLMResource(api_key=openai_api_key, model=\"gpt-4o-mini\"),\n",
|
| 119 |
+
" LLMResource(api_key=google_api_key, model=\"gemini-2.5-flash\", url=\"https://generativelanguage.googleapis.com/v1beta/openai/\"),\n",
|
| 120 |
+
" LLMResource(api_key=groq_api_key, model=\"qwen/qwen3-32b\", url=\"https://api.groq.com/openai/v1\"),\n",
|
| 121 |
+
" LLMResource(api_key=\"ollama\", model=\"deepseek-r1:1.5b\", url=\"http://localhost:11434/v1\" )\n",
|
| 122 |
+
"]\n"
|
| 123 |
+
]
|
| 124 |
+
},
|
| 125 |
+
{
|
| 126 |
+
"cell_type": "code",
|
| 127 |
+
"execution_count": null,
|
| 128 |
+
"metadata": {},
|
| 129 |
+
"outputs": [],
|
| 130 |
+
"source": [
|
| 131 |
+
"\n",
|
| 132 |
+
"\n",
|
| 133 |
+
"async def llm_call(key, model_name, url, messages) -> tuple:\n",
|
| 134 |
+
" if url is None:\n",
|
| 135 |
+
" llm = AsyncOpenAI(api_key=key)\n",
|
| 136 |
+
" else: \n",
|
| 137 |
+
" llm = AsyncOpenAI(base_url=url,api_key=key)\n",
|
| 138 |
+
" \n",
|
| 139 |
+
" response = await llm.chat.completions.create(\n",
|
| 140 |
+
" model=model_name, messages=messages)\n",
|
| 141 |
+
" \n",
|
| 142 |
+
" answer = (model_name, response.choices[0].message.content)\n",
|
| 143 |
+
"\n",
|
| 144 |
+
" return answer # returns a tuple of the model name and the response from the LLM\n",
|
| 145 |
+
"\n",
|
| 146 |
+
"llm_callable = partial(llm_call, messages=messages) #prefill with messages\n",
|
| 147 |
+
"# Always remember to do this!"
|
| 148 |
+
]
|
| 149 |
+
},
|
| 150 |
+
{
|
| 151 |
+
"cell_type": "code",
|
| 152 |
+
"execution_count": null,
|
| 153 |
+
"metadata": {},
|
| 154 |
+
"outputs": [],
|
| 155 |
+
"source": [
|
| 156 |
+
"#gather all responses concurrently\n",
|
| 157 |
+
"tasks = [llm_callable(res.api_key,res.model,res.url) for res in llm_resources]\n",
|
| 158 |
+
"results = await asyncio.gather(*tasks)\n",
|
| 159 |
+
"together = [f'Response from competitor {model}:{answer}' for model,answer in results]#gather results once all model finish running\n"
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"cell_type": "code",
|
| 164 |
+
"execution_count": null,
|
| 165 |
+
"metadata": {},
|
| 166 |
+
"outputs": [],
|
| 167 |
+
"source": [
|
| 168 |
+
"judge = f\"\"\"You are judging a competition between {len(llm_resources)} competitors.\n",
|
| 169 |
+
"Each model has been given this question:\n",
|
| 170 |
+
"\n",
|
| 171 |
+
"{request}\n",
|
| 172 |
+
"\n",
|
| 173 |
+
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 174 |
+
"Respond with JSON, and only JSON, with the following format:\n",
|
| 175 |
+
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 176 |
+
"\n",
|
| 177 |
+
"Here are the responses from each competitor:\n",
|
| 178 |
+
"\n",
|
| 179 |
+
"{together} # all responses\n",
|
| 180 |
+
"\n",
|
| 181 |
+
"Now respond with the JSON with the ranked order of the competitors name, nothing else. Do not include markdown formatting or code blocks.\"\"\""
|
| 182 |
+
]
|
| 183 |
+
},
|
| 184 |
+
{
|
| 185 |
+
"cell_type": "code",
|
| 186 |
+
"execution_count": null,
|
| 187 |
+
"metadata": {},
|
| 188 |
+
"outputs": [],
|
| 189 |
+
"source": [
|
| 190 |
+
"print(judge)"
|
| 191 |
+
]
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"cell_type": "code",
|
| 195 |
+
"execution_count": null,
|
| 196 |
+
"metadata": {},
|
| 197 |
+
"outputs": [],
|
| 198 |
+
"source": [
|
| 199 |
+
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 200 |
+
]
|
| 201 |
+
},
|
| 202 |
+
{
|
| 203 |
+
"cell_type": "code",
|
| 204 |
+
"execution_count": null,
|
| 205 |
+
"metadata": {},
|
| 206 |
+
"outputs": [],
|
| 207 |
+
"source": [
|
| 208 |
+
"# Judgement time!\n",
|
| 209 |
+
"\n",
|
| 210 |
+
"openai = OpenAI()\n",
|
| 211 |
+
"response = openai.chat.completions.create(\n",
|
| 212 |
+
" model=\"o3-mini\",\n",
|
| 213 |
+
" messages=judge_messages,\n",
|
| 214 |
+
")\n",
|
| 215 |
+
"results = response.choices[0].message.content\n",
|
| 216 |
+
"print(results)\n"
|
| 217 |
+
]
|
| 218 |
+
},
|
| 219 |
+
{
|
| 220 |
+
"cell_type": "code",
|
| 221 |
+
"execution_count": null,
|
| 222 |
+
"metadata": {},
|
| 223 |
+
"outputs": [],
|
| 224 |
+
"source": [
|
| 225 |
+
"# OK let's turn this into results!\n",
|
| 226 |
+
"\n",
|
| 227 |
+
"results_dict = json.loads(results)\n",
|
| 228 |
+
"\n",
|
| 229 |
+
"ranks = results_dict[\"results\"]\n",
|
| 230 |
+
"\n",
|
| 231 |
+
"for index, result in enumerate(ranks):\n",
|
| 232 |
+
" print(f\"Rank {index+1}: {result}\")"
|
| 233 |
+
]
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"cell_type": "markdown",
|
| 237 |
+
"metadata": {},
|
| 238 |
+
"source": [
|
| 239 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 240 |
+
" <tr>\n",
|
| 241 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 242 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 243 |
+
" </td>\n",
|
| 244 |
+
" <td>\n",
|
| 245 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 246 |
+
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 247 |
+
" </span>\n",
|
| 248 |
+
" </td>\n",
|
| 249 |
+
" </tr>\n",
|
| 250 |
+
"</table>"
|
| 251 |
+
]
|
| 252 |
+
},
|
| 253 |
+
{
|
| 254 |
+
"cell_type": "markdown",
|
| 255 |
+
"metadata": {},
|
| 256 |
+
"source": [
|
| 257 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 258 |
+
" <tr>\n",
|
| 259 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 260 |
+
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 261 |
+
" </td>\n",
|
| 262 |
+
" <td>\n",
|
| 263 |
+
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 264 |
+
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 265 |
+
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 266 |
+
" to business projects where accuracy is critical.\n",
|
| 267 |
+
" </span>\n",
|
| 268 |
+
" </td>\n",
|
| 269 |
+
" </tr>\n",
|
| 270 |
+
"</table>"
|
| 271 |
+
]
|
| 272 |
+
}
|
| 273 |
+
],
|
| 274 |
+
"metadata": {
|
| 275 |
+
"kernelspec": {
|
| 276 |
+
"display_name": ".venv",
|
| 277 |
+
"language": "python",
|
| 278 |
+
"name": "python3"
|
| 279 |
+
},
|
| 280 |
+
"language_info": {
|
| 281 |
+
"codemirror_mode": {
|
| 282 |
+
"name": "ipython",
|
| 283 |
+
"version": 3
|
| 284 |
+
},
|
| 285 |
+
"file_extension": ".py",
|
| 286 |
+
"mimetype": "text/x-python",
|
| 287 |
+
"name": "python",
|
| 288 |
+
"nbconvert_exporter": "python",
|
| 289 |
+
"pygments_lexer": "ipython3",
|
| 290 |
+
"version": "3.12.3"
|
| 291 |
+
}
|
| 292 |
+
},
|
| 293 |
+
"nbformat": 4,
|
| 294 |
+
"nbformat_minor": 2
|
| 295 |
+
}
|
community_contributions/2_lab2_async.ipynb
ADDED
|
@@ -0,0 +1,474 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "code",
|
| 14 |
+
"execution_count": 1,
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"outputs": [],
|
| 17 |
+
"source": [
|
| 18 |
+
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 19 |
+
"\n",
|
| 20 |
+
"import os\n",
|
| 21 |
+
"import json\n",
|
| 22 |
+
"import asyncio\n",
|
| 23 |
+
"from dotenv import load_dotenv\n",
|
| 24 |
+
"from openai import OpenAI, AsyncOpenAI\n",
|
| 25 |
+
"from anthropic import AsyncAnthropic\n",
|
| 26 |
+
"from pydantic import BaseModel"
|
| 27 |
+
]
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"cell_type": "code",
|
| 31 |
+
"execution_count": null,
|
| 32 |
+
"metadata": {},
|
| 33 |
+
"outputs": [],
|
| 34 |
+
"source": [
|
| 35 |
+
"# Always remember to do this!\n",
|
| 36 |
+
"load_dotenv(override=True)"
|
| 37 |
+
]
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"cell_type": "code",
|
| 41 |
+
"execution_count": null,
|
| 42 |
+
"metadata": {},
|
| 43 |
+
"outputs": [],
|
| 44 |
+
"source": [
|
| 45 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 46 |
+
"\n",
|
| 47 |
+
"OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')\n",
|
| 48 |
+
"ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 49 |
+
"GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')\n",
|
| 50 |
+
"DEEPSEEK_API_KEY = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 51 |
+
"GROQ_API_KEY = os.getenv('GROQ_API_KEY')\n",
|
| 52 |
+
"\n",
|
| 53 |
+
"if OPENAI_API_KEY:\n",
|
| 54 |
+
" print(f\"OpenAI API Key exists and begins {OPENAI_API_KEY[:8]}\")\n",
|
| 55 |
+
"else:\n",
|
| 56 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 57 |
+
" \n",
|
| 58 |
+
"if ANTHROPIC_API_KEY:\n",
|
| 59 |
+
" print(f\"Anthropic API Key exists and begins {ANTHROPIC_API_KEY[:7]}\")\n",
|
| 60 |
+
"else:\n",
|
| 61 |
+
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 62 |
+
"\n",
|
| 63 |
+
"if GOOGLE_API_KEY:\n",
|
| 64 |
+
" print(f\"Google API Key exists and begins {GOOGLE_API_KEY[:2]}\")\n",
|
| 65 |
+
"else:\n",
|
| 66 |
+
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 67 |
+
"\n",
|
| 68 |
+
"if DEEPSEEK_API_KEY:\n",
|
| 69 |
+
" print(f\"DeepSeek API Key exists and begins {DEEPSEEK_API_KEY[:3]}\")\n",
|
| 70 |
+
"else:\n",
|
| 71 |
+
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 72 |
+
"\n",
|
| 73 |
+
"if GROQ_API_KEY:\n",
|
| 74 |
+
" print(f\"Groq API Key exists and begins {GROQ_API_KEY[:4]}\")\n",
|
| 75 |
+
"else:\n",
|
| 76 |
+
" print(\"Groq API Key not set (and this is optional)\")"
|
| 77 |
+
]
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"cell_type": "code",
|
| 81 |
+
"execution_count": 4,
|
| 82 |
+
"metadata": {},
|
| 83 |
+
"outputs": [],
|
| 84 |
+
"source": [
|
| 85 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 86 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 87 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 88 |
+
]
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"cell_type": "code",
|
| 92 |
+
"execution_count": null,
|
| 93 |
+
"metadata": {},
|
| 94 |
+
"outputs": [],
|
| 95 |
+
"source": [
|
| 96 |
+
"print(messages)"
|
| 97 |
+
]
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"cell_type": "code",
|
| 101 |
+
"execution_count": null,
|
| 102 |
+
"metadata": {},
|
| 103 |
+
"outputs": [],
|
| 104 |
+
"source": [
|
| 105 |
+
"openai = AsyncOpenAI()\n",
|
| 106 |
+
"response = await openai.chat.completions.create(\n",
|
| 107 |
+
" model=\"gpt-4o-mini\",\n",
|
| 108 |
+
" messages=messages,\n",
|
| 109 |
+
")\n",
|
| 110 |
+
"question = response.choices[0].message.content\n",
|
| 111 |
+
"print(question)\n"
|
| 112 |
+
]
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"cell_type": "code",
|
| 116 |
+
"execution_count": 7,
|
| 117 |
+
"metadata": {},
|
| 118 |
+
"outputs": [],
|
| 119 |
+
"source": [
|
| 120 |
+
"# Define Pydantic model for storing LLM results\n",
|
| 121 |
+
"class LLMResult(BaseModel):\n",
|
| 122 |
+
" model: str\n",
|
| 123 |
+
" answer: str\n"
|
| 124 |
+
]
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"cell_type": "code",
|
| 128 |
+
"execution_count": 8,
|
| 129 |
+
"metadata": {},
|
| 130 |
+
"outputs": [],
|
| 131 |
+
"source": [
|
| 132 |
+
"results: list[LLMResult] = []\n",
|
| 133 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 134 |
+
]
|
| 135 |
+
},
|
| 136 |
+
{
|
| 137 |
+
"cell_type": "code",
|
| 138 |
+
"execution_count": 9,
|
| 139 |
+
"metadata": {},
|
| 140 |
+
"outputs": [],
|
| 141 |
+
"source": [
|
| 142 |
+
"# The API we know well\n",
|
| 143 |
+
"async def openai_answer() -> None:\n",
|
| 144 |
+
"\n",
|
| 145 |
+
" if OPENAI_API_KEY is None:\n",
|
| 146 |
+
" return None\n",
|
| 147 |
+
" \n",
|
| 148 |
+
" print(\"OpenAI starting!\")\n",
|
| 149 |
+
" model_name = \"gpt-4o-mini\"\n",
|
| 150 |
+
"\n",
|
| 151 |
+
" try:\n",
|
| 152 |
+
" response = await openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 153 |
+
" answer = response.choices[0].message.content\n",
|
| 154 |
+
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 155 |
+
" except Exception as e:\n",
|
| 156 |
+
" print(f\"Error with OpenAI: {e}\")\n",
|
| 157 |
+
" return None\n",
|
| 158 |
+
"\n",
|
| 159 |
+
" print(\"OpenAI done!\")"
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"cell_type": "code",
|
| 164 |
+
"execution_count": 10,
|
| 165 |
+
"metadata": {},
|
| 166 |
+
"outputs": [],
|
| 167 |
+
"source": [
|
| 168 |
+
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 169 |
+
"\n",
|
| 170 |
+
"async def anthropic_answer() -> None:\n",
|
| 171 |
+
"\n",
|
| 172 |
+
" if ANTHROPIC_API_KEY is None:\n",
|
| 173 |
+
" return None\n",
|
| 174 |
+
" \n",
|
| 175 |
+
" print(\"Anthropic starting!\")\n",
|
| 176 |
+
" model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 177 |
+
"\n",
|
| 178 |
+
" claude = AsyncAnthropic()\n",
|
| 179 |
+
" try:\n",
|
| 180 |
+
" response = await claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 181 |
+
" answer = response.content[0].text\n",
|
| 182 |
+
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 183 |
+
" except Exception as e:\n",
|
| 184 |
+
" print(f\"Error with Anthropic: {e}\")\n",
|
| 185 |
+
" return None\n",
|
| 186 |
+
"\n",
|
| 187 |
+
" print(\"Anthropic done!\")"
|
| 188 |
+
]
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"cell_type": "code",
|
| 192 |
+
"execution_count": 11,
|
| 193 |
+
"metadata": {},
|
| 194 |
+
"outputs": [],
|
| 195 |
+
"source": [
|
| 196 |
+
"async def google_answer() -> None:\n",
|
| 197 |
+
"\n",
|
| 198 |
+
" if GOOGLE_API_KEY is None:\n",
|
| 199 |
+
" return None\n",
|
| 200 |
+
" \n",
|
| 201 |
+
" print(\"Google starting!\")\n",
|
| 202 |
+
" model_name = \"gemini-2.0-flash\"\n",
|
| 203 |
+
"\n",
|
| 204 |
+
" gemini = AsyncOpenAI(api_key=GOOGLE_API_KEY, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 205 |
+
" try:\n",
|
| 206 |
+
" response = await gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 207 |
+
" answer = response.choices[0].message.content\n",
|
| 208 |
+
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 209 |
+
" except Exception as e:\n",
|
| 210 |
+
" print(f\"Error with Google: {e}\")\n",
|
| 211 |
+
" return None\n",
|
| 212 |
+
"\n",
|
| 213 |
+
" print(\"Google done!\")"
|
| 214 |
+
]
|
| 215 |
+
},
|
| 216 |
+
{
|
| 217 |
+
"cell_type": "code",
|
| 218 |
+
"execution_count": 12,
|
| 219 |
+
"metadata": {},
|
| 220 |
+
"outputs": [],
|
| 221 |
+
"source": [
|
| 222 |
+
"async def deepseek_answer() -> None:\n",
|
| 223 |
+
"\n",
|
| 224 |
+
" if DEEPSEEK_API_KEY is None:\n",
|
| 225 |
+
" return None\n",
|
| 226 |
+
" \n",
|
| 227 |
+
" print(\"DeepSeek starting!\")\n",
|
| 228 |
+
" model_name = \"deepseek-chat\"\n",
|
| 229 |
+
"\n",
|
| 230 |
+
" deepseek = AsyncOpenAI(api_key=DEEPSEEK_API_KEY, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 231 |
+
" try:\n",
|
| 232 |
+
" response = await deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 233 |
+
" answer = response.choices[0].message.content\n",
|
| 234 |
+
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 235 |
+
" except Exception as e:\n",
|
| 236 |
+
" print(f\"Error with DeepSeek: {e}\")\n",
|
| 237 |
+
" return None\n",
|
| 238 |
+
"\n",
|
| 239 |
+
" print(\"DeepSeek done!\")"
|
| 240 |
+
]
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"cell_type": "code",
|
| 244 |
+
"execution_count": 13,
|
| 245 |
+
"metadata": {},
|
| 246 |
+
"outputs": [],
|
| 247 |
+
"source": [
|
| 248 |
+
"async def groq_answer() -> None:\n",
|
| 249 |
+
"\n",
|
| 250 |
+
" if GROQ_API_KEY is None:\n",
|
| 251 |
+
" return None\n",
|
| 252 |
+
" \n",
|
| 253 |
+
" print(\"Groq starting!\")\n",
|
| 254 |
+
" model_name = \"llama-3.3-70b-versatile\"\n",
|
| 255 |
+
"\n",
|
| 256 |
+
" groq = AsyncOpenAI(api_key=GROQ_API_KEY, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 257 |
+
" try:\n",
|
| 258 |
+
" response = await groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 259 |
+
" answer = response.choices[0].message.content\n",
|
| 260 |
+
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 261 |
+
" except Exception as e:\n",
|
| 262 |
+
" print(f\"Error with Groq: {e}\")\n",
|
| 263 |
+
" return None\n",
|
| 264 |
+
"\n",
|
| 265 |
+
" print(\"Groq done!\")\n"
|
| 266 |
+
]
|
| 267 |
+
},
|
| 268 |
+
{
|
| 269 |
+
"cell_type": "markdown",
|
| 270 |
+
"metadata": {},
|
| 271 |
+
"source": [
|
| 272 |
+
"## For the next cell, we will use Ollama\n",
|
| 273 |
+
"\n",
|
| 274 |
+
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 275 |
+
"and runs models locally using high performance C++ code.\n",
|
| 276 |
+
"\n",
|
| 277 |
+
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 278 |
+
"\n",
|
| 279 |
+
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 280 |
+
"\n",
|
| 281 |
+
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 282 |
+
"\n",
|
| 283 |
+
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 284 |
+
"\n",
|
| 285 |
+
"`ollama pull <model_name>` downloads a model locally \n",
|
| 286 |
+
"`ollama ls` lists all the models you've downloaded \n",
|
| 287 |
+
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 288 |
+
]
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"cell_type": "markdown",
|
| 292 |
+
"metadata": {},
|
| 293 |
+
"source": [
|
| 294 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 295 |
+
" <tr>\n",
|
| 296 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 297 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 298 |
+
" </td>\n",
|
| 299 |
+
" <td>\n",
|
| 300 |
+
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 301 |
+
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See <a href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 302 |
+
" </span>\n",
|
| 303 |
+
" </td>\n",
|
| 304 |
+
" </tr>\n",
|
| 305 |
+
"</table>"
|
| 306 |
+
]
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"cell_type": "code",
|
| 310 |
+
"execution_count": null,
|
| 311 |
+
"metadata": {},
|
| 312 |
+
"outputs": [],
|
| 313 |
+
"source": [
|
| 314 |
+
"!ollama pull llama3.2"
|
| 315 |
+
]
|
| 316 |
+
},
|
| 317 |
+
{
|
| 318 |
+
"cell_type": "code",
|
| 319 |
+
"execution_count": 15,
|
| 320 |
+
"metadata": {},
|
| 321 |
+
"outputs": [],
|
| 322 |
+
"source": [
|
| 323 |
+
"async def ollama_answer() -> None:\n",
|
| 324 |
+
" model_name = \"llama3.2\"\n",
|
| 325 |
+
"\n",
|
| 326 |
+
" print(\"Ollama starting!\")\n",
|
| 327 |
+
" ollama = AsyncOpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 328 |
+
" try:\n",
|
| 329 |
+
" response = await ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 330 |
+
" answer = response.choices[0].message.content\n",
|
| 331 |
+
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 332 |
+
" except Exception as e:\n",
|
| 333 |
+
" print(f\"Error with Ollama: {e}\")\n",
|
| 334 |
+
" return None\n",
|
| 335 |
+
"\n",
|
| 336 |
+
" print(\"Ollama done!\") "
|
| 337 |
+
]
|
| 338 |
+
},
|
| 339 |
+
{
|
| 340 |
+
"cell_type": "code",
|
| 341 |
+
"execution_count": null,
|
| 342 |
+
"metadata": {},
|
| 343 |
+
"outputs": [],
|
| 344 |
+
"source": [
|
| 345 |
+
"async def gather_answers():\n",
|
| 346 |
+
" tasks = [\n",
|
| 347 |
+
" openai_answer(),\n",
|
| 348 |
+
" anthropic_answer(),\n",
|
| 349 |
+
" google_answer(),\n",
|
| 350 |
+
" deepseek_answer(),\n",
|
| 351 |
+
" groq_answer(),\n",
|
| 352 |
+
" ollama_answer()\n",
|
| 353 |
+
" ]\n",
|
| 354 |
+
" await asyncio.gather(*tasks)\n",
|
| 355 |
+
"\n",
|
| 356 |
+
"await gather_answers()"
|
| 357 |
+
]
|
| 358 |
+
},
|
| 359 |
+
{
|
| 360 |
+
"cell_type": "code",
|
| 361 |
+
"execution_count": null,
|
| 362 |
+
"metadata": {},
|
| 363 |
+
"outputs": [],
|
| 364 |
+
"source": [
|
| 365 |
+
"together = \"\"\n",
|
| 366 |
+
"competitors = []\n",
|
| 367 |
+
"answers = []\n",
|
| 368 |
+
"\n",
|
| 369 |
+
"for res in results:\n",
|
| 370 |
+
" competitor = res.model\n",
|
| 371 |
+
" answer = res.answer\n",
|
| 372 |
+
" competitors.append(competitor)\n",
|
| 373 |
+
" answers.append(answer)\n",
|
| 374 |
+
" together += f\"# Response from competitor {competitor}\\n\\n\"\n",
|
| 375 |
+
" together += answer + \"\\n\\n\"\n",
|
| 376 |
+
"\n",
|
| 377 |
+
"print(f\"Number of competitors: {len(results)}\")\n",
|
| 378 |
+
"print(together)\n"
|
| 379 |
+
]
|
| 380 |
+
},
|
| 381 |
+
{
|
| 382 |
+
"cell_type": "code",
|
| 383 |
+
"execution_count": 18,
|
| 384 |
+
"metadata": {},
|
| 385 |
+
"outputs": [],
|
| 386 |
+
"source": [
|
| 387 |
+
"judge = f\"\"\"You are judging a competition between {len(results)} competitors.\n",
|
| 388 |
+
"Each model has been given this question:\n",
|
| 389 |
+
"\n",
|
| 390 |
+
"{question}\n",
|
| 391 |
+
"\n",
|
| 392 |
+
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 393 |
+
"Respond with JSON, and only JSON, with the following format:\n",
|
| 394 |
+
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 395 |
+
"\n",
|
| 396 |
+
"Here are the responses from each competitor:\n",
|
| 397 |
+
"\n",
|
| 398 |
+
"{together}\n",
|
| 399 |
+
"\n",
|
| 400 |
+
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 401 |
+
]
|
| 402 |
+
},
|
| 403 |
+
{
|
| 404 |
+
"cell_type": "code",
|
| 405 |
+
"execution_count": null,
|
| 406 |
+
"metadata": {},
|
| 407 |
+
"outputs": [],
|
| 408 |
+
"source": [
|
| 409 |
+
"print(judge)"
|
| 410 |
+
]
|
| 411 |
+
},
|
| 412 |
+
{
|
| 413 |
+
"cell_type": "code",
|
| 414 |
+
"execution_count": 20,
|
| 415 |
+
"metadata": {},
|
| 416 |
+
"outputs": [],
|
| 417 |
+
"source": [
|
| 418 |
+
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 419 |
+
]
|
| 420 |
+
},
|
| 421 |
+
{
|
| 422 |
+
"cell_type": "code",
|
| 423 |
+
"execution_count": null,
|
| 424 |
+
"metadata": {},
|
| 425 |
+
"outputs": [],
|
| 426 |
+
"source": [
|
| 427 |
+
"# Judgement time!\n",
|
| 428 |
+
"\n",
|
| 429 |
+
"openai = OpenAI()\n",
|
| 430 |
+
"response = openai.chat.completions.create(\n",
|
| 431 |
+
" model=\"o3-mini\",\n",
|
| 432 |
+
" messages=judge_messages,\n",
|
| 433 |
+
")\n",
|
| 434 |
+
"judgement = response.choices[0].message.content\n",
|
| 435 |
+
"print(judgement)\n"
|
| 436 |
+
]
|
| 437 |
+
},
|
| 438 |
+
{
|
| 439 |
+
"cell_type": "code",
|
| 440 |
+
"execution_count": null,
|
| 441 |
+
"metadata": {},
|
| 442 |
+
"outputs": [],
|
| 443 |
+
"source": [
|
| 444 |
+
"# OK let's turn this into results!\n",
|
| 445 |
+
"\n",
|
| 446 |
+
"results_dict = json.loads(judgement)\n",
|
| 447 |
+
"ranks = results_dict[\"results\"]\n",
|
| 448 |
+
"for index, comp in enumerate(ranks):\n",
|
| 449 |
+
" print(f\"Rank {index+1}: {comp}\")"
|
| 450 |
+
]
|
| 451 |
+
}
|
| 452 |
+
],
|
| 453 |
+
"metadata": {
|
| 454 |
+
"kernelspec": {
|
| 455 |
+
"display_name": ".venv",
|
| 456 |
+
"language": "python",
|
| 457 |
+
"name": "python3"
|
| 458 |
+
},
|
| 459 |
+
"language_info": {
|
| 460 |
+
"codemirror_mode": {
|
| 461 |
+
"name": "ipython",
|
| 462 |
+
"version": 3
|
| 463 |
+
},
|
| 464 |
+
"file_extension": ".py",
|
| 465 |
+
"mimetype": "text/x-python",
|
| 466 |
+
"name": "python",
|
| 467 |
+
"nbconvert_exporter": "python",
|
| 468 |
+
"pygments_lexer": "ipython3",
|
| 469 |
+
"version": "3.12.11"
|
| 470 |
+
}
|
| 471 |
+
},
|
| 472 |
+
"nbformat": 4,
|
| 473 |
+
"nbformat_minor": 2
|
| 474 |
+
}
|
community_contributions/2_lab2_async_with_reasons.ipynb
ADDED
|
@@ -0,0 +1,490 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "markdown",
|
| 14 |
+
"metadata": {},
|
| 15 |
+
"source": [
|
| 16 |
+
"This notebook was derived from 2_lab2_async; it additionally asks the judge to explain each ranking."
|
| 17 |
+
]
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"cell_type": "code",
|
| 21 |
+
"execution_count": null,
|
| 22 |
+
"metadata": {},
|
| 23 |
+
"outputs": [],
|
| 24 |
+
"source": [
|
| 25 |
+
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 26 |
+
"\n",
|
| 27 |
+
"import os\n",
|
| 28 |
+
"import json\n",
|
| 29 |
+
"import asyncio\n",
|
| 30 |
+
"from dotenv import load_dotenv\n",
|
| 31 |
+
"from openai import OpenAI, AsyncOpenAI\n",
|
| 32 |
+
"from anthropic import AsyncAnthropic\n",
|
| 33 |
+
"from pydantic import BaseModel"
|
| 34 |
+
]
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"cell_type": "code",
|
| 38 |
+
"execution_count": null,
|
| 39 |
+
"metadata": {},
|
| 40 |
+
"outputs": [],
|
| 41 |
+
"source": [
|
| 42 |
+
"# Always remember to do this!\n",
|
| 43 |
+
"load_dotenv(override=True)"
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"cell_type": "code",
|
| 48 |
+
"execution_count": null,
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"outputs": [],
|
| 51 |
+
"source": [
|
| 52 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 53 |
+
"\n",
|
| 54 |
+
"OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')\n",
|
| 55 |
+
"ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 56 |
+
"GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')\n",
|
| 57 |
+
"DEEPSEEK_API_KEY = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 58 |
+
"GROQ_API_KEY = os.getenv('GROQ_API_KEY')\n",
|
| 59 |
+
"\n",
|
| 60 |
+
"if OPENAI_API_KEY:\n",
|
| 61 |
+
" print(f\"OpenAI API Key exists and begins {OPENAI_API_KEY[:8]}\")\n",
|
| 62 |
+
"else:\n",
|
| 63 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 64 |
+
" \n",
|
| 65 |
+
"if ANTHROPIC_API_KEY:\n",
|
| 66 |
+
" print(f\"Anthropic API Key exists and begins {ANTHROPIC_API_KEY[:7]}\")\n",
|
| 67 |
+
"else:\n",
|
| 68 |
+
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 69 |
+
"\n",
|
| 70 |
+
"if GOOGLE_API_KEY:\n",
|
| 71 |
+
" print(f\"Google API Key exists and begins {GOOGLE_API_KEY[:2]}\")\n",
|
| 72 |
+
"else:\n",
|
| 73 |
+
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 74 |
+
"\n",
|
| 75 |
+
"if DEEPSEEK_API_KEY:\n",
|
| 76 |
+
" print(f\"DeepSeek API Key exists and begins {DEEPSEEK_API_KEY[:3]}\")\n",
|
| 77 |
+
"else:\n",
|
| 78 |
+
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 79 |
+
"\n",
|
| 80 |
+
"if GROQ_API_KEY:\n",
|
| 81 |
+
" print(f\"Groq API Key exists and begins {GROQ_API_KEY[:4]}\")\n",
|
| 82 |
+
"else:\n",
|
| 83 |
+
" print(\"Groq API Key not set (and this is optional)\")"
|
| 84 |
+
]
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"cell_type": "code",
|
| 88 |
+
"execution_count": null,
|
| 89 |
+
"metadata": {},
|
| 90 |
+
"outputs": [],
|
| 91 |
+
"source": [
|
| 92 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 93 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 94 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 95 |
+
]
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"cell_type": "code",
|
| 99 |
+
"execution_count": null,
|
| 100 |
+
"metadata": {},
|
| 101 |
+
"outputs": [],
|
| 102 |
+
"source": [
|
| 103 |
+
"print(messages)"
|
| 104 |
+
]
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"cell_type": "code",
|
| 108 |
+
"execution_count": null,
|
| 109 |
+
"metadata": {},
|
| 110 |
+
"outputs": [],
|
| 111 |
+
"source": [
|
| 112 |
+
"openai = AsyncOpenAI()\n",
|
| 113 |
+
"response = await openai.chat.completions.create(\n",
|
| 114 |
+
" model=\"gpt-4o-mini\",\n",
|
| 115 |
+
" messages=messages,\n",
|
| 116 |
+
")\n",
|
| 117 |
+
"question = response.choices[0].message.content\n",
|
| 118 |
+
"print(question)\n"
|
| 119 |
+
]
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
"cell_type": "code",
|
| 123 |
+
"execution_count": null,
|
| 124 |
+
"metadata": {},
|
| 125 |
+
"outputs": [],
|
| 126 |
+
"source": [
|
| 127 |
+
"# Define Pydantic model for storing LLM results\n",
|
| 128 |
+
"class LLMResult(BaseModel):\n",
|
| 129 |
+
" model: str\n",
|
| 130 |
+
" answer: str\n"
|
| 131 |
+
]
|
| 132 |
+
},
|
| 133 |
+
{
|
| 134 |
+
"cell_type": "code",
|
| 135 |
+
"execution_count": null,
|
| 136 |
+
"metadata": {},
|
| 137 |
+
"outputs": [],
|
| 138 |
+
"source": [
|
| 139 |
+
"results: list[LLMResult] = []\n",
|
| 140 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 141 |
+
]
|
| 142 |
+
},
|
| 143 |
+
{
|
| 144 |
+
"cell_type": "code",
|
| 145 |
+
"execution_count": null,
|
| 146 |
+
"metadata": {},
|
| 147 |
+
"outputs": [],
|
| 148 |
+
"source": [
|
| 149 |
+
"# The API we know well\n",
|
| 150 |
+
"async def openai_answer() -> None:\n",
|
| 151 |
+
"\n",
|
| 152 |
+
" if OPENAI_API_KEY is None:\n",
|
| 153 |
+
" return None\n",
|
| 154 |
+
" \n",
|
| 155 |
+
" print(\"OpenAI starting!\")\n",
|
| 156 |
+
" model_name = \"gpt-4o-mini\"\n",
|
| 157 |
+
"\n",
|
| 158 |
+
" try:\n",
|
| 159 |
+
" response = await openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 160 |
+
" answer = response.choices[0].message.content\n",
|
| 161 |
+
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 162 |
+
" except Exception as e:\n",
|
| 163 |
+
" print(f\"Error with OpenAI: {e}\")\n",
|
| 164 |
+
" return None\n",
|
| 165 |
+
"\n",
|
| 166 |
+
" print(\"OpenAI done!\")"
|
| 167 |
+
]
|
| 168 |
+
},
|
| 169 |
+
{
|
| 170 |
+
"cell_type": "code",
|
| 171 |
+
"execution_count": null,
|
| 172 |
+
"metadata": {},
|
| 173 |
+
"outputs": [],
|
| 174 |
+
"source": [
|
| 175 |
+
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 176 |
+
"\n",
|
| 177 |
+
"async def anthropic_answer() -> None:\n",
|
| 178 |
+
"\n",
|
| 179 |
+
" if ANTHROPIC_API_KEY is None:\n",
|
| 180 |
+
" return None\n",
|
| 181 |
+
" \n",
|
| 182 |
+
" print(\"Anthropic starting!\")\n",
|
| 183 |
+
" model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 184 |
+
"\n",
|
| 185 |
+
" claude = AsyncAnthropic()\n",
|
| 186 |
+
" try:\n",
|
| 187 |
+
" response = await claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 188 |
+
" answer = response.content[0].text\n",
|
| 189 |
+
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 190 |
+
" except Exception as e:\n",
|
| 191 |
+
" print(f\"Error with Anthropic: {e}\")\n",
|
| 192 |
+
" return None\n",
|
| 193 |
+
"\n",
|
| 194 |
+
" print(\"Anthropic done!\")"
|
| 195 |
+
]
|
| 196 |
+
},
|
| 197 |
+
{
|
| 198 |
+
"cell_type": "code",
|
| 199 |
+
"execution_count": null,
|
| 200 |
+
"metadata": {},
|
| 201 |
+
"outputs": [],
|
| 202 |
+
"source": [
|
| 203 |
+
"async def google_answer() -> None:\n",
|
| 204 |
+
"\n",
|
| 205 |
+
" if GOOGLE_API_KEY is None:\n",
|
| 206 |
+
" return None\n",
|
| 207 |
+
" \n",
|
| 208 |
+
" print(\"Google starting!\")\n",
|
| 209 |
+
" model_name = \"gemini-2.0-flash\"\n",
|
| 210 |
+
"\n",
|
| 211 |
+
" gemini = AsyncOpenAI(api_key=GOOGLE_API_KEY, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 212 |
+
" try:\n",
|
| 213 |
+
" response = await gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 214 |
+
" answer = response.choices[0].message.content\n",
|
| 215 |
+
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 216 |
+
" except Exception as e:\n",
|
| 217 |
+
" print(f\"Error with Google: {e}\")\n",
|
| 218 |
+
" return None\n",
|
| 219 |
+
"\n",
|
| 220 |
+
" print(\"Google done!\")"
|
| 221 |
+
]
|
| 222 |
+
},
|
| 223 |
+
{
|
| 224 |
+
"cell_type": "code",
|
| 225 |
+
"execution_count": null,
|
| 226 |
+
"metadata": {},
|
| 227 |
+
"outputs": [],
|
| 228 |
+
"source": [
|
| 229 |
+
"async def deepseek_answer() -> None:\n",
|
| 230 |
+
"\n",
|
| 231 |
+
" if DEEPSEEK_API_KEY is None:\n",
|
| 232 |
+
" return None\n",
|
| 233 |
+
" \n",
|
| 234 |
+
" print(\"DeepSeek starting!\")\n",
|
| 235 |
+
" model_name = \"deepseek-chat\"\n",
|
| 236 |
+
"\n",
|
| 237 |
+
" deepseek = AsyncOpenAI(api_key=DEEPSEEK_API_KEY, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 238 |
+
" try:\n",
|
| 239 |
+
" response = await deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 240 |
+
" answer = response.choices[0].message.content\n",
|
| 241 |
+
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 242 |
+
" except Exception as e:\n",
|
| 243 |
+
" print(f\"Error with DeepSeek: {e}\")\n",
|
| 244 |
+
" return None\n",
|
| 245 |
+
"\n",
|
| 246 |
+
" print(\"DeepSeek done!\")"
|
| 247 |
+
]
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"cell_type": "code",
|
| 251 |
+
"execution_count": null,
|
| 252 |
+
"metadata": {},
|
| 253 |
+
"outputs": [],
|
| 254 |
+
"source": [
|
| 255 |
+
"async def groq_answer() -> None:\n",
|
| 256 |
+
"\n",
|
| 257 |
+
" if GROQ_API_KEY is None:\n",
|
| 258 |
+
" return None\n",
|
| 259 |
+
" \n",
|
| 260 |
+
" print(\"Groq starting!\")\n",
|
| 261 |
+
" model_name = \"llama-3.3-70b-versatile\"\n",
|
| 262 |
+
"\n",
|
| 263 |
+
" groq = AsyncOpenAI(api_key=GROQ_API_KEY, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 264 |
+
" try:\n",
|
| 265 |
+
" response = await groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 266 |
+
" answer = response.choices[0].message.content\n",
|
| 267 |
+
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 268 |
+
" except Exception as e:\n",
|
| 269 |
+
" print(f\"Error with Groq: {e}\")\n",
|
| 270 |
+
" return None\n",
|
| 271 |
+
"\n",
|
| 272 |
+
" print(\"Groq done!\")\n"
|
| 273 |
+
]
|
| 274 |
+
},
|
| 275 |
+
{
|
| 276 |
+
"cell_type": "markdown",
|
| 277 |
+
"metadata": {},
|
| 278 |
+
"source": [
|
| 279 |
+
"## For the next cell, we will use Ollama\n",
|
| 280 |
+
"\n",
|
| 281 |
+
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 282 |
+
"and runs models locally using high performance C++ code.\n",
|
| 283 |
+
"\n",
|
| 284 |
+
"If you don't have Ollama, install it by visiting https://ollama.com, pressing Download and following the instructions.\n",
|
| 285 |
+
"\n",
|
| 286 |
+
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 287 |
+
"\n",
|
| 288 |
+
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 289 |
+
"\n",
|
| 290 |
+
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 291 |
+
"\n",
|
| 292 |
+
"`ollama pull <model_name>` downloads a model locally \n",
|
| 293 |
+
"`ollama ls` lists all the models you've downloaded \n",
|
| 294 |
+
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 295 |
+
]
|
| 296 |
+
},
|
| 297 |
+
{
|
| 298 |
+
"cell_type": "markdown",
|
| 299 |
+
"metadata": {},
|
| 300 |
+
"source": [
|
| 301 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 302 |
+
" <tr>\n",
|
| 303 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 304 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 305 |
+
" </td>\n",
|
| 306 |
+
" <td>\n",
|
| 307 |
+
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 308 |
+
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See <a href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 309 |
+
" </span>\n",
|
| 310 |
+
" </td>\n",
|
| 311 |
+
" </tr>\n",
|
| 312 |
+
"</table>"
|
| 313 |
+
]
|
| 314 |
+
},
|
| 315 |
+
{
|
| 316 |
+
"cell_type": "code",
|
| 317 |
+
"execution_count": null,
|
| 318 |
+
"metadata": {},
|
| 319 |
+
"outputs": [],
|
| 320 |
+
"source": [
|
| 321 |
+
"!ollama pull llama3.2"
|
| 322 |
+
]
|
| 323 |
+
},
|
| 324 |
+
{
|
| 325 |
+
"cell_type": "code",
|
| 326 |
+
"execution_count": null,
|
| 327 |
+
"metadata": {},
|
| 328 |
+
"outputs": [],
|
| 329 |
+
"source": [
|
| 330 |
+
"async def ollama_answer() -> None:\n",
|
| 331 |
+
" model_name = \"llama3.2\"\n",
|
| 332 |
+
"\n",
|
| 333 |
+
" print(\"Ollama starting!\")\n",
|
| 334 |
+
" ollama = AsyncOpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 335 |
+
" try:\n",
|
| 336 |
+
" response = await ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 337 |
+
" answer = response.choices[0].message.content\n",
|
| 338 |
+
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 339 |
+
" except Exception as e:\n",
|
| 340 |
+
" print(f\"Error with Ollama: {e}\")\n",
|
| 341 |
+
" return None\n",
|
| 342 |
+
"\n",
|
| 343 |
+
" print(\"Ollama done!\") "
|
| 344 |
+
]
|
| 345 |
+
},
|
| 346 |
+
{
|
| 347 |
+
"cell_type": "code",
|
| 348 |
+
"execution_count": null,
|
| 349 |
+
"metadata": {},
|
| 350 |
+
"outputs": [],
|
| 351 |
+
"source": [
|
| 352 |
+
"async def gather_answers():\n",
|
| 353 |
+
" tasks = [\n",
|
| 354 |
+
" openai_answer(),\n",
|
| 355 |
+
" anthropic_answer(),\n",
|
| 356 |
+
" google_answer(),\n",
|
| 357 |
+
" deepseek_answer(),\n",
|
| 358 |
+
" groq_answer(),\n",
|
| 359 |
+
" ollama_answer()\n",
|
| 360 |
+
" ]\n",
|
| 361 |
+
" await asyncio.gather(*tasks)\n",
|
| 362 |
+
"\n",
|
| 363 |
+
"await gather_answers()"
|
| 364 |
+
]
|
| 365 |
+
},
|
| 366 |
+
{
|
| 367 |
+
"cell_type": "code",
|
| 368 |
+
"execution_count": null,
|
| 369 |
+
"metadata": {},
|
| 370 |
+
"outputs": [],
|
| 371 |
+
"source": [
|
| 372 |
+
"together = \"\"\n",
|
| 373 |
+
"competitors = []\n",
|
| 374 |
+
"answers = []\n",
|
| 375 |
+
"\n",
|
| 376 |
+
"for res in results:\n",
|
| 377 |
+
" competitor = res.model\n",
|
| 378 |
+
" answer = res.answer\n",
|
| 379 |
+
" competitors.append(competitor)\n",
|
| 380 |
+
" answers.append(answer)\n",
|
| 381 |
+
" together += f\"# Response from competitor {competitor}\\n\\n\"\n",
|
| 382 |
+
" together += answer + \"\\n\\n\"\n",
|
| 383 |
+
"\n",
|
| 384 |
+
"print(f\"Number of competitors: {len(results)}\")\n",
|
| 385 |
+
"print(together)\n"
|
| 386 |
+
]
|
| 387 |
+
},
|
| 388 |
+
{
|
| 389 |
+
"cell_type": "code",
|
| 390 |
+
"execution_count": null,
|
| 391 |
+
"metadata": {},
|
| 392 |
+
"outputs": [],
|
| 393 |
+
"source": [
|
| 394 |
+
"judge = f\"\"\"You are judging a competition between {len(results)} competitors.\n",
|
| 395 |
+
"Each model has been given this question:\n",
|
| 396 |
+
"\n",
|
| 397 |
+
"{question}\n",
|
| 398 |
+
"\n",
|
| 399 |
+
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 400 |
+
"Respond with JSON, and only JSON, with the following format:\n",
|
| 401 |
+
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...],\n",
|
| 402 |
+
"\"explanations\": [\"explanation for each rank\", \"explanation for each rank\", \"explanation for each rank\", ...]}}\n",
|
| 403 |
+
"\n",
|
| 404 |
+
"Here are the responses from each competitor:\n",
|
| 405 |
+
"\n",
|
| 406 |
+
"{together}\n",
|
| 407 |
+
"\n",
|
| 408 |
+
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 409 |
+
]
|
| 410 |
+
},
|
| 411 |
+
{
|
| 412 |
+
"cell_type": "code",
|
| 413 |
+
"execution_count": null,
|
| 414 |
+
"metadata": {},
|
| 415 |
+
"outputs": [],
|
| 416 |
+
"source": [
|
| 417 |
+
"print(judge)"
|
| 418 |
+
]
|
| 419 |
+
},
|
| 420 |
+
{
|
| 421 |
+
"cell_type": "code",
|
| 422 |
+
"execution_count": null,
|
| 423 |
+
"metadata": {},
|
| 424 |
+
"outputs": [],
|
| 425 |
+
"source": [
|
| 426 |
+
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 427 |
+
]
|
| 428 |
+
},
|
| 429 |
+
{
|
| 430 |
+
"cell_type": "code",
|
| 431 |
+
"execution_count": null,
|
| 432 |
+
"metadata": {},
|
| 433 |
+
"outputs": [],
|
| 434 |
+
"source": [
|
| 435 |
+
"# Judgement time!\n",
|
| 436 |
+
"\n",
|
| 437 |
+
"openai = OpenAI()\n",
|
| 438 |
+
"response = openai.chat.completions.create(\n",
|
| 439 |
+
" model=\"o3-mini\",\n",
|
| 440 |
+
" messages=judge_messages,\n",
|
| 441 |
+
")\n",
|
| 442 |
+
"judgement = response.choices[0].message.content\n",
|
| 443 |
+
"print(judgement)\n"
|
| 444 |
+
]
|
| 445 |
+
},
|
| 446 |
+
{
|
| 447 |
+
"cell_type": "code",
|
| 448 |
+
"execution_count": null,
|
| 449 |
+
"metadata": {},
|
| 450 |
+
"outputs": [],
|
| 451 |
+
"source": [
|
| 452 |
+
"# OK let's turn this into results!\n",
|
| 453 |
+
"\n",
|
| 454 |
+
"results_dict = json.loads(judgement)\n",
|
| 455 |
+
"ranks = results_dict[\"results\"]\n",
|
| 456 |
+
"explanations = results_dict[\"explanations\"]\n",
|
| 457 |
+
"for index, comp in enumerate(ranks):\n",
|
| 458 |
+
" print(f\"Rank {index+1}: {comp} \\n\\t{explanations[index]}\")"
|
| 459 |
+
]
|
| 460 |
+
},
|
| 461 |
+
{
|
| 462 |
+
"cell_type": "code",
|
| 463 |
+
"execution_count": null,
|
| 464 |
+
"metadata": {},
|
| 465 |
+
"outputs": [],
|
| 466 |
+
"source": []
|
| 467 |
+
}
|
| 468 |
+
],
|
| 469 |
+
"metadata": {
|
| 470 |
+
"kernelspec": {
|
| 471 |
+
"display_name": ".venv",
|
| 472 |
+
"language": "python",
|
| 473 |
+
"name": "python3"
|
| 474 |
+
},
|
| 475 |
+
"language_info": {
|
| 476 |
+
"codemirror_mode": {
|
| 477 |
+
"name": "ipython",
|
| 478 |
+
"version": 3
|
| 479 |
+
},
|
| 480 |
+
"file_extension": ".py",
|
| 481 |
+
"mimetype": "text/x-python",
|
| 482 |
+
"name": "python",
|
| 483 |
+
"nbconvert_exporter": "python",
|
| 484 |
+
"pygments_lexer": "ipython3",
|
| 485 |
+
"version": "3.12.2"
|
| 486 |
+
}
|
| 487 |
+
},
|
| 488 |
+
"nbformat": 4,
|
| 489 |
+
"nbformat_minor": 2
|
| 490 |
+
}
|
community_contributions/2_lab2_doclee99_gpt5_improves_gemini.25flash.ipynb
ADDED
|
@@ -0,0 +1,620 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "markdown",
|
| 14 |
+
"metadata": {},
|
| 15 |
+
"source": [
|
| 16 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
+
" <tr>\n",
|
| 18 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
+
" </td>\n",
|
| 21 |
+
" <td>\n",
|
| 22 |
+
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
+
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a GitHub account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
+
" </span>\n",
|
| 25 |
+
" </td>\n",
|
| 26 |
+
" </tr>\n",
|
| 27 |
+
"</table>"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"cell_type": "code",
|
| 32 |
+
"execution_count": null,
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"outputs": [],
|
| 35 |
+
"source": [
|
| 36 |
+
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 37 |
+
"\n",
|
| 38 |
+
"import os\n",
|
| 39 |
+
"import json\n",
|
| 40 |
+
"from dotenv import load_dotenv\n",
|
| 41 |
+
"from openai import OpenAI\n",
|
| 42 |
+
"from anthropic import Anthropic\n",
|
| 43 |
+
"from IPython.display import Markdown, display"
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"cell_type": "code",
|
| 48 |
+
"execution_count": null,
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"outputs": [],
|
| 51 |
+
"source": [
|
| 52 |
+
"# Always remember to do this!\n",
|
| 53 |
+
"load_dotenv(override=True)"
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "code",
|
| 58 |
+
"execution_count": null,
|
| 59 |
+
"metadata": {},
|
| 60 |
+
"outputs": [],
|
| 61 |
+
"source": [
|
| 62 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 63 |
+
"\n",
|
| 64 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 65 |
+
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 66 |
+
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 67 |
+
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 68 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 69 |
+
"\n",
|
| 70 |
+
"if openai_api_key:\n",
|
| 71 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 72 |
+
"else:\n",
|
| 73 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 74 |
+
" \n",
|
| 75 |
+
"if anthropic_api_key:\n",
|
| 76 |
+
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 77 |
+
"else:\n",
|
| 78 |
+
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 79 |
+
"\n",
|
| 80 |
+
"if google_api_key:\n",
|
| 81 |
+
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 82 |
+
"else:\n",
|
| 83 |
+
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 84 |
+
"\n",
|
| 85 |
+
"if deepseek_api_key:\n",
|
| 86 |
+
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 87 |
+
"else:\n",
|
| 88 |
+
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"if groq_api_key:\n",
|
| 91 |
+
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 92 |
+
"else:\n",
|
| 93 |
+
" print(\"Groq API Key not set (and this is optional)\")"
|
| 94 |
+
]
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"cell_type": "code",
|
| 98 |
+
"execution_count": null,
|
| 99 |
+
"metadata": {},
|
| 100 |
+
"outputs": [],
|
| 101 |
+
"source": [
|
| 102 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 103 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 104 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 105 |
+
]
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"cell_type": "code",
|
| 109 |
+
"execution_count": null,
|
| 110 |
+
"metadata": {},
|
| 111 |
+
"outputs": [],
|
| 112 |
+
"source": [
|
| 113 |
+
"messages"
|
| 114 |
+
]
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"cell_type": "code",
|
| 118 |
+
"execution_count": null,
|
| 119 |
+
"metadata": {},
|
| 120 |
+
"outputs": [],
|
| 121 |
+
"source": [
|
| 122 |
+
"openai = OpenAI()\n",
|
| 123 |
+
"response = openai.chat.completions.create(\n",
|
| 124 |
+
" model=\"gpt-4o-mini\",\n",
|
| 125 |
+
" messages=messages,\n",
|
| 126 |
+
")\n",
|
| 127 |
+
"question = response.choices[0].message.content\n",
|
| 128 |
+
"print(question)\n"
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"cell_type": "code",
|
| 133 |
+
"execution_count": null,
|
| 134 |
+
"metadata": {},
|
| 135 |
+
"outputs": [],
|
| 136 |
+
"source": [
|
| 137 |
+
"competitors = []\n",
|
| 138 |
+
"answers = []\n",
|
| 139 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 140 |
+
]
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"cell_type": "code",
|
| 144 |
+
"execution_count": null,
|
| 145 |
+
"metadata": {},
|
| 146 |
+
"outputs": [],
|
| 147 |
+
"source": [
|
| 148 |
+
"# The API we know well\n",
|
| 149 |
+
"\n",
|
| 150 |
+
"model_name = \"gpt-4o-mini\"\n",
|
| 151 |
+
"\n",
|
| 152 |
+
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 153 |
+
"answer = response.choices[0].message.content\n",
|
| 154 |
+
"\n",
|
| 155 |
+
"display(Markdown(answer))\n",
|
| 156 |
+
"competitors.append(model_name)\n",
|
| 157 |
+
"answers.append(answer)"
|
| 158 |
+
]
|
| 159 |
+
},
|
| 160 |
+
{
|
| 161 |
+
"cell_type": "code",
|
| 162 |
+
"execution_count": null,
|
| 163 |
+
"metadata": {},
|
| 164 |
+
"outputs": [],
|
| 165 |
+
"source": [
|
| 166 |
+
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 167 |
+
"\n",
|
| 168 |
+
"model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 169 |
+
"\n",
|
| 170 |
+
"claude = Anthropic()\n",
|
| 171 |
+
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 172 |
+
"answer = response.content[0].text\n",
|
| 173 |
+
"\n",
|
| 174 |
+
"display(Markdown(answer))\n",
|
| 175 |
+
"competitors.append(model_name)\n",
|
| 176 |
+
"answers.append(answer)"
|
| 177 |
+
]
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"cell_type": "code",
|
| 181 |
+
"execution_count": null,
|
| 182 |
+
"metadata": {},
|
| 183 |
+
"outputs": [],
|
| 184 |
+
"source": [
|
| 185 |
+
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 186 |
+
"model_name = \"gemini-2.0-flash\"\n",
|
| 187 |
+
"\n",
|
| 188 |
+
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 189 |
+
"answer = response.choices[0].message.content\n",
|
| 190 |
+
"\n",
|
| 191 |
+
"display(Markdown(answer))\n",
|
| 192 |
+
"competitors.append(model_name)\n",
|
| 193 |
+
"answers.append(answer)"
|
| 194 |
+
]
|
| 195 |
+
},
|
| 196 |
+
{
|
| 197 |
+
"cell_type": "code",
|
| 198 |
+
"execution_count": null,
|
| 199 |
+
"metadata": {},
|
| 200 |
+
"outputs": [],
|
| 201 |
+
"source": [
|
| 202 |
+
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 203 |
+
"model_name = \"deepseek-chat\"\n",
|
| 204 |
+
"\n",
|
| 205 |
+
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 206 |
+
"answer = response.choices[0].message.content\n",
|
| 207 |
+
"\n",
|
| 208 |
+
"display(Markdown(answer))\n",
|
| 209 |
+
"competitors.append(model_name)\n",
|
| 210 |
+
"answers.append(answer)"
|
| 211 |
+
]
|
| 212 |
+
},
|
| 213 |
+
{
|
| 214 |
+
"cell_type": "code",
|
| 215 |
+
"execution_count": null,
|
| 216 |
+
"metadata": {},
|
| 217 |
+
"outputs": [],
|
| 218 |
+
"source": [
|
| 219 |
+
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 220 |
+
"model_name = \"llama-3.3-70b-versatile\"\n",
|
| 221 |
+
"\n",
|
| 222 |
+
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 223 |
+
"answer = response.choices[0].message.content\n",
|
| 224 |
+
"\n",
|
| 225 |
+
"display(Markdown(answer))\n",
|
| 226 |
+
"competitors.append(model_name)\n",
|
| 227 |
+
"answers.append(answer)\n"
|
| 228 |
+
]
|
| 229 |
+
},
|
| 230 |
+
{
|
| 231 |
+
"cell_type": "markdown",
|
| 232 |
+
"metadata": {},
|
| 233 |
+
"source": [
|
| 234 |
+
"## For the next cell, we will use Ollama\n",
|
| 235 |
+
"\n",
|
| 236 |
+
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 237 |
+
"and runs models locally using high performance C++ code.\n",
|
| 238 |
+
"\n",
|
| 239 |
+
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 240 |
+
"\n",
|
| 241 |
+
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 242 |
+
"\n",
|
| 243 |
+
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 244 |
+
"\n",
|
| 245 |
+
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 246 |
+
"\n",
|
| 247 |
+
"`ollama pull <model_name>` downloads a model locally \n",
|
| 248 |
+
"`ollama ls` lists all the models you've downloaded \n",
|
| 249 |
+
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 250 |
+
]
|
| 251 |
+
},
|
| 252 |
+
{
|
| 253 |
+
"cell_type": "markdown",
|
| 254 |
+
"metadata": {},
|
| 255 |
+
"source": [
|
| 256 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 257 |
+
" <tr>\n",
|
| 258 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 259 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 260 |
+
" </td>\n",
|
| 261 |
+
" <td>\n",
|
| 262 |
+
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 263 |
+
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See the <A href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 264 |
+
" </span>\n",
|
| 265 |
+
" </td>\n",
|
| 266 |
+
" </tr>\n",
|
| 267 |
+
"</table>"
|
| 268 |
+
]
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"cell_type": "code",
|
| 272 |
+
"execution_count": null,
|
| 273 |
+
"metadata": {},
|
| 274 |
+
"outputs": [],
|
| 275 |
+
"source": [
|
| 276 |
+
"!ollama pull llama3.2"
|
| 277 |
+
]
|
| 278 |
+
},
|
| 279 |
+
{
|
| 280 |
+
"cell_type": "code",
|
| 281 |
+
"execution_count": null,
|
| 282 |
+
"metadata": {},
|
| 283 |
+
"outputs": [],
|
| 284 |
+
"source": [
|
| 285 |
+
"ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 286 |
+
"model_name = \"llama3.2\"\n",
|
| 287 |
+
"\n",
|
| 288 |
+
"response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 289 |
+
"answer = response.choices[0].message.content\n",
|
| 290 |
+
"\n",
|
| 291 |
+
"display(Markdown(answer))\n",
|
| 292 |
+
"competitors.append(model_name)\n",
|
| 293 |
+
"answers.append(answer)"
|
| 294 |
+
]
|
| 295 |
+
},
|
| 296 |
+
{
|
| 297 |
+
"cell_type": "code",
|
| 298 |
+
"execution_count": null,
|
| 299 |
+
"metadata": {},
|
| 300 |
+
"outputs": [],
|
| 301 |
+
"source": [
|
| 302 |
+
"# So where are we?\n",
|
| 303 |
+
"\n",
|
| 304 |
+
"print(competitors)\n",
|
| 305 |
+
"print(answers)\n"
|
| 306 |
+
]
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"cell_type": "code",
|
| 310 |
+
"execution_count": null,
|
| 311 |
+
"metadata": {},
|
| 312 |
+
"outputs": [],
|
| 313 |
+
"source": [
|
| 314 |
+
"# It's nice to know how to use \"zip\"\n",
|
| 315 |
+
"for competitor, answer in zip(competitors, answers):\n",
|
| 316 |
+
" print(f\"Competitor: {competitor}\\n\\n{answer}\")\n"
|
| 317 |
+
]
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"cell_type": "code",
|
| 321 |
+
"execution_count": null,
|
| 322 |
+
"metadata": {},
|
| 323 |
+
"outputs": [],
|
| 324 |
+
"source": [
|
| 325 |
+
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 326 |
+
"\n",
|
| 327 |
+
"together = \"\"\n",
|
| 328 |
+
"for index, answer in enumerate(answers):\n",
|
| 329 |
+
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 330 |
+
" together += answer + \"\\n\\n\""
|
| 331 |
+
]
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"cell_type": "code",
|
| 335 |
+
"execution_count": null,
|
| 336 |
+
"metadata": {},
|
| 337 |
+
"outputs": [],
|
| 338 |
+
"source": [
|
| 339 |
+
"# print(together)\n",
|
| 340 |
+
"display(Markdown(together))"
|
| 341 |
+
]
|
| 342 |
+
},
|
| 343 |
+
{
|
| 344 |
+
"cell_type": "code",
|
| 345 |
+
"execution_count": null,
|
| 346 |
+
"metadata": {},
|
| 347 |
+
"outputs": [],
|
| 348 |
+
"source": [
|
| 349 |
+
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 350 |
+
"Each model has been given this question:\n",
|
| 351 |
+
"\n",
|
| 352 |
+
"{question}\n",
|
| 353 |
+
"\n",
|
| 354 |
+
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 355 |
+
"Respond with JSON, and only JSON, with the following format:\n",
|
| 356 |
+
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 357 |
+
"\n",
|
| 358 |
+
"Here are the responses from each competitor:\n",
|
| 359 |
+
"\n",
|
| 360 |
+
"{together}\n",
|
| 361 |
+
"\n",
|
| 362 |
+
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 363 |
+
]
|
| 364 |
+
},
|
| 365 |
+
{
|
| 366 |
+
"cell_type": "code",
|
| 367 |
+
"execution_count": null,
|
| 368 |
+
"metadata": {},
|
| 369 |
+
"outputs": [],
|
| 370 |
+
"source": [
|
| 371 |
+
"print(judge)"
|
| 372 |
+
]
|
| 373 |
+
},
|
| 374 |
+
{
|
| 375 |
+
"cell_type": "code",
|
| 376 |
+
"execution_count": null,
|
| 377 |
+
"metadata": {},
|
| 378 |
+
"outputs": [],
|
| 379 |
+
"source": [
|
| 380 |
+
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 381 |
+
]
|
| 382 |
+
},
|
| 383 |
+
{
|
| 384 |
+
"cell_type": "code",
|
| 385 |
+
"execution_count": null,
|
| 386 |
+
"metadata": {},
|
| 387 |
+
"outputs": [],
|
| 388 |
+
"source": [
|
| 389 |
+
"# Judgement time!\n",
|
| 390 |
+
"\n",
|
| 391 |
+
"openai = OpenAI()\n",
|
| 392 |
+
"response = openai.chat.completions.create(\n",
|
| 393 |
+
" model=\"o3-mini\",\n",
|
| 394 |
+
" messages=judge_messages,\n",
|
| 395 |
+
")\n",
|
| 396 |
+
"results = response.choices[0].message.content\n",
|
| 397 |
+
"print(results)\n"
|
| 398 |
+
]
|
| 399 |
+
},
|
| 400 |
+
{
|
| 401 |
+
"cell_type": "code",
|
| 402 |
+
"execution_count": null,
|
| 403 |
+
"metadata": {},
|
| 404 |
+
"outputs": [],
|
| 405 |
+
"source": [
|
| 406 |
+
"# OK let's turn this into results!\n",
|
| 407 |
+
"\n",
|
| 408 |
+
"results_dict = json.loads(results)\n",
|
| 409 |
+
"ranks = results_dict[\"results\"]\n",
|
| 410 |
+
"for index, result in enumerate(ranks):\n",
|
| 411 |
+
" competitor = competitors[int(result)-1]\n",
|
| 412 |
+
" print(f\"Rank {index+1}: {competitor}\")"
|
| 413 |
+
]
|
| 414 |
+
},
|
| 415 |
+
{
|
| 416 |
+
"cell_type": "markdown",
|
| 417 |
+
"metadata": {},
|
| 418 |
+
"source": [
|
| 419 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 420 |
+
" <tr>\n",
|
| 421 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 422 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 423 |
+
" </td>\n",
|
| 424 |
+
" <td>\n",
|
| 425 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 426 |
+
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 427 |
+
" </span>\n",
|
| 428 |
+
" </td>\n",
|
| 429 |
+
" </tr>\n",
|
| 430 |
+
"</table>"
|
| 431 |
+
]
|
| 432 |
+
},
|
| 433 |
+
{
|
| 434 |
+
"cell_type": "code",
|
| 435 |
+
"execution_count": null,
|
| 436 |
+
"metadata": {},
|
| 437 |
+
"outputs": [],
|
| 438 |
+
"source": [
|
| 439 |
+
"# Implement Evaluator-Optimizer workflow design pattern - An Optimizer LLM analyzes the response of the top-ranked competitor\n",
|
| 440 |
+
"# and creates a system prompt designed to improve the response. The system prompot is then\n",
|
| 441 |
+
"# sent back to the top-ranked competitor to deliver a new response. \n",
|
| 442 |
+
"# The optimizer LLM then compares the new response to the old response and surmises\n",
|
| 443 |
+
"# what aspects of the system prompt may be responsible for the differences in the responses.\n",
|
| 444 |
+
"\n",
|
| 445 |
+
"\n",
|
| 446 |
+
"\n",
|
| 447 |
+
"# Get the top competitor (model name) and their response\n",
|
| 448 |
+
"top_rank_index = int(ranks[0]) - 1\n",
|
| 449 |
+
"top_competitor_name = competitors[top_rank_index]\n",
|
| 450 |
+
"top_competitor_response = answers[top_rank_index]\n",
|
| 451 |
+
"top_competitor_prompt = question\n",
|
| 452 |
+
"\n",
|
| 453 |
+
"# Compose a system prompt for GPT-5 to act as an expert evaluator of question quality and answer depth\n",
|
| 454 |
+
"system_prompt = (\n",
|
| 455 |
+
" \"You are an expert evaluator of LLM prompt quality and answer depth. \"\n",
|
| 456 |
+
" \"Your task is to analyze the comprehensiveness and depth of thought in the following answer, \"\n",
|
| 457 |
+
" \"which was generated by a language model in response to a challenging question. \"\n",
|
| 458 |
+
" \"Consider aspects such as completeness, insight, reasoning, and nuance. \"\n",
|
| 459 |
+
" \"Provide a detailed analysis of the answer's strengths and weaknesses and store in the 'markdown_analysis' property.\"\n",
|
| 460 |
+
" \"Generate a suggested system prompt that will improve the answer and store in the 'system_prompt' property.\"\n",
|
| 461 |
+
")\n",
|
| 462 |
+
"\n",
|
| 463 |
+
"# Compose the user prompt for GPT-5\n",
|
| 464 |
+
"user_prompt = (\n",
|
| 465 |
+
" f\"Prompt:\\n{top_competitor_prompt}\\n\\n\"\n",
|
| 466 |
+
" f\"Answer:\\n{top_competitor_response}\\n\\n\"\n",
|
| 467 |
+
" \"Please analyze the comprehensiveness and depth of thought of the above answer. \"\n",
|
| 468 |
+
" \"Discuss its strengths and weaknesses in detail.\"\n",
|
| 469 |
+
")\n",
|
| 470 |
+
"\n",
|
| 471 |
+
"# Call GPT-5 to perform the evaluation\n",
|
| 472 |
+
"gpt5 = OpenAI()\n",
|
| 473 |
+
"\n",
|
| 474 |
+
"# Define the tool schema\n",
|
| 475 |
+
"tools = [\n",
|
| 476 |
+
" {\n",
|
| 477 |
+
" \"type\": \"function\",\n",
|
| 478 |
+
" \"function\": {\n",
|
| 479 |
+
" \"name\": \"markdown_and_structured_data\",\n",
|
| 480 |
+
" \"description\": \"Provide both markdown analysis and structured data\",\n",
|
| 481 |
+
" \"parameters\": {\n",
|
| 482 |
+
" \"type\": \"object\",\n",
|
| 483 |
+
" \"properties\": {\n",
|
| 484 |
+
" \"markdown_analysis\": {\n",
|
| 485 |
+
" \"type\": \"string\",\n",
|
| 486 |
+
" \"description\": \"Detailed markdown analysis\"\n",
|
| 487 |
+
" },\n",
|
| 488 |
+
" \"system_prompt\": {\n",
|
| 489 |
+
" \"type\": \"string\"\n",
|
| 490 |
+
" }\n",
|
| 491 |
+
" },\n",
|
| 492 |
+
" \"required\": [\"markdown_analysis\", \"sentiment\", \"confidence\", \"key_phrases\"]\n",
|
| 493 |
+
" }\n",
|
| 494 |
+
" }\n",
|
| 495 |
+
" }\n",
|
| 496 |
+
"]\n",
|
| 497 |
+
"\n",
|
| 498 |
+
"gpt5_response = gpt5.chat.completions.create(\n",
|
| 499 |
+
" model=\"gpt-5\",\n",
|
| 500 |
+
" messages=[\n",
|
| 501 |
+
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
| 502 |
+
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
| 503 |
+
" ],\n",
|
| 504 |
+
" tools=tools,\n",
|
| 505 |
+
" tool_choice={\"type\": \"function\", \"function\": {\"name\": \"markdown_and_structured_data\"}}\n",
|
| 506 |
+
")\n",
|
| 507 |
+
"\n",
|
| 508 |
+
"tool_call = gpt5_response.choices[0].message.tool_calls[0]\n",
|
| 509 |
+
"arguments = json.loads(tool_call.function.arguments)\n",
|
| 510 |
+
"\n",
|
| 511 |
+
"markdown_analysis = arguments[\"markdown_analysis\"]\n",
|
| 512 |
+
"system_prompt = arguments[\"system_prompt\"]\n",
|
| 513 |
+
"\n",
|
| 514 |
+
"\n",
|
| 515 |
+
"\n",
|
| 516 |
+
"\n",
|
| 517 |
+
"# Display the evaluation\n",
|
| 518 |
+
"from IPython.display import Markdown, display\n",
|
| 519 |
+
"display(Markdown(\"### GPT-5 Evaluation of Top Competitor's Answer\"))\n",
|
| 520 |
+
"display(Markdown(f\"Top Competitor: {top_competitor_name}\"))\n",
|
| 521 |
+
"display(Markdown(markdown_analysis))\n",
|
| 522 |
+
"display(Markdown(\"### Suggested System Prompt\"))\n",
|
| 523 |
+
"display(Markdown(system_prompt))\n",
|
| 524 |
+
"\n",
|
| 525 |
+
"\n",
|
| 526 |
+
"# The top competitor was gemini-2.0-flash, so send the original question and suggested system prompt to generate a new response\n",
|
| 527 |
+
"# Send the system_prompt and original question to gemini-2.0-flash to generate a new answer\n",
|
| 528 |
+
"\n",
|
| 529 |
+
"gemini_response = gemini.chat.completions.create(\n",
|
| 530 |
+
" model=\"gemini-2.0-flash\",\n",
|
| 531 |
+
" messages=[\n",
|
| 532 |
+
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
| 533 |
+
" {\"role\": \"user\", \"content\": question}\n",
|
| 534 |
+
" ]\n",
|
| 535 |
+
")\n",
|
| 536 |
+
"\n",
|
| 537 |
+
"new_answer = gemini_response.choices[0].message.content\n",
|
| 538 |
+
"\n",
|
| 539 |
+
"display(Markdown(\"### Gemini-2.0-Flash New Answer with Suggested System Prompt\"))\n",
|
| 540 |
+
"display(Markdown(new_answer))\n",
|
| 541 |
+
"\n",
|
| 542 |
+
"comparison_prompt = f\"\"\"You are an expert LLM evaluator. Compare the following two answers to the same question, where the only difference is that the second answer was generated using a system prompt suggested by you (GPT-5) after evaluating the first answer.\n",
|
| 543 |
+
"\n",
|
| 544 |
+
"Original Answer (from {top_competitor_name}):\n",
|
| 545 |
+
"{top_competitor_response}\n",
|
| 546 |
+
"\n",
|
| 547 |
+
"New Answer (from {top_competitor_name} with your system prompt):\n",
|
| 548 |
+
"{new_answer}\n",
|
| 549 |
+
"\n",
|
| 550 |
+
"System Prompt Used for New Answer:\n",
|
| 551 |
+
"{system_prompt}\n",
|
| 552 |
+
"\n",
|
| 553 |
+
"Please analyze:\n",
|
| 554 |
+
"- What are the key differences between the two answers?\n",
|
| 555 |
+
"- What aspects of the system prompt likely contributed to these differences?\n",
|
| 556 |
+
"- Did the system prompt improve the quality, accuracy, or style of the answer? How?\n",
|
| 557 |
+
"- Any remaining limitations or further suggestions.\n",
|
| 558 |
+
"\n",
|
| 559 |
+
"Provide a detailed, structured analysis.\n",
|
| 560 |
+
"\"\"\"\n",
|
| 561 |
+
"\n",
|
| 562 |
+
"gpt5_comparison_response = gpt5.chat.completions.create(\n",
|
| 563 |
+
" model=\"gpt-5\",\n",
|
| 564 |
+
" messages=[\n",
|
| 565 |
+
" {\"role\": \"system\", \"content\": \"You are an expert LLM evaluator.\"},\n",
|
| 566 |
+
" {\"role\": \"user\", \"content\": comparison_prompt}\n",
|
| 567 |
+
" ]\n",
|
| 568 |
+
")\n",
|
| 569 |
+
"\n",
|
| 570 |
+
"comparison_analysis = gpt5_comparison_response.choices[0].message.content\n",
|
| 571 |
+
"\n",
|
| 572 |
+
"display(Markdown(\"### GPT-5 Analysis: Impact of System Prompt on Gemini-2.0-Flash's Answer\"))\n",
|
| 573 |
+
"display(Markdown(comparison_analysis))\n",
|
| 574 |
+
"\n",
|
| 575 |
+
"\n"
|
| 576 |
+
]
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"cell_type": "markdown",
|
| 580 |
+
"metadata": {},
|
| 581 |
+
"source": [
|
| 582 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 583 |
+
" <tr>\n",
|
| 584 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 585 |
+
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 586 |
+
" </td>\n",
|
| 587 |
+
" <td>\n",
|
| 588 |
+
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 589 |
+
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 590 |
+
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 591 |
+
" to business projects where accuracy is critical.\n",
|
| 592 |
+
" </span>\n",
|
| 593 |
+
" </td>\n",
|
| 594 |
+
" </tr>\n",
|
| 595 |
+
"</table>"
|
| 596 |
+
]
|
| 597 |
+
}
|
| 598 |
+
],
|
| 599 |
+
"metadata": {
|
| 600 |
+
"kernelspec": {
|
| 601 |
+
"display_name": ".venv",
|
| 602 |
+
"language": "python",
|
| 603 |
+
"name": "python3"
|
| 604 |
+
},
|
| 605 |
+
"language_info": {
|
| 606 |
+
"codemirror_mode": {
|
| 607 |
+
"name": "ipython",
|
| 608 |
+
"version": 3
|
| 609 |
+
},
|
| 610 |
+
"file_extension": ".py",
|
| 611 |
+
"mimetype": "text/x-python",
|
| 612 |
+
"name": "python",
|
| 613 |
+
"nbconvert_exporter": "python",
|
| 614 |
+
"pygments_lexer": "ipython3",
|
| 615 |
+
"version": "3.12.7"
|
| 616 |
+
}
|
| 617 |
+
},
|
| 618 |
+
"nbformat": 4,
|
| 619 |
+
"nbformat_minor": 2
|
| 620 |
+
}
|
community_contributions/2_lab2_evaluator_mars.ipynb
ADDED
|
@@ -0,0 +1,677 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "markdown",
|
| 14 |
+
"metadata": {},
|
| 15 |
+
"source": [
|
| 16 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
+
" <tr>\n",
|
| 18 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
+
" </td>\n",
|
| 21 |
+
" <td>\n",
|
| 22 |
+
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
+
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
+
" </span>\n",
|
| 25 |
+
" </td>\n",
|
| 26 |
+
" </tr>\n",
|
| 27 |
+
"</table>"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"cell_type": "code",
|
| 32 |
+
"execution_count": null,
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"outputs": [],
|
| 35 |
+
"source": [
|
| 36 |
+
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 37 |
+
"\n",
|
| 38 |
+
"import os\n",
|
| 39 |
+
"import json\n",
|
| 40 |
+
"from dotenv import load_dotenv\n",
|
| 41 |
+
"from openai import OpenAI\n",
|
| 42 |
+
"from anthropic import Anthropic\n",
|
| 43 |
+
"from IPython.display import Markdown, display"
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"cell_type": "code",
|
| 48 |
+
"execution_count": null,
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"outputs": [],
|
| 51 |
+
"source": [
|
| 52 |
+
"# Always remember to do this!\n",
|
| 53 |
+
"load_dotenv(override=True)"
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "code",
|
| 58 |
+
"execution_count": null,
|
| 59 |
+
"metadata": {},
|
| 60 |
+
"outputs": [],
|
| 61 |
+
"source": [
|
| 62 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 63 |
+
"\n",
|
| 64 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 65 |
+
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 66 |
+
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 67 |
+
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 68 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 69 |
+
"\n",
|
| 70 |
+
"if openai_api_key:\n",
|
| 71 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 72 |
+
"else:\n",
|
| 73 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 74 |
+
" \n",
|
| 75 |
+
"if anthropic_api_key:\n",
|
| 76 |
+
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 77 |
+
"else:\n",
|
| 78 |
+
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 79 |
+
"\n",
|
| 80 |
+
"if google_api_key:\n",
|
| 81 |
+
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 82 |
+
"else:\n",
|
| 83 |
+
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 84 |
+
"\n",
|
| 85 |
+
"if deepseek_api_key:\n",
|
| 86 |
+
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 87 |
+
"else:\n",
|
| 88 |
+
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"if groq_api_key:\n",
|
| 91 |
+
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 92 |
+
"else:\n",
|
| 93 |
+
" print(\"Groq API Key not set (and this is optional)\")"
|
| 94 |
+
]
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"cell_type": "code",
|
| 98 |
+
"execution_count": null,
|
| 99 |
+
"metadata": {},
|
| 100 |
+
"outputs": [],
|
| 101 |
+
"source": [
|
| 102 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 103 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 104 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 105 |
+
]
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"cell_type": "code",
|
| 109 |
+
"execution_count": null,
|
| 110 |
+
"metadata": {},
|
| 111 |
+
"outputs": [],
|
| 112 |
+
"source": [
|
| 113 |
+
"messages"
|
| 114 |
+
]
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"cell_type": "code",
|
| 118 |
+
"execution_count": null,
|
| 119 |
+
"metadata": {},
|
| 120 |
+
"outputs": [],
|
| 121 |
+
"source": [
|
| 122 |
+
"openai = OpenAI()\n",
|
| 123 |
+
"response = openai.chat.completions.create(\n",
|
| 124 |
+
" model=\"gpt-5-mini\",\n",
|
| 125 |
+
" messages=messages,\n",
|
| 126 |
+
")\n",
|
| 127 |
+
"question = response.choices[0].message.content\n",
|
| 128 |
+
"print(question)\n"
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"cell_type": "code",
|
| 133 |
+
"execution_count": null,
|
| 134 |
+
"metadata": {},
|
| 135 |
+
"outputs": [],
|
| 136 |
+
"source": [
|
| 137 |
+
"competitors = []\n",
|
| 138 |
+
"answers = []\n",
|
| 139 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 140 |
+
]
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"cell_type": "markdown",
|
| 144 |
+
"metadata": {},
|
| 145 |
+
"source": [
|
| 146 |
+
"## Note - update since the videos\n",
|
| 147 |
+
"\n",
|
| 148 |
+
"I've updated the model names to use the latest models below, like GPT 5 and Claude Sonnet 4.5. It's worth noting that these models can be quite slow - like 1-2 minutes - but they do a great job! Feel free to switch them for faster models if you'd prefer, like the ones I use in the video."
|
| 149 |
+
]
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"cell_type": "code",
|
| 153 |
+
"execution_count": null,
|
| 154 |
+
"metadata": {},
|
| 155 |
+
"outputs": [],
|
| 156 |
+
"source": [
|
| 157 |
+
"# The API we know well\n",
|
| 158 |
+
"# I've updated this with the latest model, but it can take some time because it likes to think!\n",
|
| 159 |
+
"# Replace the model with gpt-4.1-mini if you'd prefer not to wait 1-2 mins\n",
|
| 160 |
+
"\n",
|
| 161 |
+
"model_name = \"gpt-5-nano\"\n",
|
| 162 |
+
"\n",
|
| 163 |
+
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 164 |
+
"answer = response.choices[0].message.content\n",
|
| 165 |
+
"\n",
|
| 166 |
+
"display(Markdown(answer))\n",
|
| 167 |
+
"competitors.append(model_name)\n",
|
| 168 |
+
"answers.append(answer)"
|
| 169 |
+
]
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
"cell_type": "code",
|
| 173 |
+
"execution_count": null,
|
| 174 |
+
"metadata": {},
|
| 175 |
+
"outputs": [],
|
| 176 |
+
"source": [
|
| 177 |
+
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 178 |
+
"\n",
|
| 179 |
+
"model_name = \"claude-sonnet-4-5\"\n",
|
| 180 |
+
"\n",
|
| 181 |
+
"claude = Anthropic()\n",
|
| 182 |
+
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=5000)\n",
|
| 183 |
+
"answer = response.content[0].text\n",
|
| 184 |
+
"\n",
|
| 185 |
+
"display(Markdown(answer))\n",
|
| 186 |
+
"competitors.append(model_name)\n",
|
| 187 |
+
"answers.append(answer)"
|
| 188 |
+
]
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"cell_type": "code",
|
| 192 |
+
"execution_count": null,
|
| 193 |
+
"metadata": {},
|
| 194 |
+
"outputs": [],
|
| 195 |
+
"source": [
|
| 196 |
+
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 197 |
+
"model_name = \"gemini-2.5-flash\"\n",
|
| 198 |
+
"\n",
|
| 199 |
+
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 200 |
+
"answer = response.choices[0].message.content\n",
|
| 201 |
+
"\n",
|
| 202 |
+
"display(Markdown(answer))\n",
|
| 203 |
+
"competitors.append(model_name)\n",
|
| 204 |
+
"answers.append(answer)"
|
| 205 |
+
]
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"cell_type": "code",
|
| 209 |
+
"execution_count": null,
|
| 210 |
+
"metadata": {},
|
| 211 |
+
"outputs": [],
|
| 212 |
+
"source": [
|
| 213 |
+
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 214 |
+
"model_name = \"deepseek-chat\"\n",
|
| 215 |
+
"\n",
|
| 216 |
+
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 217 |
+
"answer = response.choices[0].message.content\n",
|
| 218 |
+
"\n",
|
| 219 |
+
"display(Markdown(answer))\n",
|
| 220 |
+
"competitors.append(model_name)\n",
|
| 221 |
+
"answers.append(answer)"
|
| 222 |
+
]
|
| 223 |
+
},
|
| 224 |
+
{
|
| 225 |
+
"cell_type": "code",
|
| 226 |
+
"execution_count": null,
|
| 227 |
+
"metadata": {},
|
| 228 |
+
"outputs": [],
|
| 229 |
+
"source": [
|
| 230 |
+
"# Updated with the latest Open Source model from OpenAI\n",
|
| 231 |
+
"\n",
|
| 232 |
+
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 233 |
+
"model_name = \"openai/gpt-oss-120b\"\n",
|
| 234 |
+
"\n",
|
| 235 |
+
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 236 |
+
"answer = response.choices[0].message.content\n",
|
| 237 |
+
"\n",
|
| 238 |
+
"display(Markdown(answer))\n",
|
| 239 |
+
"competitors.append(model_name)\n",
|
| 240 |
+
"answers.append(answer)\n"
|
| 241 |
+
]
|
| 242 |
+
},
|
| 243 |
+
{
|
| 244 |
+
"cell_type": "markdown",
|
| 245 |
+
"metadata": {},
|
| 246 |
+
"source": [
|
| 247 |
+
"## For the next cell, we will use Ollama\n",
|
| 248 |
+
"\n",
|
| 249 |
+
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 250 |
+
"and runs models locally using high performance C++ code.\n",
|
| 251 |
+
"\n",
|
| 252 |
+
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 253 |
+
"\n",
|
| 254 |
+
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 255 |
+
"\n",
|
| 256 |
+
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 257 |
+
"\n",
|
| 258 |
+
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 259 |
+
"\n",
|
| 260 |
+
"`ollama pull <model_name>` downloads a model locally \n",
|
| 261 |
+
"`ollama ls` lists all the models you've downloaded \n",
|
| 262 |
+
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 263 |
+
]
|
| 264 |
+
},
|
| 265 |
+
{
|
| 266 |
+
"cell_type": "markdown",
|
| 267 |
+
"metadata": {},
|
| 268 |
+
"source": [
|
| 269 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 270 |
+
" <tr>\n",
|
| 271 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 272 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 273 |
+
" </td>\n",
|
| 274 |
+
" <td>\n",
|
| 275 |
+
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 276 |
+
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See the <A href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 277 |
+
" </span>\n",
|
| 278 |
+
" </td>\n",
|
| 279 |
+
" </tr>\n",
|
| 280 |
+
"</table>"
|
| 281 |
+
]
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"cell_type": "code",
|
| 285 |
+
"execution_count": null,
|
| 286 |
+
"metadata": {},
|
| 287 |
+
"outputs": [],
|
| 288 |
+
"source": [
|
| 289 |
+
"!ollama pull llama3.2"
|
| 290 |
+
]
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"cell_type": "code",
|
| 294 |
+
"execution_count": null,
|
| 295 |
+
"metadata": {},
|
| 296 |
+
"outputs": [],
|
| 297 |
+
"source": [
|
| 298 |
+
"ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 299 |
+
"model_name = \"llama3.2\"\n",
|
| 300 |
+
"\n",
|
| 301 |
+
"response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 302 |
+
"answer = response.choices[0].message.content\n",
|
| 303 |
+
"\n",
|
| 304 |
+
"display(Markdown(answer))\n",
|
| 305 |
+
"competitors.append(model_name)\n",
|
| 306 |
+
"answers.append(answer)"
|
| 307 |
+
]
|
| 308 |
+
},
|
| 309 |
+
{
|
| 310 |
+
"cell_type": "code",
|
| 311 |
+
"execution_count": null,
|
| 312 |
+
"metadata": {},
|
| 313 |
+
"outputs": [],
|
| 314 |
+
"source": [
|
| 315 |
+
"# So where are we?\n",
|
| 316 |
+
"\n",
|
| 317 |
+
"print(competitors)\n",
|
| 318 |
+
"print(answers)\n"
|
| 319 |
+
]
|
| 320 |
+
},
|
| 321 |
+
{
|
| 322 |
+
"cell_type": "code",
|
| 323 |
+
"execution_count": null,
|
| 324 |
+
"metadata": {},
|
| 325 |
+
"outputs": [],
|
| 326 |
+
"source": [
|
| 327 |
+
"# It's nice to know how to use \"zip\"\n",
|
| 328 |
+
"for competitor, answer in zip(competitors, answers):\n",
|
| 329 |
+
" print(f\"Competitor: {competitor}\\n\\n{answer}\")\n"
|
| 330 |
+
]
|
| 331 |
+
},
|
| 332 |
+
{
|
| 333 |
+
"cell_type": "code",
|
| 334 |
+
"execution_count": null,
|
| 335 |
+
"metadata": {},
|
| 336 |
+
"outputs": [],
|
| 337 |
+
"source": [
|
| 338 |
+
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 339 |
+
"\n",
|
| 340 |
+
"together = \"\"\n",
|
| 341 |
+
"for index, answer in enumerate(answers):\n",
|
| 342 |
+
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 343 |
+
" together += answer + \"\\n\\n\""
|
| 344 |
+
]
|
| 345 |
+
},
|
| 346 |
+
{
|
| 347 |
+
"cell_type": "code",
|
| 348 |
+
"execution_count": null,
|
| 349 |
+
"metadata": {},
|
| 350 |
+
"outputs": [],
|
| 351 |
+
"source": [
|
| 352 |
+
"print(together)"
|
| 353 |
+
]
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"cell_type": "markdown",
|
| 357 |
+
"metadata": {},
|
| 358 |
+
"source": []
|
| 359 |
+
},
|
| 360 |
+
{
|
| 361 |
+
"cell_type": "code",
|
| 362 |
+
"execution_count": null,
|
| 363 |
+
"metadata": {},
|
| 364 |
+
"outputs": [],
|
| 365 |
+
"source": [
|
| 366 |
+
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 367 |
+
"Each model has been given this question:\n",
|
| 368 |
+
"\n",
|
| 369 |
+
"{question}\n",
|
| 370 |
+
"\n",
|
| 371 |
+
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 372 |
+
"Respond with JSON, and only JSON, with the following format:\n",
|
| 373 |
+
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 374 |
+
"\n",
|
| 375 |
+
"Here are the responses from each competitor:\n",
|
| 376 |
+
"\n",
|
| 377 |
+
"{together}\n",
|
| 378 |
+
"\n",
|
| 379 |
+
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 380 |
+
]
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"cell_type": "code",
|
| 384 |
+
"execution_count": null,
|
| 385 |
+
"metadata": {},
|
| 386 |
+
"outputs": [],
|
| 387 |
+
"source": [
|
| 388 |
+
"print(judge)"
|
| 389 |
+
]
|
| 390 |
+
},
|
| 391 |
+
{
|
| 392 |
+
"cell_type": "code",
|
| 393 |
+
"execution_count": null,
|
| 394 |
+
"metadata": {},
|
| 395 |
+
"outputs": [],
|
| 396 |
+
"source": [
|
| 397 |
+
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 398 |
+
]
|
| 399 |
+
},
|
| 400 |
+
{
|
| 401 |
+
"cell_type": "code",
|
| 402 |
+
"execution_count": null,
|
| 403 |
+
"metadata": {},
|
| 404 |
+
"outputs": [],
|
| 405 |
+
"source": [
|
| 406 |
+
"# Judgement time!\n",
|
| 407 |
+
"\n",
|
| 408 |
+
"openai = OpenAI()\n",
|
| 409 |
+
"response = openai.chat.completions.create(\n",
|
| 410 |
+
" model=\"gpt-5-mini\",\n",
|
| 411 |
+
" messages=judge_messages,\n",
|
| 412 |
+
")\n",
|
| 413 |
+
"results = response.choices[0].message.content\n",
|
| 414 |
+
"print(results)\n"
|
| 415 |
+
]
|
| 416 |
+
},
|
| 417 |
+
{
|
| 418 |
+
"cell_type": "code",
|
| 419 |
+
"execution_count": null,
|
| 420 |
+
"metadata": {},
|
| 421 |
+
"outputs": [],
|
| 422 |
+
"source": [
|
| 423 |
+
"# OK let's turn this into results!\n",
|
| 424 |
+
"\n",
|
| 425 |
+
"results_dict = json.loads(results)\n",
|
| 426 |
+
"ranks = results_dict[\"results\"]\n",
|
| 427 |
+
"for index, result in enumerate(ranks):\n",
|
| 428 |
+
" competitor = competitors[int(result)-1]\n",
|
| 429 |
+
" print(f\"Rank {index+1}: {competitor}\")"
|
| 430 |
+
]
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"cell_type": "code",
|
| 434 |
+
"execution_count": null,
|
| 435 |
+
"metadata": {},
|
| 436 |
+
"outputs": [],
|
| 437 |
+
"source": [
|
| 438 |
+
"# Judgement time! from Claude\n",
|
| 439 |
+
"\n",
|
| 440 |
+
"claude = Anthropic()\n",
|
| 441 |
+
"response = claude.messages.create(model=\"claude-sonnet-4-5\", messages=judge_messages, max_tokens=5000)\n",
|
| 442 |
+
"results_claude = response.content[0].text\n",
|
| 443 |
+
"\n",
|
| 444 |
+
"print(results_claude)\n",
|
| 445 |
+
"\n",
|
| 446 |
+
"results_claude_tab = json.loads(results_claude)\n",
|
| 447 |
+
"ranks = results_claude_tab[\"results\"]\n",
|
| 448 |
+
"for index, result in enumerate(ranks):\n",
|
| 449 |
+
" competitor = competitors[int(result)-1]\n",
|
| 450 |
+
" print(f\"Rank {index+1}: {competitor}\")\n"
|
| 451 |
+
]
|
| 452 |
+
},
|
| 453 |
+
{
|
| 454 |
+
"cell_type": "code",
|
| 455 |
+
"execution_count": null,
|
| 456 |
+
"metadata": {},
|
| 457 |
+
"outputs": [],
|
| 458 |
+
"source": [
|
| 459 |
+
"# Judgement time! from Gemini\n",
|
| 460 |
+
"\n",
|
| 461 |
+
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 462 |
+
"response = gemini.chat.completions.create(\n",
|
| 463 |
+
" model=\"gemini-2.5-flash\",\n",
|
| 464 |
+
" messages=judge_messages,\n",
|
| 465 |
+
")\n",
|
| 466 |
+
"results_gemini = response.choices[0].message.content\n",
|
| 467 |
+
"print(results_gemini)\n",
|
| 468 |
+
"\n",
|
| 469 |
+
"results_gemini_tab = json.loads(results_gemini)\n",
|
| 470 |
+
"ranks = results_gemini_tab[\"results\"]\n",
|
| 471 |
+
"for index, result in enumerate(ranks):\n",
|
| 472 |
+
" competitor = competitors[int(result)-1]\n",
|
| 473 |
+
" print(f\"Rank {index+1}: {competitor}\")"
|
| 474 |
+
]
|
| 475 |
+
},
|
| 476 |
+
{
|
| 477 |
+
"cell_type": "code",
|
| 478 |
+
"execution_count": null,
|
| 479 |
+
"metadata": {},
|
| 480 |
+
"outputs": [],
|
| 481 |
+
"source": [
|
| 482 |
+
"# Judgement time! from Deepseek\n",
|
| 483 |
+
"\n",
|
| 484 |
+
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 485 |
+
"response = deepseek.chat.completions.create(\n",
|
| 486 |
+
" model=\"deepseek-chat\",\n",
|
| 487 |
+
" messages=judge_messages,\n",
|
| 488 |
+
")\n",
|
| 489 |
+
"results_deepseek = response.choices[0].message.content\n",
|
| 490 |
+
"print(results_deepseek)\n",
|
| 491 |
+
"\n",
|
| 492 |
+
"results_deepseek_tab = json.loads(results_deepseek)\n",
|
| 493 |
+
"ranks = results_deepseek_tab[\"results\"]\n",
|
| 494 |
+
"for index, result in enumerate(ranks):\n",
|
| 495 |
+
" competitor = competitors[int(result)-1]\n",
|
| 496 |
+
" print(f\"Rank {index+1}: {competitor}\")"
|
| 497 |
+
]
|
| 498 |
+
},
|
| 499 |
+
{
|
| 500 |
+
"cell_type": "code",
|
| 501 |
+
"execution_count": null,
|
| 502 |
+
"metadata": {},
|
| 503 |
+
"outputs": [],
|
| 504 |
+
"source": [
|
| 505 |
+
"# Judgement time! from Groq did not work as tokens per minute requested exceeded limit (Requested ~27K, Limit 8K)\n",
|
| 506 |
+
"# Entire section commented out.\n",
|
| 507 |
+
"\n",
|
| 508 |
+
"#groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 509 |
+
"#response = groq.chat.completions.create(\n",
|
| 510 |
+
"# model=\"openai/gpt-oss-120b\",\n",
|
| 511 |
+
"# messages=judge_messages,\n",
|
| 512 |
+
"#)\n",
|
| 513 |
+
"#results_groq = response.choices[0].message.content\n",
|
| 514 |
+
"#print(results_groq)\n",
|
| 515 |
+
"\n",
|
| 516 |
+
"#results_groq_tab = json.loads(results_groq)\n",
|
| 517 |
+
"#ranks = results_groq_tab[\"results\"]\n",
|
| 518 |
+
"#for index, result in enumerate(ranks):\n",
|
| 519 |
+
"# competitor = competitors[int(result)-1]\n",
|
| 520 |
+
"# print(f\"Rank {index+1}: {competitor}\")"
|
| 521 |
+
]
|
| 522 |
+
},
|
| 523 |
+
{
|
| 524 |
+
"cell_type": "code",
|
| 525 |
+
"execution_count": null,
|
| 526 |
+
"metadata": {},
|
| 527 |
+
"outputs": [],
|
| 528 |
+
"source": [
|
| 529 |
+
"import json\n",
|
| 530 |
+
"from openai import OpenAI\n",
|
| 531 |
+
"\n",
|
| 532 |
+
"#Store each model's rankings\n",
|
| 533 |
+
"rankings = {\n",
|
| 534 |
+
" \"openai-gpt-5-mini\": [\"claude-sonnet-4-5\", \"openai/gpt-oss-120b\", \"gpt-5-nano\", \"gemini-2.5-flash\", \"deepseek-chat\", \"llama3.2\"],\n",
|
| 535 |
+
" \"claude-sonnet-4-5\": [\"gpt-5-nano\", \"claude-sonnet-4-5\", \"openai/gpt-oss-120b\", \"deepseek-chat\", \"gemini-2.5-flash\", \"llama3.2\"],\n",
|
| 536 |
+
" \"gemini-2.5-flash\": [\"openai/gpt-oss-120b\", \"gemini-2.5-flash\", \"gpt-5-nano\", \"deepseek-chat\", \"claude-sonnet-4-5\", \"llama3.2\"],\n",
|
| 537 |
+
" \"deepseek-chat\": [\"openai/gpt-oss-120b\", \"gemini-2.5-flash\", \"gpt-5-nano\", \"deepseek-chat\", \"claude-sonnet-4-5\", \"llama3.2\"]\n",
|
| 538 |
+
"}\n"
|
| 539 |
+
]
|
| 540 |
+
},
|
| 541 |
+
{
|
| 542 |
+
"cell_type": "code",
|
| 543 |
+
"execution_count": null,
|
| 544 |
+
"metadata": {},
|
| 545 |
+
"outputs": [],
|
| 546 |
+
"source": [
|
| 547 |
+
"#Compute average rank per model\n",
|
| 548 |
+
"scores = {}\n",
|
| 549 |
+
"for model_name in rankings[list(rankings.keys())[0]]: # iterate over unique models\n",
|
| 550 |
+
" total_rank = 0\n",
|
| 551 |
+
" for judge, ranks in rankings.items():\n",
|
| 552 |
+
" total_rank += ranks.index(model_name) + 1 # ranks start at 1\n",
|
| 553 |
+
" scores[model_name] = total_rank / len(rankings)"
|
| 554 |
+
]
|
| 555 |
+
},
|
| 556 |
+
{
|
| 557 |
+
"cell_type": "code",
|
| 558 |
+
"execution_count": null,
|
| 559 |
+
"metadata": {},
|
| 560 |
+
"outputs": [],
|
| 561 |
+
"source": [
|
| 562 |
+
"#Sort by average rank\n",
|
| 563 |
+
"sorted_scores = sorted(scores.items(), key=lambda x: x[1])\n",
|
| 564 |
+
"\n",
|
| 565 |
+
"print(\"\\n📊 Average Rank Results:\")\n",
|
| 566 |
+
"for i, (model, avg_rank) in enumerate(sorted_scores, 1):\n",
|
| 567 |
+
" print(f\"{i}. {model} — Average Rank: {avg_rank:.2f}\")"
|
| 568 |
+
]
|
| 569 |
+
},
|
| 570 |
+
{
|
| 571 |
+
"cell_type": "code",
|
| 572 |
+
"execution_count": null,
|
| 573 |
+
"metadata": {},
|
| 574 |
+
"outputs": [],
|
| 575 |
+
"source": [
|
| 576 |
+
"#Prepare data for LLM evaluation\n",
|
| 577 |
+
"summary_prompt = f\"\"\"\n",
|
| 578 |
+
"We collected ranking data from multiple LLMs judging each other. \n",
|
| 579 |
+
"Here are the average ranks (lower is better):\n",
|
| 580 |
+
"\n",
|
| 581 |
+
"{json.dumps(scores, indent=2)}\n",
|
| 582 |
+
"\n",
|
| 583 |
+
"Please:\n",
|
| 584 |
+
"1. Provide a fairness-adjusted score (1–10) for each model.\n",
|
| 585 |
+
"2. Identify which model appears most consistent or robust across judges.\n",
|
| 586 |
+
"3. Summarize in 3 concise bullet points why the top model stands out.\n",
|
| 587 |
+
"\"\"\""
|
| 588 |
+
]
|
| 589 |
+
},
|
| 590 |
+
{
|
| 591 |
+
"cell_type": "code",
|
| 592 |
+
"execution_count": null,
|
| 593 |
+
"metadata": {},
|
| 594 |
+
"outputs": [],
|
| 595 |
+
"source": [
|
| 596 |
+
"# Send to an Chat GPT-5 for reasoning\n",
|
| 597 |
+
"openai = OpenAI()\n",
|
| 598 |
+
"response = openai.chat.completions.create(\n",
|
| 599 |
+
" model=\"gpt-5-mini\",\n",
|
| 600 |
+
" messages=[\n",
|
| 601 |
+
" {\"role\": \"system\", \"content\": \"You are a neutral AI judge analyzing LLM ranking consistency.\"},\n",
|
| 602 |
+
" {\"role\": \"user\", \"content\": summary_prompt}\n",
|
| 603 |
+
" ])"
|
| 604 |
+
]
|
| 605 |
+
},
|
| 606 |
+
{
|
| 607 |
+
"cell_type": "code",
|
| 608 |
+
"execution_count": null,
|
| 609 |
+
"metadata": {},
|
| 610 |
+
"outputs": [],
|
| 611 |
+
"source": [
|
| 612 |
+
"#Display the analysis\n",
|
| 613 |
+
"print(\"\\n🤖 LLM Evaluation Summary:\\n\")\n",
|
| 614 |
+
"print(response.choices[0].message.content)"
|
| 615 |
+
]
|
| 616 |
+
},
|
| 617 |
+
{
|
| 618 |
+
"cell_type": "markdown",
|
| 619 |
+
"metadata": {},
|
| 620 |
+
"source": [
|
| 621 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 622 |
+
" <tr>\n",
|
| 623 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 624 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 625 |
+
" </td>\n",
|
| 626 |
+
" <td>\n",
|
| 627 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 628 |
+
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 629 |
+
" </span>\n",
|
| 630 |
+
" </td>\n",
|
| 631 |
+
" </tr>\n",
|
| 632 |
+
"</table>"
|
| 633 |
+
]
|
| 634 |
+
},
|
| 635 |
+
{
|
| 636 |
+
"cell_type": "markdown",
|
| 637 |
+
"metadata": {},
|
| 638 |
+
"source": [
|
| 639 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 640 |
+
" <tr>\n",
|
| 641 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 642 |
+
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 643 |
+
" </td>\n",
|
| 644 |
+
" <td>\n",
|
| 645 |
+
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 646 |
+
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 647 |
+
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 648 |
+
" to business projects where accuracy is critical.\n",
|
| 649 |
+
" </span>\n",
|
| 650 |
+
" </td>\n",
|
| 651 |
+
" </tr>\n",
|
| 652 |
+
"</table>"
|
| 653 |
+
]
|
| 654 |
+
}
|
| 655 |
+
],
|
| 656 |
+
"metadata": {
|
| 657 |
+
"kernelspec": {
|
| 658 |
+
"display_name": ".venv",
|
| 659 |
+
"language": "python",
|
| 660 |
+
"name": "python3"
|
| 661 |
+
},
|
| 662 |
+
"language_info": {
|
| 663 |
+
"codemirror_mode": {
|
| 664 |
+
"name": "ipython",
|
| 665 |
+
"version": 3
|
| 666 |
+
},
|
| 667 |
+
"file_extension": ".py",
|
| 668 |
+
"mimetype": "text/x-python",
|
| 669 |
+
"name": "python",
|
| 670 |
+
"nbconvert_exporter": "python",
|
| 671 |
+
"pygments_lexer": "ipython3",
|
| 672 |
+
"version": "3.12.12"
|
| 673 |
+
}
|
| 674 |
+
},
|
| 675 |
+
"nbformat": 4,
|
| 676 |
+
"nbformat_minor": 2
|
| 677 |
+
}
|
community_contributions/2_lab2_exercise.ipynb
ADDED
|
@@ -0,0 +1,336 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# From Judging to Synthesizing — Evolving Multi-Agent Patterns\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"In the original 2_lab2.ipynb, we explored a powerful agentic design pattern: sending the same question to multiple large language models (LLMs), then using a separate “judge” agent to evaluate and rank their responses. This approach is valuable for identifying the single best answer among many, leveraging the strengths of ensemble reasoning and critical evaluation.\n",
|
| 10 |
+
"\n",
|
| 11 |
+
"However, selecting just one “winner” can leave valuable insights from other models untapped. To address this, I am shifting to a new agentic pattern in this notebook: the synthesizer/improver pattern. Instead of merely ranking responses, we will prompt a dedicated LLM to review all answers, extract the most compelling ideas from each, and synthesize them into a single, improved response. \n",
|
| 12 |
+
"\n",
|
| 13 |
+
"This approach aims to combine the collective intelligence of multiple models, producing an answer that is richer, more nuanced, and more robust than any individual response.\n"
|
| 14 |
+
]
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"cell_type": "code",
|
| 18 |
+
"execution_count": 1,
|
| 19 |
+
"metadata": {},
|
| 20 |
+
"outputs": [],
|
| 21 |
+
"source": [
|
| 22 |
+
"import os\n",
|
| 23 |
+
"import json\n",
|
| 24 |
+
"from dotenv import load_dotenv\n",
|
| 25 |
+
"from openai import OpenAI\n",
|
| 26 |
+
"from anthropic import Anthropic\n",
|
| 27 |
+
"from IPython.display import Markdown, display"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"cell_type": "code",
|
| 32 |
+
"execution_count": null,
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"outputs": [],
|
| 35 |
+
"source": [
|
| 36 |
+
"load_dotenv(override=True)"
|
| 37 |
+
]
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"cell_type": "code",
|
| 41 |
+
"execution_count": null,
|
| 42 |
+
"metadata": {},
|
| 43 |
+
"outputs": [],
|
| 44 |
+
"source": [
|
| 45 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 46 |
+
"\n",
|
| 47 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 48 |
+
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 49 |
+
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 50 |
+
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 51 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 52 |
+
"\n",
|
| 53 |
+
"if openai_api_key:\n",
|
| 54 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 55 |
+
"else:\n",
|
| 56 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 57 |
+
" \n",
|
| 58 |
+
"if anthropic_api_key:\n",
|
| 59 |
+
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 60 |
+
"else:\n",
|
| 61 |
+
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 62 |
+
"\n",
|
| 63 |
+
"if google_api_key:\n",
|
| 64 |
+
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 65 |
+
"else:\n",
|
| 66 |
+
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 67 |
+
"\n",
|
| 68 |
+
"if deepseek_api_key:\n",
|
| 69 |
+
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 70 |
+
"else:\n",
|
| 71 |
+
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 72 |
+
"\n",
|
| 73 |
+
"if groq_api_key:\n",
|
| 74 |
+
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 75 |
+
"else:\n",
|
| 76 |
+
" print(\"Groq API Key not set (and this is optional)\")"
|
| 77 |
+
]
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"cell_type": "code",
|
| 81 |
+
"execution_count": 7,
|
| 82 |
+
"metadata": {},
|
| 83 |
+
"outputs": [],
|
| 84 |
+
"source": [
|
| 85 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their collective intelligence. \"\n",
|
| 86 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 87 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 88 |
+
]
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"cell_type": "code",
|
| 92 |
+
"execution_count": null,
|
| 93 |
+
"metadata": {},
|
| 94 |
+
"outputs": [],
|
| 95 |
+
"source": [
|
| 96 |
+
"messages"
|
| 97 |
+
]
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"cell_type": "code",
|
| 101 |
+
"execution_count": null,
|
| 102 |
+
"metadata": {},
|
| 103 |
+
"outputs": [],
|
| 104 |
+
"source": [
|
| 105 |
+
"openai = OpenAI()\n",
|
| 106 |
+
"response = openai.chat.completions.create(\n",
|
| 107 |
+
" model=\"gpt-4o-mini\",\n",
|
| 108 |
+
" messages=messages,\n",
|
| 109 |
+
")\n",
|
| 110 |
+
"question = response.choices[0].message.content\n",
|
| 111 |
+
"print(question)\n"
|
| 112 |
+
]
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"cell_type": "code",
|
| 116 |
+
"execution_count": 10,
|
| 117 |
+
"metadata": {},
|
| 118 |
+
"outputs": [],
|
| 119 |
+
"source": [
|
| 120 |
+
"teammates = []\n",
|
| 121 |
+
"answers = []\n",
|
| 122 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 123 |
+
]
|
| 124 |
+
},
|
| 125 |
+
{
|
| 126 |
+
"cell_type": "code",
|
| 127 |
+
"execution_count": null,
|
| 128 |
+
"metadata": {},
|
| 129 |
+
"outputs": [],
|
| 130 |
+
"source": [
|
| 131 |
+
"# The API we know well\n",
|
| 132 |
+
"\n",
|
| 133 |
+
"model_name = \"gpt-4o-mini\"\n",
|
| 134 |
+
"\n",
|
| 135 |
+
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 136 |
+
"answer = response.choices[0].message.content\n",
|
| 137 |
+
"\n",
|
| 138 |
+
"display(Markdown(answer))\n",
|
| 139 |
+
"teammates.append(model_name)\n",
|
| 140 |
+
"answers.append(answer)"
|
| 141 |
+
]
|
| 142 |
+
},
|
| 143 |
+
{
|
| 144 |
+
"cell_type": "code",
|
| 145 |
+
"execution_count": null,
|
| 146 |
+
"metadata": {},
|
| 147 |
+
"outputs": [],
|
| 148 |
+
"source": [
|
| 149 |
+
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 150 |
+
"\n",
|
| 151 |
+
"model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 152 |
+
"\n",
|
| 153 |
+
"claude = Anthropic()\n",
|
| 154 |
+
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 155 |
+
"answer = response.content[0].text\n",
|
| 156 |
+
"\n",
|
| 157 |
+
"display(Markdown(answer))\n",
|
| 158 |
+
"teammates.append(model_name)\n",
|
| 159 |
+
"answers.append(answer)"
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"cell_type": "code",
|
| 164 |
+
"execution_count": null,
|
| 165 |
+
"metadata": {},
|
| 166 |
+
"outputs": [],
|
| 167 |
+
"source": [
|
| 168 |
+
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 169 |
+
"model_name = \"gemini-2.0-flash\"\n",
|
| 170 |
+
"\n",
|
| 171 |
+
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 172 |
+
"answer = response.choices[0].message.content\n",
|
| 173 |
+
"\n",
|
| 174 |
+
"display(Markdown(answer))\n",
|
| 175 |
+
"teammates.append(model_name)\n",
|
| 176 |
+
"answers.append(answer)"
|
| 177 |
+
]
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"cell_type": "code",
|
| 181 |
+
"execution_count": null,
|
| 182 |
+
"metadata": {},
|
| 183 |
+
"outputs": [],
|
| 184 |
+
"source": [
|
| 185 |
+
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 186 |
+
"model_name = \"deepseek-chat\"\n",
|
| 187 |
+
"\n",
|
| 188 |
+
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 189 |
+
"answer = response.choices[0].message.content\n",
|
| 190 |
+
"\n",
|
| 191 |
+
"display(Markdown(answer))\n",
|
| 192 |
+
"teammates.append(model_name)\n",
|
| 193 |
+
"answers.append(answer)"
|
| 194 |
+
]
|
| 195 |
+
},
|
| 196 |
+
{
|
| 197 |
+
"cell_type": "code",
|
| 198 |
+
"execution_count": null,
|
| 199 |
+
"metadata": {},
|
| 200 |
+
"outputs": [],
|
| 201 |
+
"source": [
|
| 202 |
+
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 203 |
+
"model_name = \"llama-3.3-70b-versatile\"\n",
|
| 204 |
+
"\n",
|
| 205 |
+
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 206 |
+
"answer = response.choices[0].message.content\n",
|
| 207 |
+
"\n",
|
| 208 |
+
"display(Markdown(answer))\n",
|
| 209 |
+
"teammates.append(model_name)\n",
|
| 210 |
+
"answers.append(answer)"
|
| 211 |
+
]
|
| 212 |
+
},
|
| 213 |
+
{
|
| 214 |
+
"cell_type": "code",
|
| 215 |
+
"execution_count": null,
|
| 216 |
+
"metadata": {},
|
| 217 |
+
"outputs": [],
|
| 218 |
+
"source": [
|
| 219 |
+
"# So where are we?\n",
|
| 220 |
+
"\n",
|
| 221 |
+
"print(teammates)\n",
|
| 222 |
+
"print(answers)"
|
| 223 |
+
]
|
| 224 |
+
},
|
| 225 |
+
{
|
| 226 |
+
"cell_type": "code",
|
| 227 |
+
"execution_count": null,
|
| 228 |
+
"metadata": {},
|
| 229 |
+
"outputs": [],
|
| 230 |
+
"source": [
|
| 231 |
+
"# It's nice to know how to use \"zip\"\n",
|
| 232 |
+
"for teammate, answer in zip(teammates, answers):\n",
|
| 233 |
+
" print(f\"Teammate: {teammate}\\n\\n{answer}\")"
|
| 234 |
+
]
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"cell_type": "code",
|
| 238 |
+
"execution_count": 23,
|
| 239 |
+
"metadata": {},
|
| 240 |
+
"outputs": [],
|
| 241 |
+
"source": [
|
| 242 |
+
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 243 |
+
"\n",
|
| 244 |
+
"together = \"\"\n",
|
| 245 |
+
"for index, answer in enumerate(answers):\n",
|
| 246 |
+
" together += f\"# Response from teammate {index+1}\\n\\n\"\n",
|
| 247 |
+
" together += answer + \"\\n\\n\""
|
| 248 |
+
]
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"cell_type": "code",
|
| 252 |
+
"execution_count": null,
|
| 253 |
+
"metadata": {},
|
| 254 |
+
"outputs": [],
|
| 255 |
+
"source": [
|
| 256 |
+
"print(together)"
|
| 257 |
+
]
|
| 258 |
+
},
|
| 259 |
+
{
|
| 260 |
+
"cell_type": "code",
|
| 261 |
+
"execution_count": 36,
|
| 262 |
+
"metadata": {},
|
| 263 |
+
"outputs": [],
|
| 264 |
+
"source": [
|
| 265 |
+
"formatter = f\"\"\"You are taking the nost interesting ideas fron {len(teammates)} teammates.\n",
|
| 266 |
+
"Each model has been given this question:\n",
|
| 267 |
+
"\n",
|
| 268 |
+
"{question}\n",
|
| 269 |
+
"\n",
|
| 270 |
+
"Your job is to evaluate each response for clarity and strength of argument, select the most relevant ideas and make a report, including a title, subtitles to separate sections, and quoting the LLM providing the idea.\n",
|
| 271 |
+
"From that, you will create a new improved answer.\"\"\""
|
| 272 |
+
]
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"cell_type": "code",
|
| 276 |
+
"execution_count": null,
|
| 277 |
+
"metadata": {},
|
| 278 |
+
"outputs": [],
|
| 279 |
+
"source": [
|
| 280 |
+
"print(formatter)"
|
| 281 |
+
]
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"cell_type": "code",
|
| 285 |
+
"execution_count": 38,
|
| 286 |
+
"metadata": {},
|
| 287 |
+
"outputs": [],
|
| 288 |
+
"source": [
|
| 289 |
+
"formatter_messages = [{\"role\": \"user\", \"content\": formatter}]"
|
| 290 |
+
]
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"cell_type": "code",
|
| 294 |
+
"execution_count": null,
|
| 295 |
+
"metadata": {},
|
| 296 |
+
"outputs": [],
|
| 297 |
+
"source": [
|
| 298 |
+
"openai = OpenAI()\n",
|
| 299 |
+
"response = openai.chat.completions.create(\n",
|
| 300 |
+
" model=\"o3-mini\",\n",
|
| 301 |
+
" messages=formatter_messages,\n",
|
| 302 |
+
")\n",
|
| 303 |
+
"results = response.choices[0].message.content\n",
|
| 304 |
+
"display(Markdown(results))"
|
| 305 |
+
]
|
| 306 |
+
},
|
| 307 |
+
{
|
| 308 |
+
"cell_type": "code",
|
| 309 |
+
"execution_count": null,
|
| 310 |
+
"metadata": {},
|
| 311 |
+
"outputs": [],
|
| 312 |
+
"source": []
|
| 313 |
+
}
|
| 314 |
+
],
|
| 315 |
+
"metadata": {
|
| 316 |
+
"kernelspec": {
|
| 317 |
+
"display_name": ".venv",
|
| 318 |
+
"language": "python",
|
| 319 |
+
"name": "python3"
|
| 320 |
+
},
|
| 321 |
+
"language_info": {
|
| 322 |
+
"codemirror_mode": {
|
| 323 |
+
"name": "ipython",
|
| 324 |
+
"version": 3
|
| 325 |
+
},
|
| 326 |
+
"file_extension": ".py",
|
| 327 |
+
"mimetype": "text/x-python",
|
| 328 |
+
"name": "python",
|
| 329 |
+
"nbconvert_exporter": "python",
|
| 330 |
+
"pygments_lexer": "ipython3",
|
| 331 |
+
"version": "3.12.7"
|
| 332 |
+
}
|
| 333 |
+
},
|
| 334 |
+
"nbformat": 4,
|
| 335 |
+
"nbformat_minor": 2
|
| 336 |
+
}
|
community_contributions/2_lab2_exercise_BrettSanders_ChainOfThought.ipynb
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "raw",
|
| 5 |
+
"metadata": {
|
| 6 |
+
"vscode": {
|
| 7 |
+
"languageId": "raw"
|
| 8 |
+
}
|
| 9 |
+
},
|
| 10 |
+
"source": [
|
| 11 |
+
"# Lab 2 Exercise - Extending the Patterns\n",
|
| 12 |
+
"\n",
|
| 13 |
+
"This notebook extends the original lab by adding the Chain of Thought pattern to enhance the evaluation process.\n"
|
| 14 |
+
]
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"cell_type": "code",
|
| 18 |
+
"execution_count": 1,
|
| 19 |
+
"metadata": {},
|
| 20 |
+
"outputs": [],
|
| 21 |
+
"source": [
|
| 22 |
+
"# Import required packages\n",
|
| 23 |
+
"import os\n",
|
| 24 |
+
"import json\n",
|
| 25 |
+
"from dotenv import load_dotenv\n",
|
| 26 |
+
"from openai import OpenAI\n",
|
| 27 |
+
"from anthropic import Anthropic\n",
|
| 28 |
+
"from IPython.display import Markdown, display\n"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "code",
|
| 33 |
+
"execution_count": null,
|
| 34 |
+
"metadata": {},
|
| 35 |
+
"outputs": [],
|
| 36 |
+
"source": [
|
| 37 |
+
"# Load environment variables\n",
|
| 38 |
+
"load_dotenv(override=True)\n"
|
| 39 |
+
]
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"cell_type": "code",
|
| 43 |
+
"execution_count": 3,
|
| 44 |
+
"metadata": {},
|
| 45 |
+
"outputs": [],
|
| 46 |
+
"source": [
|
| 47 |
+
"# Initialize API clients\n",
|
| 48 |
+
"openai = OpenAI()\n",
|
| 49 |
+
"claude = Anthropic()\n"
|
| 50 |
+
]
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"cell_type": "code",
|
| 54 |
+
"execution_count": null,
|
| 55 |
+
"metadata": {},
|
| 56 |
+
"outputs": [],
|
| 57 |
+
"source": [
|
| 58 |
+
"# Original question generation\n",
|
| 59 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 60 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 61 |
+
"messages = [{\"role\": \"user\", \"content\": request}]\n",
|
| 62 |
+
"\n",
|
| 63 |
+
"response = openai.chat.completions.create(\n",
|
| 64 |
+
" model=\"gpt-4o-mini\",\n",
|
| 65 |
+
" messages=messages,\n",
|
| 66 |
+
")\n",
|
| 67 |
+
"question = response.choices[0].message.content\n",
|
| 68 |
+
"print(question)\n"
|
| 69 |
+
]
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"cell_type": "code",
|
| 73 |
+
"execution_count": null,
|
| 74 |
+
"metadata": {},
|
| 75 |
+
"outputs": [],
|
| 76 |
+
"source": [
|
| 77 |
+
"# Get responses from multiple models\n",
|
| 78 |
+
"competitors = []\n",
|
| 79 |
+
"answers = []\n",
|
| 80 |
+
"messages = [{\"role\": \"user\", \"content\": question}]\n",
|
| 81 |
+
"\n",
|
| 82 |
+
"# OpenAI\n",
|
| 83 |
+
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
|
| 84 |
+
"answer = response.choices[0].message.content\n",
|
| 85 |
+
"competitors.append(\"gpt-4o-mini\")\n",
|
| 86 |
+
"answers.append(answer)\n",
|
| 87 |
+
"display(Markdown(answer))\n",
|
| 88 |
+
"\n",
|
| 89 |
+
"# Claude\n",
|
| 90 |
+
"response = claude.messages.create(model=\"claude-3-7-sonnet-latest\", messages=messages, max_tokens=1000)\n",
|
| 91 |
+
"answer = response.content[0].text\n",
|
| 92 |
+
"competitors.append(\"claude-3-7-sonnet-latest\")\n",
|
| 93 |
+
"answers.append(answer)\n",
|
| 94 |
+
"display(Markdown(answer))\n"
|
| 95 |
+
]
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"cell_type": "code",
|
| 99 |
+
"execution_count": 6,
|
| 100 |
+
"metadata": {},
|
| 101 |
+
"outputs": [],
|
| 102 |
+
"source": [
|
| 103 |
+
"# NEW: Chain of Thought Evaluation\n",
|
| 104 |
+
"# First, let's create a detailed evaluation prompt that encourages step-by-step reasoning\n",
|
| 105 |
+
"\n",
|
| 106 |
+
"evaluation_prompt = f\"\"\"You are an expert evaluator of AI responses. Your task is to analyze and rank the following responses to this question:\n",
|
| 107 |
+
"\n",
|
| 108 |
+
"{question}\n",
|
| 109 |
+
"\n",
|
| 110 |
+
"Please follow these steps in your evaluation:\n",
|
| 111 |
+
"\n",
|
| 112 |
+
"1. For each response:\n",
|
| 113 |
+
" - Identify the main arguments presented\n",
|
| 114 |
+
" - Evaluate the clarity and coherence of the reasoning\n",
|
| 115 |
+
" - Assess the depth and breadth of the analysis\n",
|
| 116 |
+
" - Note any unique insights or perspectives\n",
|
| 117 |
+
"\n",
|
| 118 |
+
"2. Compare the responses:\n",
|
| 119 |
+
" - How do they differ in their approach?\n",
|
| 120 |
+
" - Which response demonstrates the most sophisticated understanding?\n",
|
| 121 |
+
" - Which response provides the most practical and actionable insights?\n",
|
| 122 |
+
"\n",
|
| 123 |
+
"3. Provide your final ranking with detailed justification for each position.\n",
|
| 124 |
+
"\n",
|
| 125 |
+
"Here are the responses:\n",
|
| 126 |
+
"\n",
|
| 127 |
+
"{'\\\\n\\\\n'.join([f'Response {i+1} ({competitors[i]}):\\\\n{answer}' for i, answer in enumerate(answers)])}\n",
|
| 128 |
+
"\n",
|
| 129 |
+
"Please provide your evaluation in JSON format with the following structure:\n",
|
| 130 |
+
"{{\n",
|
| 131 |
+
" \"detailed_analysis\": [\n",
|
| 132 |
+
" {{\"competitor\": \"name\", \"strengths\": [], \"weaknesses\": [], \"unique_aspects\": []}},\n",
|
| 133 |
+
" ...\n",
|
| 134 |
+
" ],\n",
|
| 135 |
+
" \"comparative_analysis\": \"detailed comparison of responses\",\n",
|
| 136 |
+
" \"final_ranking\": [\"ranked competitor numbers\"],\n",
|
| 137 |
+
" \"justification\": \"detailed explanation of the ranking\"\n",
|
| 138 |
+
"}}\"\"\"\n"
|
| 139 |
+
]
|
| 140 |
+
},
|
| 141 |
+
{
|
| 142 |
+
"cell_type": "code",
|
| 143 |
+
"execution_count": null,
|
| 144 |
+
"metadata": {},
|
| 145 |
+
"outputs": [],
|
| 146 |
+
"source": [
|
| 147 |
+
"# Get the detailed evaluation\n",
|
| 148 |
+
"evaluation_messages = [{\"role\": \"user\", \"content\": evaluation_prompt}]\n",
|
| 149 |
+
"\n",
|
| 150 |
+
"response = openai.chat.completions.create(\n",
|
| 151 |
+
" model=\"gpt-4o-mini\",\n",
|
| 152 |
+
" messages=evaluation_messages,\n",
|
| 153 |
+
")\n",
|
| 154 |
+
"detailed_evaluation = response.choices[0].message.content\n",
|
| 155 |
+
"print(detailed_evaluation)\n"
|
| 156 |
+
]
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"cell_type": "code",
|
| 160 |
+
"execution_count": null,
|
| 161 |
+
"metadata": {},
|
| 162 |
+
"outputs": [],
|
| 163 |
+
"source": [
|
| 164 |
+
"# Parse and display the results in a more readable format\n",
|
| 165 |
+
"\n",
|
| 166 |
+
"# Clean up the JSON string by removing markdown code block markers\n",
|
| 167 |
+
"json_str = detailed_evaluation.replace(\"```json\", \"\").replace(\"```\", \"\").strip()\n",
|
| 168 |
+
"\n",
|
| 169 |
+
"evaluation_dict = json.loads(json_str)\n",
|
| 170 |
+
"\n",
|
| 171 |
+
"print(\"Detailed Analysis:\")\n",
|
| 172 |
+
"for analysis in evaluation_dict[\"detailed_analysis\"]:\n",
|
| 173 |
+
" print(f\"\\nCompetitor: {analysis['competitor']}\")\n",
|
| 174 |
+
" print(\"Strengths:\")\n",
|
| 175 |
+
" for strength in analysis['strengths']:\n",
|
| 176 |
+
" print(f\"- {strength}\")\n",
|
| 177 |
+
" print(\"\\nWeaknesses:\")\n",
|
| 178 |
+
" for weakness in analysis['weaknesses']:\n",
|
| 179 |
+
" print(f\"- {weakness}\")\n",
|
| 180 |
+
" print(\"\\nUnique Aspects:\")\n",
|
| 181 |
+
" for aspect in analysis['unique_aspects']:\n",
|
| 182 |
+
" print(f\"- {aspect}\")\n",
|
| 183 |
+
"\n",
|
| 184 |
+
"print(\"\\nComparative Analysis:\")\n",
|
| 185 |
+
"print(evaluation_dict[\"comparative_analysis\"])\n",
|
| 186 |
+
"\n",
|
| 187 |
+
"print(\"\\nFinal Ranking:\")\n",
|
| 188 |
+
"for i, rank in enumerate(evaluation_dict[\"final_ranking\"]):\n",
|
| 189 |
+
" print(f\"{i+1}. {competitors[int(rank)-1]}\")\n",
|
| 190 |
+
"\n",
|
| 191 |
+
"print(\"\\nJustification:\")\n",
|
| 192 |
+
"print(evaluation_dict[\"justification\"])\n"
|
| 193 |
+
]
|
| 194 |
+
},
|
| 195 |
+
{
|
| 196 |
+
"cell_type": "raw",
|
| 197 |
+
"metadata": {
|
| 198 |
+
"vscode": {
|
| 199 |
+
"languageId": "raw"
|
| 200 |
+
}
|
| 201 |
+
},
|
| 202 |
+
"source": [
|
| 203 |
+
"## Pattern Analysis\n",
|
| 204 |
+
"\n",
|
| 205 |
+
"This enhanced version uses several agentic design patterns:\n",
|
| 206 |
+
"\n",
|
| 207 |
+
"1. **Multi-agent Collaboration**: Sending the same question to multiple LLMs\n",
|
| 208 |
+
"2. **Evaluation/Judgment Pattern**: Using one LLM to evaluate responses from others\n",
|
| 209 |
+
"3. **Parallel Processing**: Running multiple models simultaneously\n",
|
| 210 |
+
"4. **Chain of Thought**: Added a structured, step-by-step evaluation process that breaks down the analysis into clear stages\n",
|
| 211 |
+
"\n",
|
| 212 |
+
"The Chain of Thought pattern is particularly valuable here because it:\n",
|
| 213 |
+
"- Forces the evaluator to consider multiple aspects of each response\n",
|
| 214 |
+
"- Provides more detailed and structured feedback\n",
|
| 215 |
+
"- Makes the evaluation process more transparent and explainable\n",
|
| 216 |
+
"- Helps identify specific strengths and weaknesses in each response\n"
|
| 217 |
+
]
|
| 218 |
+
}
|
| 219 |
+
],
|
| 220 |
+
"metadata": {
|
| 221 |
+
"kernelspec": {
|
| 222 |
+
"display_name": ".venv",
|
| 223 |
+
"language": "python",
|
| 224 |
+
"name": "python3"
|
| 225 |
+
},
|
| 226 |
+
"language_info": {
|
| 227 |
+
"codemirror_mode": {
|
| 228 |
+
"name": "ipython",
|
| 229 |
+
"version": 3
|
| 230 |
+
},
|
| 231 |
+
"file_extension": ".py",
|
| 232 |
+
"mimetype": "text/x-python",
|
| 233 |
+
"name": "python",
|
| 234 |
+
"nbconvert_exporter": "python",
|
| 235 |
+
"pygments_lexer": "ipython3",
|
| 236 |
+
"version": "3.12.7"
|
| 237 |
+
}
|
| 238 |
+
},
|
| 239 |
+
"nbformat": 4,
|
| 240 |
+
"nbformat_minor": 2
|
| 241 |
+
}
|
community_contributions/2_lab2_llm_reviewer.ipynb
ADDED
|
@@ -0,0 +1,627 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "markdown",
|
| 14 |
+
"metadata": {},
|
| 15 |
+
"source": [
|
| 16 |
+
"This notebook extends the original by adding a reviewer pattern to evaluate the impact on model performance.\n",
|
| 17 |
+
"\n",
|
| 18 |
+
"In the new workflow, each model's answer is provided to a \"reviewer LLM\" who is prompted to \"Evaluate the response for clarity and strength of argument, and provide constructive suggestions for improving the answer.\" Each model is then given the chance to revise its answer based on the feedback but is also told, \"You are not required to take any of the feedback into account, but you want to win the competition.\"\n",
|
| 19 |
+
"\n",
|
| 20 |
+
"<table>\n",
|
| 21 |
+
" <caption style=\"font-size: 1.2em; margin-bottom: 10px;\"><strong>Results for Representative Run</strong></caption>\n",
|
| 22 |
+
" <thead>\n",
|
| 23 |
+
" <tr>\n",
|
| 24 |
+
" <th>Model</th>\n",
|
| 25 |
+
" <th>Original Rank</th>\n",
|
| 26 |
+
" <th>Exclusive Feedback</th>\n",
|
| 27 |
+
" <th>With Feedback (all models)</th>\n",
|
| 28 |
+
" </tr>\n",
|
| 29 |
+
" </thead>\n",
|
| 30 |
+
" <tbody>\n",
|
| 31 |
+
" <tr>\n",
|
| 32 |
+
" <td>gpt-4o-mini</td>\n",
|
| 33 |
+
" <td>2</td>\n",
|
| 34 |
+
" <td>3</td>\n",
|
| 35 |
+
" <td>4</td>\n",
|
| 36 |
+
" </tr>\n",
|
| 37 |
+
" <tr>\n",
|
| 38 |
+
" <td>claude-3-7-sonnet-latest</td>\n",
|
| 39 |
+
" <td>6</td>\n",
|
| 40 |
+
" <td>1</td>\n",
|
| 41 |
+
" <td>1</td>\n",
|
| 42 |
+
" </tr>\n",
|
| 43 |
+
" <tr>\n",
|
| 44 |
+
" <td>gemini-2.0-flash</td>\n",
|
| 45 |
+
" <td>1</td>\n",
|
| 46 |
+
" <td>1</td>\n",
|
| 47 |
+
" <td>2</td>\n",
|
| 48 |
+
" </tr>\n",
|
| 49 |
+
" <tr>\n",
|
| 50 |
+
" <td>deepseek-chat</td>\n",
|
| 51 |
+
" <td>3</td>\n",
|
| 52 |
+
" <td>2</td>\n",
|
| 53 |
+
" <td>3</td>\n",
|
| 54 |
+
" </tr>\n",
|
| 55 |
+
" <tr>\n",
|
| 56 |
+
" <td>llama-3.3-70b-versatile</td>\n",
|
| 57 |
+
" <td>4</td>\n",
|
| 58 |
+
" <td>3</td>\n",
|
| 59 |
+
" <td>5</td>\n",
|
| 60 |
+
" </tr>\n",
|
| 61 |
+
" <tr>\n",
|
| 62 |
+
" <td>llama3.2</td>\n",
|
| 63 |
+
" <td>5</td>\n",
|
| 64 |
+
" <td>4</td>\n",
|
| 65 |
+
" <td>6</td>\n",
|
| 66 |
+
" </tr>\n",
|
| 67 |
+
" </tbody>\n",
|
| 68 |
+
"</table>\n",
|
| 69 |
+
"\n",
|
| 70 |
+
"The workflow is obviously non-deterministic and the results can vary greatly from run to run, but the introduction of a reviewer appeared to have a generaly positive impact on performance. The table above shows the results for a representative run. It compares each model's rank versus the other models when it exclusively received feedback. The table also shows the ranking when ALL models received feedback. Exclusive use of feedback improved a model's ranking for five out of six models and decreased it for one model.\n",
|
| 71 |
+
"\n",
|
| 72 |
+
"Inspired by some other contributions, this worksheet also makes LLM calls asyncrhonously to reduce wait time."
|
| 73 |
+
]
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"cell_type": "code",
|
| 77 |
+
"execution_count": 23,
|
| 78 |
+
"metadata": {},
|
| 79 |
+
"outputs": [],
|
| 80 |
+
"source": [
|
| 81 |
+
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 82 |
+
"#!uv add prettytable\n",
|
| 83 |
+
"\n",
|
| 84 |
+
"import os\n",
|
| 85 |
+
"import asyncio\n",
|
| 86 |
+
"import json\n",
|
| 87 |
+
"from dotenv import load_dotenv\n",
|
| 88 |
+
"from openai import OpenAI, AsyncOpenAI\n",
|
| 89 |
+
"from anthropic import AsyncAnthropic\n",
|
| 90 |
+
"from IPython.display import display\n",
|
| 91 |
+
"from pydantic import BaseModel, Field\n",
|
| 92 |
+
"from string import Template\n",
|
| 93 |
+
"from prettytable import PrettyTable\n",
|
| 94 |
+
"\n",
|
| 95 |
+
"\n"
|
| 96 |
+
]
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"cell_type": "code",
|
| 100 |
+
"execution_count": 24,
|
| 101 |
+
"metadata": {},
|
| 102 |
+
"outputs": [],
|
| 103 |
+
"source": [
|
| 104 |
+
"class LLMResult(BaseModel):\n",
|
| 105 |
+
" model: str\n",
|
| 106 |
+
" answer: str\n",
|
| 107 |
+
" feedback: str | None =Field(\n",
|
| 108 |
+
" default = None, \n",
|
| 109 |
+
" description=\"Mutable field. This will be set by the reviewer.\")\n",
|
| 110 |
+
" revised_answer: str | None =Field(\n",
|
| 111 |
+
" default = None, \n",
|
| 112 |
+
" description=\"Mutable field. This will be set by the answerer after the reviewer has provided feedback.\")\n",
|
| 113 |
+
" original_rank: int | None =Field(\n",
|
| 114 |
+
" default = None, \n",
|
| 115 |
+
" description=\"Mutable field. Rank when no feedback is used by any models.\")\n",
|
| 116 |
+
" exclusive_feedback: str | None =Field(\n",
|
| 117 |
+
" default = None, \n",
|
| 118 |
+
" description=\"Mutable field. Rank when only this model used feedback.\")\n",
|
| 119 |
+
" revised_rank: int | None =Field(\n",
|
| 120 |
+
" default = None, \n",
|
| 121 |
+
" description=\"Mutable field. Rank when all models used feedback.\")\n",
|
| 122 |
+
"\n",
|
| 123 |
+
"results : list[LLMResult] = []\n"
|
| 124 |
+
]
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"cell_type": "code",
|
| 128 |
+
"execution_count": null,
|
| 129 |
+
"metadata": {},
|
| 130 |
+
"outputs": [],
|
| 131 |
+
"source": [
|
| 132 |
+
"# Always remember to do this!\n",
|
| 133 |
+
"load_dotenv(override=True)"
|
| 134 |
+
]
|
| 135 |
+
},
|
| 136 |
+
{
|
| 137 |
+
"cell_type": "code",
|
| 138 |
+
"execution_count": null,
|
| 139 |
+
"metadata": {},
|
| 140 |
+
"outputs": [],
|
| 141 |
+
"source": [
|
| 142 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 143 |
+
"\n",
|
| 144 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 145 |
+
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 146 |
+
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 147 |
+
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 148 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 149 |
+
"\n",
|
| 150 |
+
"if openai_api_key:\n",
|
| 151 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 152 |
+
"else:\n",
|
| 153 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 154 |
+
" \n",
|
| 155 |
+
"if anthropic_api_key:\n",
|
| 156 |
+
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 157 |
+
"else:\n",
|
| 158 |
+
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 159 |
+
"\n",
|
| 160 |
+
"if google_api_key:\n",
|
| 161 |
+
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 162 |
+
"else:\n",
|
| 163 |
+
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 164 |
+
"\n",
|
| 165 |
+
"if deepseek_api_key:\n",
|
| 166 |
+
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 167 |
+
"else:\n",
|
| 168 |
+
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 169 |
+
"\n",
|
| 170 |
+
"if groq_api_key:\n",
|
| 171 |
+
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 172 |
+
"else:\n",
|
| 173 |
+
" print(\"Groq API Key not set (and this is optional)\")"
|
| 174 |
+
]
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"cell_type": "code",
|
| 178 |
+
"execution_count": 27,
|
| 179 |
+
"metadata": {},
|
| 180 |
+
"outputs": [],
|
| 181 |
+
"source": [
|
| 182 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 183 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 184 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 185 |
+
]
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"cell_type": "code",
|
| 189 |
+
"execution_count": null,
|
| 190 |
+
"metadata": {},
|
| 191 |
+
"outputs": [],
|
| 192 |
+
"source": [
|
| 193 |
+
"messages"
|
| 194 |
+
]
|
| 195 |
+
},
|
| 196 |
+
{
|
| 197 |
+
"cell_type": "code",
|
| 198 |
+
"execution_count": null,
|
| 199 |
+
"metadata": {},
|
| 200 |
+
"outputs": [],
|
| 201 |
+
"source": [
|
| 202 |
+
"openai = OpenAI()\n",
|
| 203 |
+
"response = openai.chat.completions.create(\n",
|
| 204 |
+
" model=\"gpt-4o-mini\",\n",
|
| 205 |
+
" messages=messages,\n",
|
| 206 |
+
")\n",
|
| 207 |
+
"question = response.choices[0].message.content\n",
|
| 208 |
+
"print(question)\n"
|
| 209 |
+
]
|
| 210 |
+
},
|
| 211 |
+
{
|
| 212 |
+
"cell_type": "code",
|
| 213 |
+
"execution_count": 30,
|
| 214 |
+
"metadata": {},
|
| 215 |
+
"outputs": [],
|
| 216 |
+
"source": [
|
| 217 |
+
"competitors = []\n",
|
| 218 |
+
"answers = []\n",
|
| 219 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 220 |
+
]
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"cell_type": "code",
|
| 224 |
+
"execution_count": 31,
|
| 225 |
+
"metadata": {},
|
| 226 |
+
"outputs": [],
|
| 227 |
+
"source": [
|
| 228 |
+
"# The API we know well\n",
|
| 229 |
+
"\n",
|
| 230 |
+
"async def openai_answer(messages: list[dict[str, str]], model_name : str) -> str:\n",
|
| 231 |
+
" openai = AsyncOpenAI()\n",
|
| 232 |
+
" response = await openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 233 |
+
" answer = response.choices[0].message.content\n",
|
| 234 |
+
" print(f\"{model_name} answer: {answer[:50]}...\")\n",
|
| 235 |
+
" return answer\n"
|
| 236 |
+
]
|
| 237 |
+
},
|
| 238 |
+
{
|
| 239 |
+
"cell_type": "code",
|
| 240 |
+
"execution_count": 32,
|
| 241 |
+
"metadata": {},
|
| 242 |
+
"outputs": [],
|
| 243 |
+
"source": [
|
| 244 |
+
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 245 |
+
"\n",
|
| 246 |
+
"async def claude_anthropic_answer(messages: list[dict[str, str]], model_name : str) -> str:\n",
|
| 247 |
+
" claude = AsyncAnthropic()\n",
|
| 248 |
+
" response = await claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 249 |
+
" answer = response.content[0].text\n",
|
| 250 |
+
" print(f\"{model_name} answer: {answer[:50]}...\")\n",
|
| 251 |
+
" return answer\n"
|
| 252 |
+
]
|
| 253 |
+
},
|
| 254 |
+
{
|
| 255 |
+
"cell_type": "code",
|
| 256 |
+
"execution_count": 33,
|
| 257 |
+
"metadata": {},
|
| 258 |
+
"outputs": [],
|
| 259 |
+
"source": [
|
| 260 |
+
"async def gemini_google_answer(messages: list[dict[str, str]], model_name : str) -> str: \n",
|
| 261 |
+
" gemini = AsyncOpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 262 |
+
" response = await gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 263 |
+
" answer = response.choices[0].message.content.strip()\n",
|
| 264 |
+
" print(f\"{model_name} answer: {answer[:50]}...\")\n",
|
| 265 |
+
" return answer\n"
|
| 266 |
+
]
|
| 267 |
+
},
|
| 268 |
+
{
|
| 269 |
+
"cell_type": "code",
|
| 270 |
+
"execution_count": 34,
|
| 271 |
+
"metadata": {},
|
| 272 |
+
"outputs": [],
|
| 273 |
+
"source": [
|
| 274 |
+
"async def deepseek_answer(messages: list[dict[str, str]], model_name : str) -> str:\n",
|
| 275 |
+
" deepseek = AsyncOpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 276 |
+
" response = await deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 277 |
+
" answer = response.choices[0].message.content\n",
|
| 278 |
+
" print(f\"{model_name} answer: {answer[:50]}...\")\n",
|
| 279 |
+
" return answer\n"
|
| 280 |
+
]
|
| 281 |
+
},
|
| 282 |
+
{
|
| 283 |
+
"cell_type": "code",
|
| 284 |
+
"execution_count": 35,
|
| 285 |
+
"metadata": {},
|
| 286 |
+
"outputs": [],
|
| 287 |
+
"source": [
|
| 288 |
+
"async def groq_answer(messages: list[dict[str, str]], model_name : str) -> str:\n",
|
| 289 |
+
" groq = AsyncOpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 290 |
+
" response = await groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 291 |
+
" answer = response.choices[0].message.content\n",
|
| 292 |
+
" print(f\"{model_name} answer: {answer[:50]}...\")\n",
|
| 293 |
+
" return answer\n"
|
| 294 |
+
]
|
| 295 |
+
},
|
| 296 |
+
{
|
| 297 |
+
"cell_type": "markdown",
|
| 298 |
+
"metadata": {},
|
| 299 |
+
"source": [
|
| 300 |
+
"## For the next cell, we will use Ollama\n",
|
| 301 |
+
"\n",
|
| 302 |
+
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 303 |
+
"and runs models locally using high performance C++ code.\n",
|
| 304 |
+
"\n",
|
| 305 |
+
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 306 |
+
"\n",
|
| 307 |
+
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 308 |
+
"\n",
|
| 309 |
+
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 310 |
+
"\n",
|
| 311 |
+
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 312 |
+
"\n",
|
| 313 |
+
"`ollama pull <model_name>` downloads a model locally \n",
|
| 314 |
+
"`ollama ls` lists all the models you've downloaded \n",
|
| 315 |
+
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 316 |
+
]
|
| 317 |
+
},
|
| 318 |
+
{
|
| 319 |
+
"cell_type": "markdown",
|
| 320 |
+
"metadata": {},
|
| 321 |
+
"source": [
|
| 322 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 323 |
+
" <tr>\n",
|
| 324 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 325 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 326 |
+
" </td>\n",
|
| 327 |
+
" <td>\n",
|
| 328 |
+
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 329 |
+
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See the <A href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 330 |
+
" </span>\n",
|
| 331 |
+
" </td>\n",
|
| 332 |
+
" </tr>\n",
|
| 333 |
+
"</table>"
|
| 334 |
+
]
|
| 335 |
+
},
|
| 336 |
+
{
|
| 337 |
+
"cell_type": "code",
|
| 338 |
+
"execution_count": 36,
|
| 339 |
+
"metadata": {},
|
| 340 |
+
"outputs": [],
|
| 341 |
+
"source": [
|
| 342 |
+
"#!ollama pull llama3.2"
|
| 343 |
+
]
|
| 344 |
+
},
|
| 345 |
+
{
|
| 346 |
+
"cell_type": "code",
|
| 347 |
+
"execution_count": 37,
|
| 348 |
+
"metadata": {},
|
| 349 |
+
"outputs": [],
|
| 350 |
+
"source": [
|
| 351 |
+
"async def ollama_answer(messages: list[dict[str, str]], model_name : str) -> str:\n",
|
| 352 |
+
" ollama = AsyncOpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 353 |
+
" response = await ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 354 |
+
" answer = response.choices[0].message.content\n",
|
| 355 |
+
" print(f\"{model_name} answer: {answer[:50]}...\")\n",
|
| 356 |
+
" return answer\n"
|
| 357 |
+
]
|
| 358 |
+
},
|
| 359 |
+
{
|
| 360 |
+
"cell_type": "code",
|
| 361 |
+
"execution_count": null,
|
| 362 |
+
"metadata": {},
|
| 363 |
+
"outputs": [],
|
| 364 |
+
"source": [
|
| 365 |
+
"answerers = [openai_answer, claude_anthropic_answer, gemini_google_answer, deepseek_answer, groq_answer, ollama_answer]\n",
|
| 366 |
+
"models = [\"gpt-4o-mini\", \"claude-3-7-sonnet-latest\", \"gemini-2.0-flash\", \"deepseek-chat\", \"llama-3.3-70b-versatile\", \"llama3.2\"]\n",
|
| 367 |
+
"\n",
|
| 368 |
+
"tasks = [ answerer(messages, model) for answerer, model in zip(answerers, models)]\n",
|
| 369 |
+
"answers : list[str] = await asyncio.gather(*tasks)\n",
|
| 370 |
+
"results : list[LLMResult] = [LLMResult(model=model, answer=answer) for model, answer in zip(models, answers)]\n"
|
| 371 |
+
]
|
| 372 |
+
},
|
| 373 |
+
{
|
| 374 |
+
"cell_type": "code",
|
| 375 |
+
"execution_count": null,
|
| 376 |
+
"metadata": {},
|
| 377 |
+
"outputs": [],
|
| 378 |
+
"source": [
|
| 379 |
+
"answers "
|
| 380 |
+
]
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"cell_type": "code",
|
| 384 |
+
"execution_count": 40,
|
| 385 |
+
"metadata": {},
|
| 386 |
+
"outputs": [],
|
| 387 |
+
"source": [
|
| 388 |
+
"reviewer = f\"\"\"You are reviewing a submission for a writing competition. The particpant has been given this question to answer:\n",
|
| 389 |
+
"\n",
|
| 390 |
+
"{question}\n",
|
| 391 |
+
"\n",
|
| 392 |
+
"Your job is to evaluate the response for clarity and strength of argument, and provide constructive suggestions for improving the answer.\n",
|
| 393 |
+
"Limit your feedback to 200 words.\n",
|
| 394 |
+
"\n",
|
| 395 |
+
"Here is the participant's answer:\n",
|
| 396 |
+
"{{answer}}\n",
|
| 397 |
+
"\"\"\"\n",
|
| 398 |
+
"\n",
|
| 399 |
+
"async def review_answer(answer : str) -> str:\n",
|
| 400 |
+
" openai = AsyncOpenAI()\n",
|
| 401 |
+
" reviewer_messages = [{\"role\": \"user\", \"content\": reviewer.format(answer=answer)}]\n",
|
| 402 |
+
" reviewer_response = await openai.chat.completions.create(\n",
|
| 403 |
+
" model=\"gpt-4o-mini\",\n",
|
| 404 |
+
" messages=reviewer_messages,\n",
|
| 405 |
+
" )\n",
|
| 406 |
+
" feedback = reviewer_response.choices[0].message.content\n",
|
| 407 |
+
" print(f\"feedback: {feedback[:50]}...\")\n",
|
| 408 |
+
" return feedback"
|
| 409 |
+
]
|
| 410 |
+
},
|
| 411 |
+
{
|
| 412 |
+
"cell_type": "code",
|
| 413 |
+
"execution_count": null,
|
| 414 |
+
"metadata": {},
|
| 415 |
+
"outputs": [],
|
| 416 |
+
"source": [
|
| 417 |
+
"import asyncio\n",
|
| 418 |
+
"\n",
|
| 419 |
+
"tasks = [review_answer(answer) for answer in answers]\n",
|
| 420 |
+
"feedback = await asyncio.gather(*tasks)\n",
|
| 421 |
+
"\n",
|
| 422 |
+
"for result, feedback in zip(results, feedback):\n",
|
| 423 |
+
" result.feedback = feedback\n"
|
| 424 |
+
]
|
| 425 |
+
},
|
| 426 |
+
{
|
| 427 |
+
"cell_type": "code",
|
| 428 |
+
"execution_count": 42,
|
| 429 |
+
"metadata": {},
|
| 430 |
+
"outputs": [],
|
| 431 |
+
"source": [
|
| 432 |
+
"revision_prompt = f\"\"\"You are revising a submission you wrote for a writing competition based on feedback from a reviewer.\n",
|
| 433 |
+
"\n",
|
| 434 |
+
"You are not required to take any of the feedback into account but you want to win the competition.\n",
|
| 435 |
+
"\n",
|
| 436 |
+
"The question was: \n",
|
| 437 |
+
"{question}\n",
|
| 438 |
+
"\n",
|
| 439 |
+
"The feedback was:\n",
|
| 440 |
+
"{{feedback}}\n",
|
| 441 |
+
"\n",
|
| 442 |
+
"And your original answer was:\n",
|
| 443 |
+
"{{answer}}\n",
|
| 444 |
+
"\n",
|
| 445 |
+
"Please return your revised answer and nothing else.\n",
|
| 446 |
+
"\"\"\"\n"
|
| 447 |
+
]
|
| 448 |
+
},
|
| 449 |
+
{
|
| 450 |
+
"cell_type": "code",
|
| 451 |
+
"execution_count": null,
|
| 452 |
+
"metadata": {},
|
| 453 |
+
"outputs": [],
|
| 454 |
+
"source": [
|
| 455 |
+
"messages = [{\"role\": \"user\", \"content\": revision_prompt.format(answer=answer, feedback=feedback)} for answer, feedback in zip(answers, feedback)]\n",
|
| 456 |
+
"tasks = [ answerer(messages, model) for answerer, model in zip(answerers, models)]\n",
|
| 457 |
+
"revised_answers = await asyncio.gather(*tasks)\n",
|
| 458 |
+
"\n",
|
| 459 |
+
"for revised_answer, result in zip(revised_answers, results):\n",
|
| 460 |
+
" result.revised_answer = revised_answer\n",
|
| 461 |
+
"\n"
|
| 462 |
+
]
|
| 463 |
+
},
|
| 464 |
+
{
|
| 465 |
+
"cell_type": "code",
|
| 466 |
+
"execution_count": 44,
|
| 467 |
+
"metadata": {},
|
| 468 |
+
"outputs": [],
|
| 469 |
+
"source": [
|
| 470 |
+
"# need to use Template because we are making a later substitution for \"together\"\n",
|
| 471 |
+
"judge = Template(f\"\"\"You are judging a competition between {len(results)} competitors.\n",
|
| 472 |
+
"Each model has been given this question:\n",
|
| 473 |
+
"\n",
|
| 474 |
+
"{question}\n",
|
| 475 |
+
"\n",
|
| 476 |
+
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 477 |
+
"Respond with JSON, and only JSON, with the following format:\n",
|
| 478 |
+
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 479 |
+
"\n",
|
| 480 |
+
"Here are the responses from each competitor:\n",
|
| 481 |
+
"\n",
|
| 482 |
+
"$together\n",
|
| 483 |
+
"\n",
|
| 484 |
+
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\")\n",
|
| 485 |
+
"\n",
|
| 486 |
+
"\n"
|
| 487 |
+
]
|
| 488 |
+
},
|
| 489 |
+
{
|
| 490 |
+
"cell_type": "code",
|
| 491 |
+
"execution_count": 45,
|
| 492 |
+
"metadata": {},
|
| 493 |
+
"outputs": [],
|
| 494 |
+
"source": [
|
| 495 |
+
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 496 |
+
]
|
| 497 |
+
},
|
| 498 |
+
{
|
| 499 |
+
"cell_type": "code",
|
| 500 |
+
"execution_count": 46,
|
| 501 |
+
"metadata": {},
|
| 502 |
+
"outputs": [],
|
| 503 |
+
"source": [
|
| 504 |
+
"def come_together(results : list[LLMResult], revised_entry : int | None ) -> list[dict[str, str]]:\n",
|
| 505 |
+
" # include the revised answer only for index \"revised_entry\", or for all entries if revised_entry is None\n",
|
| 506 |
+
" together = \"\"\n",
|
| 507 |
+
" for index, result in enumerate(results):\n",
|
| 508 |
+
" together += f\"# Response from competitor {index}\\n\\n\"\n",
|
| 509 |
+
" together += result.answer if (index != revised_entry and revised_entry is not None) else result.revised_answer + \"\\n\\n\"\n",
|
| 510 |
+
" return [{\"role\": \"user\", \"content\": judge.substitute(together=together)}]\n",
|
| 511 |
+
"\n",
|
| 512 |
+
"\n",
|
| 513 |
+
"# Judgement time!\n",
|
| 514 |
+
"async def judgement_time(results : list[LLMResult], revised_entry : int ) -> str:\n",
|
| 515 |
+
" judge_messages = come_together(results, revised_entry)\n",
|
| 516 |
+
"\n",
|
| 517 |
+
" openai = AsyncOpenAI()\n",
|
| 518 |
+
" response = await openai.chat.completions.create(\n",
|
| 519 |
+
" model=\"o3-mini\",\n",
|
| 520 |
+
" messages=judge_messages,\n",
|
| 521 |
+
" )\n",
|
| 522 |
+
" results = response.choices[0].message.content\n",
|
| 523 |
+
" results_dict = json.loads(results)\n",
|
| 524 |
+
" results = { int(model) : int(rank) +1 for rank, model in enumerate(results_dict[\"results\"]) }\n",
|
| 525 |
+
" return results\n",
|
| 526 |
+
"\n"
|
| 527 |
+
]
|
| 528 |
+
},
|
| 529 |
+
{
|
| 530 |
+
"cell_type": "code",
|
| 531 |
+
"execution_count": 47,
|
| 532 |
+
"metadata": {},
|
| 533 |
+
"outputs": [],
|
| 534 |
+
"source": [
|
| 535 |
+
"#evaluate the impact of feedback on model performance\n",
|
| 536 |
+
"\n",
|
| 537 |
+
"no_feedback = await judgement_time(results, -1)\n",
|
| 538 |
+
"with_feedback = await judgement_time(results, None)\n",
|
| 539 |
+
"\n",
|
| 540 |
+
"tasks = [ judgement_time(results, i) for i in range(len(results))]\n",
|
| 541 |
+
"model_spefic_feedback = await asyncio.gather(*tasks)\n",
|
| 542 |
+
"\n",
|
| 543 |
+
"for index, result in enumerate(results):\n",
|
| 544 |
+
" result.original_rank = no_feedback[index]\n",
|
| 545 |
+
" result.exclusive_feedback = model_spefic_feedback[index][index]\n",
|
| 546 |
+
" result.revised_rank = with_feedback[index]\n",
|
| 547 |
+
"\n"
|
| 548 |
+
]
|
| 549 |
+
},
|
| 550 |
+
{
|
| 551 |
+
"cell_type": "code",
|
| 552 |
+
"execution_count": null,
|
| 553 |
+
"metadata": {},
|
| 554 |
+
"outputs": [],
|
| 555 |
+
"source": [
|
| 556 |
+
"\n",
|
| 557 |
+
"table = PrettyTable()\n",
|
| 558 |
+
"table.field_names = [\"Model\", \"Original Rank\", \"Exclusive Feedback\", \"With Feedback (all models)\"]\n",
|
| 559 |
+
"\n",
|
| 560 |
+
"for result in results:\n",
|
| 561 |
+
" table.add_row([result.model, result.original_rank, result.exclusive_feedback, result.revised_rank])\n",
|
| 562 |
+
"\n",
|
| 563 |
+
"print(table)\n",
|
| 564 |
+
"\n"
|
| 565 |
+
]
|
| 566 |
+
},
|
| 567 |
+
{
|
| 568 |
+
"cell_type": "markdown",
|
| 569 |
+
"metadata": {},
|
| 570 |
+
"source": [
|
| 571 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 572 |
+
" <tr>\n",
|
| 573 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 574 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 575 |
+
" </td>\n",
|
| 576 |
+
" <td>\n",
|
| 577 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 578 |
+
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 579 |
+
" </span>\n",
|
| 580 |
+
" </td>\n",
|
| 581 |
+
" </tr>\n",
|
| 582 |
+
"</table>"
|
| 583 |
+
]
|
| 584 |
+
},
|
| 585 |
+
{
|
| 586 |
+
"cell_type": "markdown",
|
| 587 |
+
"metadata": {},
|
| 588 |
+
"source": [
|
| 589 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 590 |
+
" <tr>\n",
|
| 591 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 592 |
+
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 593 |
+
" </td>\n",
|
| 594 |
+
" <td>\n",
|
| 595 |
+
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 596 |
+
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 597 |
+
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 598 |
+
" to business projects where accuracy is critical.\n",
|
| 599 |
+
" </span>\n",
|
| 600 |
+
" </td>\n",
|
| 601 |
+
" </tr>\n",
|
| 602 |
+
"</table>"
|
| 603 |
+
]
|
| 604 |
+
}
|
| 605 |
+
],
|
| 606 |
+
"metadata": {
|
| 607 |
+
"kernelspec": {
|
| 608 |
+
"display_name": ".venv",
|
| 609 |
+
"language": "python",
|
| 610 |
+
"name": "python3"
|
| 611 |
+
},
|
| 612 |
+
"language_info": {
|
| 613 |
+
"codemirror_mode": {
|
| 614 |
+
"name": "ipython",
|
| 615 |
+
"version": 3
|
| 616 |
+
},
|
| 617 |
+
"file_extension": ".py",
|
| 618 |
+
"mimetype": "text/x-python",
|
| 619 |
+
"name": "python",
|
| 620 |
+
"nbconvert_exporter": "python",
|
| 621 |
+
"pygments_lexer": "ipython3",
|
| 622 |
+
"version": "3.12.9"
|
| 623 |
+
}
|
| 624 |
+
},
|
| 625 |
+
"nbformat": 4,
|
| 626 |
+
"nbformat_minor": 2
|
| 627 |
+
}
|
community_contributions/2_lab2_moneek.ipynb
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"This program uses the Evaluator-Optimizer pattern to improve the generator's response when creating marketing content for a smart keyboard."
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "code",
|
| 14 |
+
"execution_count": null,
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"outputs": [],
|
| 17 |
+
"source": [
|
| 18 |
+
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 19 |
+
"\n",
|
| 20 |
+
"import os\n",
|
| 21 |
+
"import json\n",
|
| 22 |
+
"from dotenv import load_dotenv\n",
|
| 23 |
+
"from openai import OpenAI\n",
|
| 24 |
+
"from anthropic import Anthropic\n",
|
| 25 |
+
"from IPython.display import Markdown, display"
|
| 26 |
+
]
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"cell_type": "code",
|
| 30 |
+
"execution_count": null,
|
| 31 |
+
"metadata": {},
|
| 32 |
+
"outputs": [],
|
| 33 |
+
"source": [
|
| 34 |
+
"# Always remember to do this!\n",
|
| 35 |
+
"load_dotenv(override=True)"
|
| 36 |
+
]
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"cell_type": "code",
|
| 40 |
+
"execution_count": null,
|
| 41 |
+
"metadata": {},
|
| 42 |
+
"outputs": [],
|
| 43 |
+
"source": [
|
| 44 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 45 |
+
"\n",
|
| 46 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 47 |
+
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 48 |
+
"\n",
|
| 49 |
+
"if openai_api_key:\n",
|
| 50 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 51 |
+
"else:\n",
|
| 52 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 53 |
+
" \n",
|
| 54 |
+
"if anthropic_api_key:\n",
|
| 55 |
+
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 56 |
+
"else:\n",
|
| 57 |
+
" print(\"Anthropic API Key not set (and this is optional)\")"
|
| 58 |
+
]
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"cell_type": "code",
|
| 62 |
+
"execution_count": null,
|
| 63 |
+
"metadata": {},
|
| 64 |
+
"outputs": [],
|
| 65 |
+
"source": [
|
| 66 |
+
"request = \"Provide a short marketing content for XYZ keyboard. \"\n",
|
| 67 |
+
"request += \"It should be engaging and talk about innovative features.\"\n",
|
| 68 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 69 |
+
]
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"cell_type": "code",
|
| 73 |
+
"execution_count": null,
|
| 74 |
+
"metadata": {},
|
| 75 |
+
"outputs": [],
|
| 76 |
+
"source": [
|
| 77 |
+
"messages"
|
| 78 |
+
]
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"cell_type": "code",
|
| 82 |
+
"execution_count": null,
|
| 83 |
+
"metadata": {},
|
| 84 |
+
"outputs": [],
|
| 85 |
+
"source": [
|
| 86 |
+
"openai = OpenAI()\n",
|
| 87 |
+
"\n",
|
| 88 |
+
"response = openai.chat.completions.create(\n",
|
| 89 |
+
" model=\"gpt-4o-mini\",\n",
|
| 90 |
+
" messages=messages,\n",
|
| 91 |
+
")\n",
|
| 92 |
+
"marketing_statement= response.choices[0].message.content\n",
|
| 93 |
+
"print(marketing_statement)\n",
|
| 94 |
+
"\n"
|
| 95 |
+
]
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"cell_type": "code",
|
| 99 |
+
"execution_count": null,
|
| 100 |
+
"metadata": {},
|
| 101 |
+
"outputs": [],
|
| 102 |
+
"source": [
|
| 103 |
+
"judge = f\"\"\"### Instruction ###\n",
|
| 104 |
+
"You are an expert tech gadget analyst. Your task is to evaluate a marketing material based on several criteria.\n",
|
| 105 |
+
"Please be brief.\n",
|
| 106 |
+
"\n",
|
| 107 |
+
"### Ad to Evaluate ###\n",
|
| 108 |
+
"{marketing_statement}\n",
|
| 109 |
+
"\n",
|
| 110 |
+
"### Evaluation Criteria ###\n",
|
| 111 |
+
"Evaluate the statement based on how engaging it is.\n",
|
| 112 |
+
"\n",
|
| 113 |
+
"### Expected Output Format ###\n",
|
| 114 |
+
"Respond with JSON, and only JSON, with the following format:\n",
|
| 115 |
+
"{{\"results\": {{\"statement\": \"{marketing_statement}\", \"engagability\": \"Comment on whether the content is engaging\", \"critique\": \"Offer a specific critique and suggest at least one way the statement could be improved\", \"verdict\": \"This should have a value either 'accepted' or 'rejected' based on whether the statement requires improvement\"}}}}\n",
|
| 116 |
+
"\"\"\"\n",
|
| 117 |
+
"\n",
|
| 118 |
+
"print(judge)\n",
|
| 119 |
+
"judge_messages = [{\"role\": \"user\", \"content\": judge}]\n",
|
| 120 |
+
"\n",
|
| 121 |
+
"model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 122 |
+
"claude = Anthropic()\n",
|
| 123 |
+
"response = claude.messages.create(model=model_name, messages=judge_messages, max_tokens=1000)\n",
|
| 124 |
+
"marketing_statement_feedback = response.content[0].text\n",
|
| 125 |
+
"\n",
|
| 126 |
+
"print(marketing_statement_feedback)\n"
|
| 127 |
+
]
|
| 128 |
+
},
|
| 129 |
+
{
|
| 130 |
+
"cell_type": "code",
|
| 131 |
+
"execution_count": null,
|
| 132 |
+
"metadata": {},
|
| 133 |
+
"outputs": [],
|
| 134 |
+
"source": [
|
| 135 |
+
"results_dict = json.loads(marketing_statement_feedback)\n",
|
| 136 |
+
"feedback = results_dict[\"results\"]\n",
|
| 137 |
+
"print(feedback)\n",
|
| 138 |
+
"print(\"\\n\\n\")\n",
|
| 139 |
+
"display(Markdown(marketing_statement_feedback))\n",
|
| 140 |
+
"\n",
|
| 141 |
+
"print(f\"Marketing statement:\\n{feedback[\"statement\"]}\")\n",
|
| 142 |
+
"for key in feedback:\n",
|
| 143 |
+
" if key == \"verdict\":\n",
|
| 144 |
+
" if feedback[key] == \"accepted\":\n",
|
| 145 |
+
" print(\"Marketing statement was accepted.\")\n",
|
| 146 |
+
" break\n",
|
| 147 |
+
" else:\n",
|
| 148 |
+
" print(\"Marketing statement was rejected and requires revision. Please iterate over to call Generator and Evaluator for improvement\")"
|
| 149 |
+
]
|
| 150 |
+
}
|
| 151 |
+
],
|
| 152 |
+
"metadata": {
|
| 153 |
+
"kernelspec": {
|
| 154 |
+
"display_name": ".venv",
|
| 155 |
+
"language": "python",
|
| 156 |
+
"name": "python3"
|
| 157 |
+
},
|
| 158 |
+
"language_info": {
|
| 159 |
+
"codemirror_mode": {
|
| 160 |
+
"name": "ipython",
|
| 161 |
+
"version": 3
|
| 162 |
+
},
|
| 163 |
+
"file_extension": ".py",
|
| 164 |
+
"mimetype": "text/x-python",
|
| 165 |
+
"name": "python",
|
| 166 |
+
"nbconvert_exporter": "python",
|
| 167 |
+
"pygments_lexer": "ipython3",
|
| 168 |
+
"version": "3.12.11"
|
| 169 |
+
}
|
| 170 |
+
},
|
| 171 |
+
"nbformat": 4,
|
| 172 |
+
"nbformat_minor": 2
|
| 173 |
+
}
|
community_contributions/2_lab2_multi-evaluation-criteria.ipynb
ADDED
|
@@ -0,0 +1,506 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "markdown",
|
| 14 |
+
"metadata": {},
|
| 15 |
+
"source": [
|
| 16 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
+
" <tr>\n",
|
| 18 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
+
" </td>\n",
|
| 21 |
+
" <td>\n",
|
| 22 |
+
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
+
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
+
" </span>\n",
|
| 25 |
+
" </td>\n",
|
| 26 |
+
" </tr>\n",
|
| 27 |
+
"</table>"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"cell_type": "code",
|
| 32 |
+
"execution_count": null,
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"outputs": [],
|
| 35 |
+
"source": [
|
| 36 |
+
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 37 |
+
"\n",
|
| 38 |
+
"import os\n",
|
| 39 |
+
"import json\n",
|
| 40 |
+
"from dotenv import load_dotenv\n",
|
| 41 |
+
"from openai import OpenAI\n",
|
| 42 |
+
"from anthropic import Anthropic\n",
|
| 43 |
+
"from IPython.display import Markdown, display"
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"cell_type": "code",
|
| 48 |
+
"execution_count": null,
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"outputs": [],
|
| 51 |
+
"source": [
|
| 52 |
+
"# Always remember to do this!\n",
|
| 53 |
+
"load_dotenv(override=True)"
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "code",
|
| 58 |
+
"execution_count": null,
|
| 59 |
+
"metadata": {},
|
| 60 |
+
"outputs": [],
|
| 61 |
+
"source": [
|
| 62 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 63 |
+
"\n",
|
| 64 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 65 |
+
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 66 |
+
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 67 |
+
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 68 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 69 |
+
"\n",
|
| 70 |
+
"if openai_api_key:\n",
|
| 71 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 72 |
+
"else:\n",
|
| 73 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 74 |
+
" \n",
|
| 75 |
+
"if anthropic_api_key:\n",
|
| 76 |
+
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 77 |
+
"else:\n",
|
| 78 |
+
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 79 |
+
"\n",
|
| 80 |
+
"if google_api_key:\n",
|
| 81 |
+
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 82 |
+
"else:\n",
|
| 83 |
+
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 84 |
+
"\n",
|
| 85 |
+
"if deepseek_api_key:\n",
|
| 86 |
+
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 87 |
+
"else:\n",
|
| 88 |
+
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"if groq_api_key:\n",
|
| 91 |
+
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 92 |
+
"else:\n",
|
| 93 |
+
" print(\"Groq API Key not set (and this is optional)\")"
|
| 94 |
+
]
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"cell_type": "code",
|
| 98 |
+
"execution_count": null,
|
| 99 |
+
"metadata": {},
|
| 100 |
+
"outputs": [],
|
| 101 |
+
"source": [
|
| 102 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 103 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 104 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 105 |
+
]
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"cell_type": "code",
|
| 109 |
+
"execution_count": null,
|
| 110 |
+
"metadata": {},
|
| 111 |
+
"outputs": [],
|
| 112 |
+
"source": [
|
| 113 |
+
"messages"
|
| 114 |
+
]
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"cell_type": "code",
|
| 118 |
+
"execution_count": null,
|
| 119 |
+
"metadata": {},
|
| 120 |
+
"outputs": [],
|
| 121 |
+
"source": [
|
| 122 |
+
"openai = OpenAI()\n",
|
| 123 |
+
"response = openai.chat.completions.create(\n",
|
| 124 |
+
" model=\"gpt-4o-mini\",\n",
|
| 125 |
+
" messages=messages,\n",
|
| 126 |
+
")\n",
|
| 127 |
+
"question = response.choices[0].message.content\n",
|
| 128 |
+
"print(question)\n"
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"cell_type": "code",
|
| 133 |
+
"execution_count": null,
|
| 134 |
+
"metadata": {},
|
| 135 |
+
"outputs": [],
|
| 136 |
+
"source": [
|
| 137 |
+
"competitors = []\n",
|
| 138 |
+
"answers = []\n",
|
| 139 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 140 |
+
]
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"cell_type": "code",
|
| 144 |
+
"execution_count": null,
|
| 145 |
+
"metadata": {},
|
| 146 |
+
"outputs": [],
|
| 147 |
+
"source": [
|
| 148 |
+
"# The API we know well\n",
|
| 149 |
+
"\n",
|
| 150 |
+
"model_name = \"gpt-4o-mini\"\n",
|
| 151 |
+
"\n",
|
| 152 |
+
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 153 |
+
"answer = response.choices[0].message.content\n",
|
| 154 |
+
"\n",
|
| 155 |
+
"display(Markdown(answer))\n",
|
| 156 |
+
"competitors.append(model_name)\n",
|
| 157 |
+
"answers.append(answer)"
|
| 158 |
+
]
|
| 159 |
+
},
|
| 160 |
+
{
|
| 161 |
+
"cell_type": "code",
|
| 162 |
+
"execution_count": null,
|
| 163 |
+
"metadata": {},
|
| 164 |
+
"outputs": [],
|
| 165 |
+
"source": [
|
| 166 |
+
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 167 |
+
"\n",
|
| 168 |
+
"model_name = \"claude-sonnet-4-latest\"\n",
|
| 169 |
+
"\n",
|
| 170 |
+
"claude = Anthropic()\n",
|
| 171 |
+
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 172 |
+
"answer = response.content[0].text\n",
|
| 173 |
+
"\n",
|
| 174 |
+
"display(Markdown(answer))\n",
|
| 175 |
+
"competitors.append(model_name)\n",
|
| 176 |
+
"answers.append(answer)"
|
| 177 |
+
]
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"cell_type": "code",
|
| 181 |
+
"execution_count": null,
|
| 182 |
+
"metadata": {},
|
| 183 |
+
"outputs": [],
|
| 184 |
+
"source": [
|
| 185 |
+
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 186 |
+
"model_name = \"gemini-2.0-flash\"\n",
|
| 187 |
+
"\n",
|
| 188 |
+
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 189 |
+
"answer = response.choices[0].message.content\n",
|
| 190 |
+
"\n",
|
| 191 |
+
"display(Markdown(answer))\n",
|
| 192 |
+
"competitors.append(model_name)\n",
|
| 193 |
+
"answers.append(answer)"
|
| 194 |
+
]
|
| 195 |
+
},
|
| 196 |
+
{
|
| 197 |
+
"cell_type": "code",
|
| 198 |
+
"execution_count": null,
|
| 199 |
+
"metadata": {},
|
| 200 |
+
"outputs": [],
|
| 201 |
+
"source": [
|
| 202 |
+
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 203 |
+
"model_name = \"deepseek-chat\"\n",
|
| 204 |
+
"\n",
|
| 205 |
+
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 206 |
+
"answer = response.choices[0].message.content\n",
|
| 207 |
+
"\n",
|
| 208 |
+
"display(Markdown(answer))\n",
|
| 209 |
+
"competitors.append(model_name)\n",
|
| 210 |
+
"answers.append(answer)"
|
| 211 |
+
]
|
| 212 |
+
},
|
| 213 |
+
{
|
| 214 |
+
"cell_type": "code",
|
| 215 |
+
"execution_count": null,
|
| 216 |
+
"metadata": {},
|
| 217 |
+
"outputs": [],
|
| 218 |
+
"source": [
|
| 219 |
+
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 220 |
+
"model_name = \"llama-3.3-70b-versatile\"\n",
|
| 221 |
+
"\n",
|
| 222 |
+
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 223 |
+
"answer = response.choices[0].message.content\n",
|
| 224 |
+
"\n",
|
| 225 |
+
"display(Markdown(answer))\n",
|
| 226 |
+
"competitors.append(model_name)\n",
|
| 227 |
+
"answers.append(answer)\n"
|
| 228 |
+
]
|
| 229 |
+
},
|
| 230 |
+
{
|
| 231 |
+
"cell_type": "markdown",
|
| 232 |
+
"metadata": {},
|
| 233 |
+
"source": [
|
| 234 |
+
"## For the next cell, we will use Ollama\n",
|
| 235 |
+
"\n",
|
| 236 |
+
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 237 |
+
"and runs models locally using high performance C++ code.\n",
|
| 238 |
+
"\n",
|
| 239 |
+
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 240 |
+
"\n",
|
| 241 |
+
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 242 |
+
"\n",
|
| 243 |
+
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 244 |
+
"\n",
|
| 245 |
+
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 246 |
+
"\n",
|
| 247 |
+
"`ollama pull <model_name>` downloads a model locally \n",
|
| 248 |
+
"`ollama ls` lists all the models you've downloaded \n",
|
| 249 |
+
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 250 |
+
]
|
| 251 |
+
},
|
| 252 |
+
{
|
| 253 |
+
"cell_type": "markdown",
|
| 254 |
+
"metadata": {},
|
| 255 |
+
"source": [
|
| 256 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 257 |
+
" <tr>\n",
|
| 258 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 259 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 260 |
+
" </td>\n",
|
| 261 |
+
" <td>\n",
|
| 262 |
+
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 263 |
+
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See <a href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 264 |
+
" </span>\n",
|
| 265 |
+
" </td>\n",
|
| 266 |
+
" </tr>\n",
|
| 267 |
+
"</table>"
|
| 268 |
+
]
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"cell_type": "code",
|
| 272 |
+
"execution_count": null,
|
| 273 |
+
"metadata": {},
|
| 274 |
+
"outputs": [],
|
| 275 |
+
"source": [
|
| 276 |
+
"!ollama pull llama3.2"
|
| 277 |
+
]
|
| 278 |
+
},
|
| 279 |
+
{
|
| 280 |
+
"cell_type": "code",
|
| 281 |
+
"execution_count": null,
|
| 282 |
+
"metadata": {},
|
| 283 |
+
"outputs": [],
|
| 284 |
+
"source": [
|
| 285 |
+
"ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 286 |
+
"model_name = \"llama3\"\n",
|
| 287 |
+
"\n",
|
| 288 |
+
"response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 289 |
+
"answer = response.choices[0].message.content\n",
|
| 290 |
+
"\n",
|
| 291 |
+
"display(Markdown(answer))\n",
|
| 292 |
+
"competitors.append(model_name)\n",
|
| 293 |
+
"answers.append(answer)"
|
| 294 |
+
]
|
| 295 |
+
},
|
| 296 |
+
{
|
| 297 |
+
"cell_type": "code",
|
| 298 |
+
"execution_count": null,
|
| 299 |
+
"metadata": {},
|
| 300 |
+
"outputs": [],
|
| 301 |
+
"source": [
|
| 302 |
+
"# So where are we?\n",
|
| 303 |
+
"\n",
|
| 304 |
+
"print(competitors)\n",
|
| 305 |
+
"print(answers)\n"
|
| 306 |
+
]
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"cell_type": "code",
|
| 310 |
+
"execution_count": null,
|
| 311 |
+
"metadata": {},
|
| 312 |
+
"outputs": [],
|
| 313 |
+
"source": [
|
| 314 |
+
"# It's nice to know how to use \"zip\"\n",
|
| 315 |
+
"for competitor, answer in zip(competitors, answers):\n",
|
| 316 |
+
" print(f\"Competitor: {competitor}\\n\\n{answer}\")\n"
|
| 317 |
+
]
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"cell_type": "code",
|
| 321 |
+
"execution_count": null,
|
| 322 |
+
"metadata": {},
|
| 323 |
+
"outputs": [],
|
| 324 |
+
"source": [
|
| 325 |
+
"for competitor, answer in zip(competitors, answers):\n",
|
| 326 |
+
" display(Markdown(f\"# Competitor: {competitor}\\n\\n{answer}\"))"
|
| 327 |
+
]
|
| 328 |
+
},
|
| 329 |
+
{
|
| 330 |
+
"cell_type": "code",
|
| 331 |
+
"execution_count": null,
|
| 332 |
+
"metadata": {},
|
| 333 |
+
"outputs": [],
|
| 334 |
+
"source": [
|
| 335 |
+
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 336 |
+
"\n",
|
| 337 |
+
"together = \"\"\n",
|
| 338 |
+
"for index, answer in enumerate(answers):\n",
|
| 339 |
+
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 340 |
+
" together += answer + \"\\n\\n\""
|
| 341 |
+
]
|
| 342 |
+
},
|
| 343 |
+
{
|
| 344 |
+
"cell_type": "code",
|
| 345 |
+
"execution_count": null,
|
| 346 |
+
"metadata": {},
|
| 347 |
+
"outputs": [],
|
| 348 |
+
"source": [
|
| 349 |
+
"print(together)"
|
| 350 |
+
]
|
| 351 |
+
},
|
| 352 |
+
{
|
| 353 |
+
"cell_type": "code",
|
| 354 |
+
"execution_count": null,
|
| 355 |
+
"metadata": {},
|
| 356 |
+
"outputs": [],
|
| 357 |
+
"source": [
|
| 358 |
+
"evaluation_criteria = [\"Effectiveness in resolving the conflict\", \"Clarity of argument\", \"Creativity of solution\", \"Strength of argument\", \"conciseness\", \"applicability to a business context\"]\n",
|
| 359 |
+
"\n",
|
| 360 |
+
"judgements = []\n",
|
| 361 |
+
"\n",
|
| 362 |
+
"for evaluation_criterion in evaluation_criteria:\n",
|
| 363 |
+
"\n",
|
| 364 |
+
" judgements.append (f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 365 |
+
" Each model has been given this question:\n",
|
| 366 |
+
"\n",
|
| 367 |
+
" {question}\n",
|
| 368 |
+
"\n",
|
| 369 |
+
" Your job is to evaluate each response for {evaluation_criterion}, and rank them in order of best to worst.\n",
|
| 370 |
+
" Respond with JSON, and only JSON, with the following format:\n",
|
| 371 |
+
" {{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 372 |
+
"\n",
|
| 373 |
+
" Here are the responses from each competitor:\n",
|
| 374 |
+
"\n",
|
| 375 |
+
" {together}\n",
|
| 376 |
+
"\n",
|
| 377 |
+
" Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\")\n"
|
| 378 |
+
]
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"cell_type": "code",
|
| 382 |
+
"execution_count": null,
|
| 383 |
+
"metadata": {},
|
| 384 |
+
"outputs": [],
|
| 385 |
+
"source": [
|
| 386 |
+
"print(judgements[1])\n"
|
| 387 |
+
]
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"cell_type": "code",
|
| 391 |
+
"execution_count": null,
|
| 392 |
+
"metadata": {},
|
| 393 |
+
"outputs": [],
|
| 394 |
+
"source": [
|
| 395 |
+
"\n",
|
| 396 |
+
"judge_messages = []\n",
|
| 397 |
+
"for judgement in judgements:\n",
|
| 398 |
+
" judge_messages.append ([{\"role\": \"user\", \"content\": judgement}])"
|
| 399 |
+
]
|
| 400 |
+
},
|
| 401 |
+
{
|
| 402 |
+
"cell_type": "code",
|
| 403 |
+
"execution_count": null,
|
| 404 |
+
"metadata": {},
|
| 405 |
+
"outputs": [],
|
| 406 |
+
"source": [
|
| 407 |
+
"results = []\n",
|
| 408 |
+
"# Judgement time!\n",
|
| 409 |
+
"for judge_message in judge_messages:\n",
|
| 410 |
+
" openai = OpenAI()\n",
|
| 411 |
+
" response = openai.chat.completions.create(\n",
|
| 412 |
+
" model=\"o3-mini\",\n",
|
| 413 |
+
" messages=judge_message,\n",
|
| 414 |
+
" )\n",
|
| 415 |
+
" results.append (response.choices[0].message.content)\n",
|
| 416 |
+
" print(results[0])\n"
|
| 417 |
+
]
|
| 418 |
+
},
|
| 419 |
+
{
|
| 420 |
+
"cell_type": "code",
|
| 421 |
+
"execution_count": null,
|
| 422 |
+
"metadata": {},
|
| 423 |
+
"outputs": [],
|
| 424 |
+
"source": [
|
| 425 |
+
"for result in results:\n",
|
| 426 |
+
" print(result)"
|
| 427 |
+
]
|
| 428 |
+
},
|
| 429 |
+
{
|
| 430 |
+
"cell_type": "code",
|
| 431 |
+
"execution_count": null,
|
| 432 |
+
"metadata": {},
|
| 433 |
+
"outputs": [],
|
| 434 |
+
"source": [
|
| 435 |
+
"# OK let's turn this into results!\n",
|
| 436 |
+
"\n",
|
| 437 |
+
"for result, evaluation_criterion in zip(results, evaluation_criteria):\n",
|
| 438 |
+
" results_dict = json.loads(result)\n",
|
| 439 |
+
" ranks = results_dict[\"results\"]\n",
|
| 440 |
+
" display(Markdown(f\"### {evaluation_criterion}\"))\n",
|
| 441 |
+
" for index, result in enumerate(ranks):\n",
|
| 442 |
+
" competitor = competitors[int(result)-1] \n",
|
| 443 |
+
" display(Markdown(f\"Rank {index+1}: {competitor}\"))"
|
| 444 |
+
]
|
| 445 |
+
},
|
| 446 |
+
{
|
| 447 |
+
"cell_type": "markdown",
|
| 448 |
+
"metadata": {},
|
| 449 |
+
"source": [
|
| 450 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 451 |
+
" <tr>\n",
|
| 452 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 453 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 454 |
+
" </td>\n",
|
| 455 |
+
" <td>\n",
|
| 456 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 457 |
+
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 458 |
+
" </span>\n",
|
| 459 |
+
" </td>\n",
|
| 460 |
+
" </tr>\n",
|
| 461 |
+
"</table>"
|
| 462 |
+
]
|
| 463 |
+
},
|
| 464 |
+
{
|
| 465 |
+
"cell_type": "markdown",
|
| 466 |
+
"metadata": {},
|
| 467 |
+
"source": [
|
| 468 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 469 |
+
" <tr>\n",
|
| 470 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 471 |
+
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 472 |
+
" </td>\n",
|
| 473 |
+
" <td>\n",
|
| 474 |
+
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 475 |
+
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 476 |
+
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 477 |
+
" to business projects where accuracy is critical.\n",
|
| 478 |
+
" </span>\n",
|
| 479 |
+
" </td>\n",
|
| 480 |
+
" </tr>\n",
|
| 481 |
+
"</table>"
|
| 482 |
+
]
|
| 483 |
+
}
|
| 484 |
+
],
|
| 485 |
+
"metadata": {
|
| 486 |
+
"kernelspec": {
|
| 487 |
+
"display_name": ".venv",
|
| 488 |
+
"language": "python",
|
| 489 |
+
"name": "python3"
|
| 490 |
+
},
|
| 491 |
+
"language_info": {
|
| 492 |
+
"codemirror_mode": {
|
| 493 |
+
"name": "ipython",
|
| 494 |
+
"version": 3
|
| 495 |
+
},
|
| 496 |
+
"file_extension": ".py",
|
| 497 |
+
"mimetype": "text/x-python",
|
| 498 |
+
"name": "python",
|
| 499 |
+
"nbconvert_exporter": "python",
|
| 500 |
+
"pygments_lexer": "ipython3",
|
| 501 |
+
"version": "3.12.10"
|
| 502 |
+
}
|
| 503 |
+
},
|
| 504 |
+
"nbformat": 4,
|
| 505 |
+
"nbformat_minor": 2
|
| 506 |
+
}
|
community_contributions/2_lab2_orchestrator.ipynb
ADDED
|
@@ -0,0 +1,494 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "ed27526e",
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"source": [
|
| 8 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 9 |
+
" <tr>\n",
|
| 10 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 11 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 12 |
+
" </td>\n",
|
| 13 |
+
" <td>\n",
|
| 14 |
+
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 15 |
+
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a GitHub account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 16 |
+
" </span>\n",
|
| 17 |
+
" </td>\n",
|
| 18 |
+
" </tr>\n",
|
| 19 |
+
"</table>"
|
| 20 |
+
]
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"cell_type": "code",
|
| 24 |
+
"execution_count": null,
|
| 25 |
+
"id": "1d3a7c44",
|
| 26 |
+
"metadata": {},
|
| 27 |
+
"outputs": [],
|
| 28 |
+
"source": [
|
| 29 |
+
"# Start with imports\n",
|
| 30 |
+
"\n",
|
| 31 |
+
"import os\n",
|
| 32 |
+
"import json\n",
|
| 33 |
+
"from dotenv import load_dotenv\n",
|
| 34 |
+
"from openai import OpenAI\n",
|
| 35 |
+
"from anthropic import Anthropic\n",
|
| 36 |
+
"from IPython.display import Markdown, display"
|
| 37 |
+
]
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"cell_type": "code",
|
| 41 |
+
"execution_count": null,
|
| 42 |
+
"id": "ca5dc982",
|
| 43 |
+
"metadata": {},
|
| 44 |
+
"outputs": [],
|
| 45 |
+
"source": [
|
| 46 |
+
"# Always remember to do this!\n",
|
| 47 |
+
"load_dotenv(override=True)"
|
| 48 |
+
]
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"cell_type": "code",
|
| 52 |
+
"execution_count": null,
|
| 53 |
+
"id": "a53039f5",
|
| 54 |
+
"metadata": {},
|
| 55 |
+
"outputs": [],
|
| 56 |
+
"source": [
|
| 57 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 58 |
+
"\n",
|
| 59 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 60 |
+
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 61 |
+
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 62 |
+
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 63 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 64 |
+
"\n",
|
| 65 |
+
"if openai_api_key:\n",
|
| 66 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 67 |
+
"else:\n",
|
| 68 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 69 |
+
" \n",
|
| 70 |
+
"if anthropic_api_key:\n",
|
| 71 |
+
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 72 |
+
"else:\n",
|
| 73 |
+
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 74 |
+
"\n",
|
| 75 |
+
"if google_api_key:\n",
|
| 76 |
+
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 77 |
+
"else:\n",
|
| 78 |
+
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 79 |
+
"\n",
|
| 80 |
+
"if deepseek_api_key:\n",
|
| 81 |
+
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 82 |
+
"else:\n",
|
| 83 |
+
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 84 |
+
"\n",
|
| 85 |
+
"if groq_api_key:\n",
|
| 86 |
+
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 87 |
+
"else:\n",
|
| 88 |
+
" print(\"Groq API Key not set (and this is optional)\")"
|
| 89 |
+
]
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"cell_type": "code",
|
| 93 |
+
"execution_count": null,
|
| 94 |
+
"id": "a2f091d4",
|
| 95 |
+
"metadata": {},
|
| 96 |
+
"outputs": [],
|
| 97 |
+
"source": [
|
| 98 |
+
"# Generate a challenging question\n",
|
| 99 |
+
"\n",
|
| 100 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 101 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 102 |
+
"messages = [{\"role\": \"user\", \"content\": request}]\n",
|
| 103 |
+
"\n",
|
| 104 |
+
"openai = OpenAI()\n",
|
| 105 |
+
"response = openai.chat.completions.create(\n",
|
| 106 |
+
" model=\"gpt-5-mini\",\n",
|
| 107 |
+
" messages=messages,\n",
|
| 108 |
+
")\n",
|
| 109 |
+
"question = response.choices[0].message.content\n",
|
| 110 |
+
"print(f\"Generated Question: {question}\")"
|
| 111 |
+
]
|
| 112 |
+
},
|
| 113 |
+
{
|
| 114 |
+
"cell_type": "markdown",
|
| 115 |
+
"id": "6db23f57",
|
| 116 |
+
"metadata": {},
|
| 117 |
+
"source": [
|
| 118 |
+
"## Intelligent Orchestrator Pattern\n",
|
| 119 |
+
"\n",
|
| 120 |
+
"This pattern combines:\n",
|
| 121 |
+
"1. **Orchestrator-Workers** - Breaking down complex tasks\n",
|
| 122 |
+
"2. **Intelligent Routing** - Matching models to their strengths\n",
|
| 123 |
+
"3. **Synthesis** - Combining specialized responses"
|
| 124 |
+
]
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"cell_type": "code",
|
| 128 |
+
"execution_count": null,
|
| 129 |
+
"id": "7659a40a",
|
| 130 |
+
"metadata": {},
|
| 131 |
+
"outputs": [],
|
| 132 |
+
"source": [
|
| 133 |
+
"# STEP 1: Orchestrator breaks down the question and assigns models based on their strengths\n",
|
| 134 |
+
"\n",
|
| 135 |
+
"orchestrator_prompt = f\"\"\"You are an intelligent orchestrator AI. Analyze this complex question and:\n",
|
| 136 |
+
"\n",
|
| 137 |
+
"1. Break it down into 3-4 simpler sub-questions\n",
|
| 138 |
+
"2. For each sub-question, recommend which type of AI model would be best suited\n",
|
| 139 |
+
"\n",
|
| 140 |
+
"Available models and their strengths:\n",
|
| 141 |
+
"- gpt-5-nano: Excellent at reasoning, complex logic, and nuanced analysis\n",
|
| 142 |
+
"- claude-sonnet-4-5: Strong at creative writing, empathy, and ethical reasoning\n",
|
| 143 |
+
"- gemini-2.5-flash: Fast at factual retrieval, technical explanations, and structured data\n",
|
| 144 |
+
"- deepseek-chat: Great at code generation, mathematical problems, and technical documentation\n",
|
| 145 |
+
"- openai/gpt-oss-120b: Good general purpose, cost-effective for straightforward tasks\n",
|
| 146 |
+
"- llama3.2: Privacy-focused local model, good for sensitive data and general tasks\n",
|
| 147 |
+
"\n",
|
| 148 |
+
"Original question: {question}\n",
|
| 149 |
+
"\n",
|
| 150 |
+
"Respond with JSON only, in this format:\n",
|
| 151 |
+
"{{\n",
|
| 152 |
+
" \"sub_questions\": [\n",
|
| 153 |
+
" {{\n",
|
| 154 |
+
" \"question\": \"the sub-question text\",\n",
|
| 155 |
+
" \"reasoning\": \"why this model is best for this sub-question\",\n",
|
| 156 |
+
" \"recommended_model\": \"model_name\"\n",
|
| 157 |
+
" }},\n",
|
| 158 |
+
" ...\n",
|
| 159 |
+
" ]\n",
|
| 160 |
+
"}}\"\"\"\n",
|
| 161 |
+
"\n",
|
| 162 |
+
"orchestrator_messages = [{\"role\": \"user\", \"content\": orchestrator_prompt}]\n",
|
| 163 |
+
"\n",
|
| 164 |
+
"response = openai.chat.completions.create(\n",
|
| 165 |
+
" model=\"gpt-5-mini\",\n",
|
| 166 |
+
" messages=orchestrator_messages,\n",
|
| 167 |
+
")\n",
|
| 168 |
+
"orchestration_plan = json.loads(response.choices[0].message.content)\n",
|
| 169 |
+
"\n",
|
| 170 |
+
"print(\"🎯 Orchestrator's Intelligent Routing Plan:\\n\")\n",
|
| 171 |
+
"for i, item in enumerate(orchestration_plan[\"sub_questions\"], 1):\n",
|
| 172 |
+
" print(f\"{i}. SUB-QUESTION: {item['question']}\")\n",
|
| 173 |
+
" print(f\" 📍 ASSIGNED TO: {item['recommended_model']}\")\n",
|
| 174 |
+
" print(f\" 💡 REASONING: {item['reasoning']}\\n\")"
|
| 175 |
+
]
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"cell_type": "markdown",
|
| 179 |
+
"id": "d62e4fa8",
|
| 180 |
+
"metadata": {},
|
| 181 |
+
"source": [
|
| 182 |
+
"## For Ollama setup\n",
|
| 183 |
+
"\n",
|
| 184 |
+
"Ollama runs a local web service that provides an OpenAI-compatible endpoint, \n",
|
| 185 |
+
"and runs models locally using high-performance C++ code.\n",
|
| 186 |
+
"\n",
|
| 187 |
+
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 188 |
+
"\n",
|
| 189 |
+
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 190 |
+
"\n",
|
| 191 |
+
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+`) and run `ollama serve`"
|
| 192 |
+
]
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"cell_type": "markdown",
|
| 196 |
+
"id": "2761338c",
|
| 197 |
+
"metadata": {},
|
| 198 |
+
"source": [
|
| 199 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 200 |
+
" <tr>\n",
|
| 201 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 202 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 203 |
+
" </td>\n",
|
| 204 |
+
" <td>\n",
|
| 205 |
+
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 206 |
+
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See <a href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 207 |
+
" </span>\n",
|
| 208 |
+
" </td>\n",
|
| 209 |
+
" </tr>\n",
|
| 210 |
+
"</table>"
|
| 211 |
+
]
|
| 212 |
+
},
|
| 213 |
+
{
|
| 214 |
+
"cell_type": "code",
|
| 215 |
+
"execution_count": null,
|
| 216 |
+
"id": "35785614",
|
| 217 |
+
"metadata": {},
|
| 218 |
+
"outputs": [],
|
| 219 |
+
"source": [
|
| 220 |
+
"!ollama pull llama3.2"
|
| 221 |
+
]
|
| 222 |
+
},
|
| 223 |
+
{
|
| 224 |
+
"cell_type": "code",
|
| 225 |
+
"execution_count": null,
|
| 226 |
+
"id": "e28b68fb",
|
| 227 |
+
"metadata": {},
|
| 228 |
+
"outputs": [],
|
| 229 |
+
"source": [
|
| 230 |
+
"# STEP 2: Initialize all model clients\n",
|
| 231 |
+
"\n",
|
| 232 |
+
"claude = Anthropic()\n",
|
| 233 |
+
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 234 |
+
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 235 |
+
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 236 |
+
"ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 237 |
+
"\n",
|
| 238 |
+
"# Map model names to their API clients\n",
|
| 239 |
+
"model_clients = {\n",
|
| 240 |
+
" \"gpt-5-nano\": (\"openai\", openai),\n",
|
| 241 |
+
" \"claude-sonnet-4-5\": (\"claude\", claude),\n",
|
| 242 |
+
" \"gemini-2.5-flash\": (\"gemini\", gemini),\n",
|
| 243 |
+
" \"deepseek-chat\": (\"deepseek\", deepseek),\n",
|
| 244 |
+
" \"openai/gpt-oss-120b\": (\"groq\", groq),\n",
|
| 245 |
+
" \"llama3.2\": (\"ollama\", ollama)\n",
|
| 246 |
+
"}\n",
|
| 247 |
+
"\n",
|
| 248 |
+
"print(\"✅ All model clients initialized\")"
|
| 249 |
+
]
|
| 250 |
+
},
|
| 251 |
+
{
|
| 252 |
+
"cell_type": "code",
|
| 253 |
+
"execution_count": null,
|
| 254 |
+
"id": "54b9bce6",
|
| 255 |
+
"metadata": {},
|
| 256 |
+
"outputs": [],
|
| 257 |
+
"source": [
|
| 258 |
+
"# STEP 3: Execute sub-questions with orchestrator's model recommendations\n",
|
| 259 |
+
"\n",
|
| 260 |
+
"sub_answers = {}\n",
|
| 261 |
+
"\n",
|
| 262 |
+
"for idx, item in enumerate(orchestration_plan[\"sub_questions\"], 1):\n",
|
| 263 |
+
" sub_q = item[\"question\"]\n",
|
| 264 |
+
" recommended_model = item[\"recommended_model\"]\n",
|
| 265 |
+
" \n",
|
| 266 |
+
" print(f\"\\n🤖 Task {idx}: Using {recommended_model}\")\n",
|
| 267 |
+
" print(f\"📝 Question: {sub_q[:80]}...\")\n",
|
| 268 |
+
" \n",
|
| 269 |
+
" messages = [{\"role\": \"user\", \"content\": sub_q}]\n",
|
| 270 |
+
" \n",
|
| 271 |
+
" # Route to the appropriate client\n",
|
| 272 |
+
" client_type, client = model_clients.get(recommended_model, (\"openai\", openai))\n",
|
| 273 |
+
" \n",
|
| 274 |
+
" try:\n",
|
| 275 |
+
" if client_type == \"claude\":\n",
|
| 276 |
+
" response = client.messages.create(\n",
|
| 277 |
+
" model=recommended_model, \n",
|
| 278 |
+
" messages=messages, \n",
|
| 279 |
+
" max_tokens=800\n",
|
| 280 |
+
" )\n",
|
| 281 |
+
" answer = response.content[0].text\n",
|
| 282 |
+
" else:\n",
|
| 283 |
+
" response = client.chat.completions.create(\n",
|
| 284 |
+
" model=recommended_model, \n",
|
| 285 |
+
" messages=messages\n",
|
| 286 |
+
" )\n",
|
| 287 |
+
" answer = response.choices[0].message.content\n",
|
| 288 |
+
" \n",
|
| 289 |
+
" sub_answers[sub_q] = {\n",
|
| 290 |
+
" \"model\": recommended_model,\n",
|
| 291 |
+
" \"answer\": answer,\n",
|
| 292 |
+
" \"reasoning\": item[\"reasoning\"]\n",
|
| 293 |
+
" }\n",
|
| 294 |
+
" print(f\"✅ Completed successfully\\n\")\n",
|
| 295 |
+
" \n",
|
| 296 |
+
" except Exception as e:\n",
|
| 297 |
+
" print(f\"❌ Error with {recommended_model}: {str(e)}\")\n",
|
| 298 |
+
" # Fallback to GPT-5-mini\n",
|
| 299 |
+
" response = openai.chat.completions.create(\n",
|
| 300 |
+
" model=\"gpt-5-mini\", \n",
|
| 301 |
+
" messages=messages\n",
|
| 302 |
+
" )\n",
|
| 303 |
+
" answer = response.choices[0].message.content\n",
|
| 304 |
+
" sub_answers[sub_q] = {\n",
|
| 305 |
+
" \"model\": \"gpt-5-mini (fallback)\",\n",
|
| 306 |
+
" \"answer\": answer,\n",
|
| 307 |
+
" \"reasoning\": \"Fallback due to error\"\n",
|
| 308 |
+
" }"
|
| 309 |
+
]
|
| 310 |
+
},
|
| 311 |
+
{
|
| 312 |
+
"cell_type": "code",
|
| 313 |
+
"execution_count": null,
|
| 314 |
+
"id": "cfe99aba",
|
| 315 |
+
"metadata": {},
|
| 316 |
+
"outputs": [],
|
| 317 |
+
"source": [
|
| 318 |
+
"# Display the sub-answers\n",
|
| 319 |
+
"\n",
|
| 320 |
+
"for sub_q, data in sub_answers.items():\n",
|
| 321 |
+
" display(Markdown(f\"### Sub-Question: {sub_q}\"))\n",
|
| 322 |
+
" display(Markdown(f\"**Model Used:** {data['model']}\"))\n",
|
| 323 |
+
" display(Markdown(f\"**Answer:** {data['answer']}\"))\n",
|
| 324 |
+
" print(\"\\n\" + \"=\"*80 + \"\\n\")"
|
| 325 |
+
]
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"cell_type": "code",
|
| 329 |
+
"execution_count": null,
|
| 330 |
+
"id": "ff84289b",
|
| 331 |
+
"metadata": {},
|
| 332 |
+
"outputs": [],
|
| 333 |
+
"source": [
|
| 334 |
+
"# STEP 4: Synthesis - Combine all specialized responses\n",
|
| 335 |
+
"\n",
|
| 336 |
+
"synthesis_prompt = f\"\"\"You are a synthesis AI combining specialized responses into a comprehensive answer.\n",
|
| 337 |
+
"\n",
|
| 338 |
+
"ORIGINAL QUESTION: {question}\n",
|
| 339 |
+
"\n",
|
| 340 |
+
"The orchestrator intelligently routed sub-questions to models based on their strengths:\n",
|
| 341 |
+
"\n",
|
| 342 |
+
"\"\"\"\n",
|
| 343 |
+
"\n",
|
| 344 |
+
"for sub_q, data in sub_answers.items():\n",
|
| 345 |
+
" synthesis_prompt += f\"\\n{'='*60}\\n\"\n",
|
| 346 |
+
" synthesis_prompt += f\"SUB-QUESTION: {sub_q}\\n\"\n",
|
| 347 |
+
" synthesis_prompt += f\"ASSIGNED TO: {data['model']}\\n\"\n",
|
| 348 |
+
" synthesis_prompt += f\"SELECTION REASONING: {data['reasoning']}\\n\"\n",
|
| 349 |
+
" synthesis_prompt += f\"ANSWER: {data['answer']}\\n\"\n",
|
| 350 |
+
"\n",
|
| 351 |
+
"synthesis_prompt += f\"\\n{'='*60}\\n\"\n",
|
| 352 |
+
"synthesis_prompt += \"\\nSynthesize these specialized responses into one coherent, comprehensive answer to the original question.\"\n",
|
| 353 |
+
"synthesis_prompt += \"\\nHighlight how different model strengths contributed to the final answer.\"\n",
|
| 354 |
+
"\n",
|
| 355 |
+
"synthesis_messages = [{\"role\": \"user\", \"content\": synthesis_prompt}]\n",
|
| 356 |
+
"response = openai.chat.completions.create(\n",
|
| 357 |
+
" model=\"gpt-5-nano\",\n",
|
| 358 |
+
" messages=synthesis_messages,\n",
|
| 359 |
+
")\n",
|
| 360 |
+
"synthesized_answer = response.choices[0].message.content\n",
|
| 361 |
+
"\n",
|
| 362 |
+
"display(Markdown(\"## 🎯 Intelligently Orchestrated & Synthesized Answer:\"))\n",
|
| 363 |
+
"display(Markdown(synthesized_answer))"
|
| 364 |
+
]
|
| 365 |
+
},
|
| 366 |
+
{
|
| 367 |
+
"cell_type": "markdown",
|
| 368 |
+
"id": "5191a58a",
|
| 369 |
+
"metadata": {},
|
| 370 |
+
"source": [
|
| 371 |
+
"## Pattern Analysis"
|
| 372 |
+
]
|
| 373 |
+
},
|
| 374 |
+
{
|
| 375 |
+
"cell_type": "code",
|
| 376 |
+
"execution_count": null,
|
| 377 |
+
"id": "7fa0de4c",
|
| 378 |
+
"metadata": {},
|
| 379 |
+
"outputs": [],
|
| 380 |
+
"source": [
|
| 381 |
+
"# Display pattern analysis\n",
|
| 382 |
+
"\n",
|
| 383 |
+
"model_list = '\\n'.join(f'- **{data[\"model\"]}**: {data[\"reasoning\"]}' for data in sub_answers.values())\n",
|
| 384 |
+
"\n",
|
| 385 |
+
"analysis = f\"\"\"\n",
|
| 386 |
+
"## 📊 Pattern Analysis\n",
|
| 387 |
+
"\n",
|
| 388 |
+
"### Patterns Used from Anthropic's Building Effective Agents:\n",
|
| 389 |
+
"\n",
|
| 390 |
+
"1. **Orchestrator-Workers Pattern** ✅\n",
|
| 391 |
+
" - One LLM coordinates the workflow\n",
|
| 392 |
+
" - Breaks complex tasks into subtasks\n",
|
| 393 |
+
" - Distributes work to specialized workers\n",
|
| 394 |
+
" - Synthesizes results into coherent output\n",
|
| 395 |
+
"\n",
|
| 396 |
+
"2. **Intelligent Routing Pattern** ✅\n",
|
| 397 |
+
" - Matches models to their specific strengths\n",
|
| 398 |
+
" - Dynamic model selection based on task requirements\n",
|
| 399 |
+
" - Optimizes for quality by leveraging specialization\n",
|
| 400 |
+
"\n",
|
| 401 |
+
"3. **Implicit Parallelization** ⚡\n",
|
| 402 |
+
" - Sub-questions can be executed in parallel\n",
|
| 403 |
+
" - Independent tasks distributed across models\n",
|
| 404 |
+
"\n",
|
| 405 |
+
"### Key Innovations:\n",
|
| 406 |
+
"\n",
|
| 407 |
+
"**Capability-Aware Orchestration**: This is more sophisticated than simple task distribution. \n",
|
| 408 |
+
"The orchestrator:\n",
|
| 409 |
+
"- Understands each model's strengths and weaknesses\n",
|
| 410 |
+
"- Makes intelligent routing decisions\n",
|
| 411 |
+
"- Documents its reasoning for transparency\n",
|
| 412 |
+
"- Enables cost optimization (expensive models only where needed)\n",
|
| 413 |
+
"\n",
|
| 414 |
+
"### Models Used in This Run:\n",
|
| 415 |
+
"{model_list}\n",
|
| 416 |
+
"\n",
|
| 417 |
+
"### Total API Calls:\n",
|
| 418 |
+
"- 1 orchestrator call (question decomposition)\n",
|
| 419 |
+
"- {len(sub_answers)} worker calls (sub-question answering)\n",
|
| 420 |
+
"- 1 synthesizer call (final answer composition)\n",
|
| 421 |
+
"- **Total: {len(sub_answers) + 2} API calls**\n",
|
| 422 |
+
"\"\"\"\n",
|
| 423 |
+
"\n",
|
| 424 |
+
"display(Markdown(analysis))"
|
| 425 |
+
]
|
| 426 |
+
},
|
| 427 |
+
{
|
| 428 |
+
"cell_type": "markdown",
|
| 429 |
+
"id": "3434b0a7",
|
| 430 |
+
"metadata": {},
|
| 431 |
+
"source": [
|
| 432 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 433 |
+
" <tr>\n",
|
| 434 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 435 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 436 |
+
" </td>\n",
|
| 437 |
+
" <td>\n",
|
| 438 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 439 |
+
" <span style=\"color:#ff7800;\">Try modifying the orchestrator prompt to include cost considerations. Add a 'budget' field for each model and have the orchestrator balance quality vs. cost when making routing decisions.\n",
|
| 440 |
+
" </span>\n",
|
| 441 |
+
" </td>\n",
|
| 442 |
+
" </tr>\n",
|
| 443 |
+
"</table>"
|
| 444 |
+
]
|
| 445 |
+
},
|
| 446 |
+
{
|
| 447 |
+
"cell_type": "markdown",
|
| 448 |
+
"id": "0168301c",
|
| 449 |
+
"metadata": {},
|
| 450 |
+
"source": [
|
| 451 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 452 |
+
" <tr>\n",
|
| 453 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 454 |
+
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 455 |
+
" </td>\n",
|
| 456 |
+
" <td>\n",
|
| 457 |
+
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 458 |
+
" <span style=\"color:#00bfff;\">The Intelligent Orchestrator pattern is critical for production systems where:\n",
|
| 459 |
+
" <ul>\n",
|
| 460 |
+
" <li><b>Cost optimization</b> matters - use expensive models only where their strengths are needed</li>\n",
|
| 461 |
+
" <li><b>Quality is paramount</b> - leverage specialization for each aspect of complex tasks</li>\n",
|
| 462 |
+
" <li><b>Scalability is required</b> - easily add new models and define their capabilities</li>\n",
|
| 463 |
+
" <li><b>Transparency is valued</b> - document routing decisions and reasoning</li>\n",
|
| 464 |
+
" </ul>\n",
|
| 465 |
+
" This pattern mirrors how you'd assemble a team of specialists for a complex project, making it intuitive for business stakeholders to understand.\n",
|
| 466 |
+
" </span>\n",
|
| 467 |
+
" </td>\n",
|
| 468 |
+
" </tr>\n",
|
| 469 |
+
"</table>"
|
| 470 |
+
]
|
| 471 |
+
}
|
| 472 |
+
],
|
| 473 |
+
"metadata": {
|
| 474 |
+
"kernelspec": {
|
| 475 |
+
"display_name": "agents",
|
| 476 |
+
"language": "python",
|
| 477 |
+
"name": "python3"
|
| 478 |
+
},
|
| 479 |
+
"language_info": {
|
| 480 |
+
"codemirror_mode": {
|
| 481 |
+
"name": "ipython",
|
| 482 |
+
"version": 3
|
| 483 |
+
},
|
| 484 |
+
"file_extension": ".py",
|
| 485 |
+
"mimetype": "text/x-python",
|
| 486 |
+
"name": "python",
|
| 487 |
+
"nbconvert_exporter": "python",
|
| 488 |
+
"pygments_lexer": "ipython3",
|
| 489 |
+
"version": "3.12.12"
|
| 490 |
+
}
|
| 491 |
+
},
|
| 492 |
+
"nbformat": 4,
|
| 493 |
+
"nbformat_minor": 5
|
| 494 |
+
}
|
community_contributions/2_lab2_perplexity_support.ipynb
ADDED
|
@@ -0,0 +1,497 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "markdown",
|
| 14 |
+
"metadata": {},
|
| 15 |
+
"source": [
|
| 16 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
+
" <tr>\n",
|
| 18 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
+
" </td>\n",
|
| 21 |
+
" <td>\n",
|
| 22 |
+
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
+
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a GitHub account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
+
" </span>\n",
|
| 25 |
+
" </td>\n",
|
| 26 |
+
" </tr>\n",
|
| 27 |
+
"</table>"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"cell_type": "code",
|
| 32 |
+
"execution_count": 1,
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"outputs": [],
|
| 35 |
+
"source": [
|
| 36 |
+
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 37 |
+
"\n",
|
| 38 |
+
"import os\n",
|
| 39 |
+
"import json\n",
|
| 40 |
+
"from dotenv import load_dotenv\n",
|
| 41 |
+
"from openai import OpenAI\n",
|
| 42 |
+
"from anthropic import Anthropic\n",
|
| 43 |
+
"from IPython.display import Markdown, display"
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"cell_type": "code",
|
| 48 |
+
"execution_count": null,
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"outputs": [],
|
| 51 |
+
"source": [
|
| 52 |
+
"# Always remember to do this!\n",
|
| 53 |
+
"load_dotenv(override=True)"
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "code",
|
| 58 |
+
"execution_count": null,
|
| 59 |
+
"metadata": {},
|
| 60 |
+
"outputs": [],
|
| 61 |
+
"source": [
|
| 62 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 63 |
+
"\n",
|
| 64 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 65 |
+
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 66 |
+
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 67 |
+
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 68 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 69 |
+
"perplexity_api_key = os.getenv('PERPLEXITY_API_KEY')\n",
|
| 70 |
+
"\n",
|
| 71 |
+
"if openai_api_key:\n",
|
| 72 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 73 |
+
"else:\n",
|
| 74 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 75 |
+
" \n",
|
| 76 |
+
"if anthropic_api_key:\n",
|
| 77 |
+
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 78 |
+
"else:\n",
|
| 79 |
+
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 80 |
+
"\n",
|
| 81 |
+
"if google_api_key:\n",
|
| 82 |
+
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 83 |
+
"else:\n",
|
| 84 |
+
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 85 |
+
"\n",
|
| 86 |
+
"if deepseek_api_key:\n",
|
| 87 |
+
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 88 |
+
"else:\n",
|
| 89 |
+
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 90 |
+
"\n",
|
| 91 |
+
"if groq_api_key:\n",
|
| 92 |
+
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 93 |
+
"else:\n",
|
| 94 |
+
" print(\"Groq API Key not set (and this is optional)\")\n",
|
| 95 |
+
"\n",
|
| 96 |
+
"if perplexity_api_key:\n",
|
| 97 |
+
" print(f\"Perplexity API Key exists and begins {perplexity_api_key[:4]}\")\n",
|
| 98 |
+
"else:\n",
|
| 99 |
+
" print(\"Perplexity API Key not set (and this is optional)\")"
|
| 100 |
+
]
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"cell_type": "code",
|
| 104 |
+
"execution_count": 4,
|
| 105 |
+
"metadata": {},
|
| 106 |
+
"outputs": [],
|
| 107 |
+
"source": [
|
| 108 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 109 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 110 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 111 |
+
]
|
| 112 |
+
},
|
| 113 |
+
{
|
| 114 |
+
"cell_type": "code",
|
| 115 |
+
"execution_count": null,
|
| 116 |
+
"metadata": {},
|
| 117 |
+
"outputs": [],
|
| 118 |
+
"source": [
|
| 119 |
+
"messages"
|
| 120 |
+
]
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"cell_type": "code",
|
| 124 |
+
"execution_count": null,
|
| 125 |
+
"metadata": {},
|
| 126 |
+
"outputs": [],
|
| 127 |
+
"source": [
|
| 128 |
+
"openai = OpenAI()\n",
|
| 129 |
+
"response = openai.chat.completions.create(\n",
|
| 130 |
+
" model=\"gpt-4o-mini\",\n",
|
| 131 |
+
" messages=messages,\n",
|
| 132 |
+
")\n",
|
| 133 |
+
"question = response.choices[0].message.content\n",
|
| 134 |
+
"print(question)\n"
|
| 135 |
+
]
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"cell_type": "code",
|
| 139 |
+
"execution_count": null,
|
| 140 |
+
"metadata": {},
|
| 141 |
+
"outputs": [],
|
| 142 |
+
"source": [
|
| 143 |
+
"competitors = []\n",
|
| 144 |
+
"answers = []\n",
|
| 145 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 146 |
+
]
|
| 147 |
+
},
|
| 148 |
+
{
|
| 149 |
+
"cell_type": "code",
|
| 150 |
+
"execution_count": null,
|
| 151 |
+
"metadata": {},
|
| 152 |
+
"outputs": [],
|
| 153 |
+
"source": [
|
| 154 |
+
"# The API we know well\n",
|
| 155 |
+
"\n",
|
| 156 |
+
"model_name = \"gpt-4o-mini\"\n",
|
| 157 |
+
"\n",
|
| 158 |
+
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 159 |
+
"answer = response.choices[0].message.content\n",
|
| 160 |
+
"\n",
|
| 161 |
+
"display(Markdown(answer))\n",
|
| 162 |
+
"competitors.append(model_name)\n",
|
| 163 |
+
"answers.append(answer)"
|
| 164 |
+
]
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"cell_type": "code",
|
| 168 |
+
"execution_count": null,
|
| 169 |
+
"metadata": {},
|
| 170 |
+
"outputs": [],
|
| 171 |
+
"source": [
|
| 172 |
+
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 173 |
+
"\n",
|
| 174 |
+
"model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 175 |
+
"\n",
|
| 176 |
+
"claude = Anthropic()\n",
|
| 177 |
+
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 178 |
+
"answer = response.content[0].text\n",
|
| 179 |
+
"\n",
|
| 180 |
+
"display(Markdown(answer))\n",
|
| 181 |
+
"competitors.append(model_name)\n",
|
| 182 |
+
"answers.append(answer)"
|
| 183 |
+
]
|
| 184 |
+
},
|
| 185 |
+
{
|
| 186 |
+
"cell_type": "code",
|
| 187 |
+
"execution_count": null,
|
| 188 |
+
"metadata": {},
|
| 189 |
+
"outputs": [],
|
| 190 |
+
"source": [
|
| 191 |
+
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 192 |
+
"model_name = \"gemini-2.0-flash\"\n",
|
| 193 |
+
"\n",
|
| 194 |
+
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 195 |
+
"answer = response.choices[0].message.content\n",
|
| 196 |
+
"\n",
|
| 197 |
+
"display(Markdown(answer))\n",
|
| 198 |
+
"competitors.append(model_name)\n",
|
| 199 |
+
"answers.append(answer)"
|
| 200 |
+
]
|
| 201 |
+
},
|
| 202 |
+
{
|
| 203 |
+
"cell_type": "code",
|
| 204 |
+
"execution_count": null,
|
| 205 |
+
"metadata": {},
|
| 206 |
+
"outputs": [],
|
| 207 |
+
"source": [
|
| 208 |
+
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 209 |
+
"model_name = \"deepseek-chat\"\n",
|
| 210 |
+
"\n",
|
| 211 |
+
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 212 |
+
"answer = response.choices[0].message.content\n",
|
| 213 |
+
"\n",
|
| 214 |
+
"display(Markdown(answer))\n",
|
| 215 |
+
"competitors.append(model_name)\n",
|
| 216 |
+
"answers.append(answer)"
|
| 217 |
+
]
|
| 218 |
+
},
|
| 219 |
+
{
|
| 220 |
+
"cell_type": "code",
|
| 221 |
+
"execution_count": null,
|
| 222 |
+
"metadata": {},
|
| 223 |
+
"outputs": [],
|
| 224 |
+
"source": [
|
| 225 |
+
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 226 |
+
"model_name = \"llama-3.3-70b-versatile\"\n",
|
| 227 |
+
"\n",
|
| 228 |
+
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 229 |
+
"answer = response.choices[0].message.content\n",
|
| 230 |
+
"\n",
|
| 231 |
+
"display(Markdown(answer))\n",
|
| 232 |
+
"competitors.append(model_name)\n",
|
| 233 |
+
"answers.append(answer)\n"
|
| 234 |
+
]
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"cell_type": "code",
|
| 238 |
+
"execution_count": null,
|
| 239 |
+
"metadata": {},
|
| 240 |
+
"outputs": [],
|
| 241 |
+
"source": [
|
| 242 |
+
"perplexity = OpenAI(api_key=perplexity_api_key, base_url=\"https://api.perplexity.ai\")\n",
|
| 243 |
+
"model_name = \"sonar\"\n",
|
| 244 |
+
"\n",
|
| 245 |
+
"response = perplexity.chat.completions.create(model=model_name, messages=messages)\n",
|
| 246 |
+
"answer = response.choices[0].message.content\n",
|
| 247 |
+
"\n",
|
| 248 |
+
"display(Markdown(answer))\n",
|
| 249 |
+
"competitors.append(model_name)\n",
|
| 250 |
+
"answers.append(answer)"
|
| 251 |
+
]
|
| 252 |
+
},
|
| 253 |
+
{
|
| 254 |
+
"cell_type": "markdown",
|
| 255 |
+
"metadata": {},
|
| 256 |
+
"source": [
|
| 257 |
+
"## For the next cell, we will use Ollama\n",
|
| 258 |
+
"\n",
|
| 259 |
+
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 260 |
+
"and runs models locally using high performance C++ code.\n",
|
| 261 |
+
"\n",
|
| 262 |
+
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 263 |
+
"\n",
|
| 264 |
+
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 265 |
+
"\n",
|
| 266 |
+
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 267 |
+
"\n",
|
| 268 |
+
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 269 |
+
"\n",
|
| 270 |
+
"`ollama pull <model_name>` downloads a model locally \n",
|
| 271 |
+
"`ollama ls` lists all the models you've downloaded \n",
|
| 272 |
+
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 273 |
+
]
|
| 274 |
+
},
|
| 275 |
+
{
|
| 276 |
+
"cell_type": "markdown",
|
| 277 |
+
"metadata": {},
|
| 278 |
+
"source": [
|
| 279 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 280 |
+
" <tr>\n",
|
| 281 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 282 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 283 |
+
" </td>\n",
|
| 284 |
+
" <td>\n",
|
| 285 |
+
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 286 |
+
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See <a href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 287 |
+
" </span>\n",
|
| 288 |
+
" </td>\n",
|
| 289 |
+
" </tr>\n",
|
| 290 |
+
"</table>"
|
| 291 |
+
]
|
| 292 |
+
},
|
| 293 |
+
{
|
| 294 |
+
"cell_type": "code",
|
| 295 |
+
"execution_count": null,
|
| 296 |
+
"metadata": {},
|
| 297 |
+
"outputs": [],
|
| 298 |
+
"source": [
|
| 299 |
+
"!ollama pull llama3.2"
|
| 300 |
+
]
|
| 301 |
+
},
|
| 302 |
+
{
|
| 303 |
+
"cell_type": "code",
|
| 304 |
+
"execution_count": null,
|
| 305 |
+
"metadata": {},
|
| 306 |
+
"outputs": [],
|
| 307 |
+
"source": [
|
| 308 |
+
"ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 309 |
+
"model_name = \"llama3.2\"\n",
|
| 310 |
+
"\n",
|
| 311 |
+
"response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 312 |
+
"answer = response.choices[0].message.content\n",
|
| 313 |
+
"\n",
|
| 314 |
+
"display(Markdown(answer))\n",
|
| 315 |
+
"competitors.append(model_name)\n",
|
| 316 |
+
"answers.append(answer)"
|
| 317 |
+
]
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"cell_type": "code",
|
| 321 |
+
"execution_count": null,
|
| 322 |
+
"metadata": {},
|
| 323 |
+
"outputs": [],
|
| 324 |
+
"source": [
|
| 325 |
+
"# So where are we?\n",
|
| 326 |
+
"\n",
|
| 327 |
+
"print(competitors)\n",
|
| 328 |
+
"print(answers)\n"
|
| 329 |
+
]
|
| 330 |
+
},
|
| 331 |
+
{
|
| 332 |
+
"cell_type": "code",
|
| 333 |
+
"execution_count": null,
|
| 334 |
+
"metadata": {},
|
| 335 |
+
"outputs": [],
|
| 336 |
+
"source": [
|
| 337 |
+
"# It's nice to know how to use \"zip\"\n",
|
| 338 |
+
"for competitor, answer in zip(competitors, answers):\n",
|
| 339 |
+
" print(f\"Competitor: {competitor}\\n\\n{answer}\")\n"
|
| 340 |
+
]
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"cell_type": "code",
|
| 344 |
+
"execution_count": 20,
|
| 345 |
+
"metadata": {},
|
| 346 |
+
"outputs": [],
|
| 347 |
+
"source": [
|
| 348 |
+
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 349 |
+
"\n",
|
| 350 |
+
"together = \"\"\n",
|
| 351 |
+
"for index, answer in enumerate(answers):\n",
|
| 352 |
+
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 353 |
+
" together += answer + \"\\n\\n\""
|
| 354 |
+
]
|
| 355 |
+
},
|
| 356 |
+
{
|
| 357 |
+
"cell_type": "code",
|
| 358 |
+
"execution_count": null,
|
| 359 |
+
"metadata": {},
|
| 360 |
+
"outputs": [],
|
| 361 |
+
"source": [
|
| 362 |
+
"print(together)"
|
| 363 |
+
]
|
| 364 |
+
},
|
| 365 |
+
{
|
| 366 |
+
"cell_type": "code",
|
| 367 |
+
"execution_count": 22,
|
| 368 |
+
"metadata": {},
|
| 369 |
+
"outputs": [],
|
| 370 |
+
"source": [
|
| 371 |
+
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 372 |
+
"Each model has been given this question:\n",
|
| 373 |
+
"\n",
|
| 374 |
+
"{question}\n",
|
| 375 |
+
"\n",
|
| 376 |
+
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 377 |
+
"Respond with JSON, and only JSON, with the following format:\n",
|
| 378 |
+
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 379 |
+
"\n",
|
| 380 |
+
"Here are the responses from each competitor:\n",
|
| 381 |
+
"\n",
|
| 382 |
+
"{together}\n",
|
| 383 |
+
"\n",
|
| 384 |
+
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 385 |
+
]
|
| 386 |
+
},
|
| 387 |
+
{
|
| 388 |
+
"cell_type": "code",
|
| 389 |
+
"execution_count": null,
|
| 390 |
+
"metadata": {},
|
| 391 |
+
"outputs": [],
|
| 392 |
+
"source": [
|
| 393 |
+
"print(judge)"
|
| 394 |
+
]
|
| 395 |
+
},
|
| 396 |
+
{
|
| 397 |
+
"cell_type": "code",
|
| 398 |
+
"execution_count": 29,
|
| 399 |
+
"metadata": {},
|
| 400 |
+
"outputs": [],
|
| 401 |
+
"source": [
|
| 402 |
+
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 403 |
+
]
|
| 404 |
+
},
|
| 405 |
+
{
|
| 406 |
+
"cell_type": "code",
|
| 407 |
+
"execution_count": null,
|
| 408 |
+
"metadata": {},
|
| 409 |
+
"outputs": [],
|
| 410 |
+
"source": [
|
| 411 |
+
"# Judgement time!\n",
|
| 412 |
+
"\n",
|
| 413 |
+
"openai = OpenAI()\n",
|
| 414 |
+
"response = openai.chat.completions.create(\n",
|
| 415 |
+
" model=\"o3-mini\",\n",
|
| 416 |
+
" messages=judge_messages,\n",
|
| 417 |
+
")\n",
|
| 418 |
+
"results = response.choices[0].message.content\n",
|
| 419 |
+
"print(results)\n"
|
| 420 |
+
]
|
| 421 |
+
},
|
| 422 |
+
{
|
| 423 |
+
"cell_type": "code",
|
| 424 |
+
"execution_count": null,
|
| 425 |
+
"metadata": {},
|
| 426 |
+
"outputs": [],
|
| 427 |
+
"source": [
|
| 428 |
+
"# OK let's turn this into results!\n",
|
| 429 |
+
"\n",
|
| 430 |
+
"results_dict = json.loads(results)\n",
|
| 431 |
+
"ranks = results_dict[\"results\"]\n",
|
| 432 |
+
"for index, result in enumerate(ranks):\n",
|
| 433 |
+
" competitor = competitors[int(result)-1]\n",
|
| 434 |
+
" print(f\"Rank {index+1}: {competitor}\")"
|
| 435 |
+
]
|
| 436 |
+
},
|
| 437 |
+
{
|
| 438 |
+
"cell_type": "markdown",
|
| 439 |
+
"metadata": {},
|
| 440 |
+
"source": [
|
| 441 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 442 |
+
" <tr>\n",
|
| 443 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 444 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 445 |
+
" </td>\n",
|
| 446 |
+
" <td>\n",
|
| 447 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 448 |
+
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 449 |
+
" </span>\n",
|
| 450 |
+
" </td>\n",
|
| 451 |
+
" </tr>\n",
|
| 452 |
+
"</table>"
|
| 453 |
+
]
|
| 454 |
+
},
|
| 455 |
+
{
|
| 456 |
+
"cell_type": "markdown",
|
| 457 |
+
"metadata": {},
|
| 458 |
+
"source": [
|
| 459 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 460 |
+
" <tr>\n",
|
| 461 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 462 |
+
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 463 |
+
" </td>\n",
|
| 464 |
+
" <td>\n",
|
| 465 |
+
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 466 |
+
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 467 |
+
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 468 |
+
" to business projects where accuracy is critical.\n",
|
| 469 |
+
" </span>\n",
|
| 470 |
+
" </td>\n",
|
| 471 |
+
" </tr>\n",
|
| 472 |
+
"</table>"
|
| 473 |
+
]
|
| 474 |
+
}
|
| 475 |
+
],
|
| 476 |
+
"metadata": {
|
| 477 |
+
"kernelspec": {
|
| 478 |
+
"display_name": ".venv",
|
| 479 |
+
"language": "python",
|
| 480 |
+
"name": "python3"
|
| 481 |
+
},
|
| 482 |
+
"language_info": {
|
| 483 |
+
"codemirror_mode": {
|
| 484 |
+
"name": "ipython",
|
| 485 |
+
"version": 3
|
| 486 |
+
},
|
| 487 |
+
"file_extension": ".py",
|
| 488 |
+
"mimetype": "text/x-python",
|
| 489 |
+
"name": "python",
|
| 490 |
+
"nbconvert_exporter": "python",
|
| 491 |
+
"pygments_lexer": "ipython3",
|
| 492 |
+
"version": "3.12.3"
|
| 493 |
+
}
|
| 494 |
+
},
|
| 495 |
+
"nbformat": 4,
|
| 496 |
+
"nbformat_minor": 2
|
| 497 |
+
}
|
community_contributions/2_lab2_qualitycode_review.ipynb
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"id": "4226f6f7",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"import os\n",
|
| 11 |
+
"import json\n",
|
| 12 |
+
"from dotenv import load_dotenv\n",
|
| 13 |
+
"from openai import OpenAI\n",
|
| 14 |
+
"from IPython.display import Markdown, display"
|
| 15 |
+
]
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"cell_type": "code",
|
| 19 |
+
"execution_count": 5,
|
| 20 |
+
"id": "4cdb4a69",
|
| 21 |
+
"metadata": {},
|
| 22 |
+
"outputs": [],
|
| 23 |
+
"source": [
|
| 24 |
+
"load_dotenv(override=True)\n",
|
| 25 |
+
"\n",
|
| 26 |
+
"openai_api_key = os.getenv(\"OPENAI_API_KEY\")\n",
|
| 27 |
+
"google_api_key = os.getenv(\"GOOGLE_API_KEY\")\n",
|
| 28 |
+
"\n",
|
| 29 |
+
"if openai_api_key is None:\n",
|
| 30 |
+
" raise ValueError(\"OPENAI_API_KEY is not set\")\n",
|
| 31 |
+
"\n",
|
| 32 |
+
"if google_api_key is None:\n",
|
| 33 |
+
" raise ValueError(\"GOOGLE_API_KEY is not set\")\n",
|
| 34 |
+
"\n",
|
| 35 |
+
"\n",
|
| 36 |
+
"\n",
|
| 37 |
+
"# The API we know well\n",
|
| 38 |
+
"# I've updated this with the latest model, but it can take some time because it likes to think!\n",
|
| 39 |
+
"# Replace the model with gpt-4.1-mini if you'd prefer not to wait 1-2 mins"
|
| 40 |
+
]
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"cell_type": "code",
|
| 44 |
+
"execution_count": 3,
|
| 45 |
+
"id": "31c74663",
|
| 46 |
+
"metadata": {},
|
| 47 |
+
"outputs": [],
|
| 48 |
+
"source": [
|
| 49 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to generate a code for algorithm like binary tree for live coding competition. \"\n",
|
| 50 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 51 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 52 |
+
]
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"cell_type": "code",
|
| 56 |
+
"execution_count": 4,
|
| 57 |
+
"id": "0b9dc1d7",
|
| 58 |
+
"metadata": {},
|
| 59 |
+
"outputs": [
|
| 60 |
+
{
|
| 61 |
+
"name": "stdout",
|
| 62 |
+
"output_type": "stream",
|
| 63 |
+
"text": [
|
| 64 |
+
"[{'role': 'user', 'content': 'Please come up with a challenging, nuanced question that I can ask a number of LLMs to generate a code for algorithm like binary tree for live coding competition. Answer only with the question, no explanation.'}]\n"
|
| 65 |
+
]
|
| 66 |
+
}
|
| 67 |
+
],
|
| 68 |
+
"source": [
|
| 69 |
+
"print(messages)"
|
| 70 |
+
]
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"cell_type": "code",
|
| 74 |
+
"execution_count": 6,
|
| 75 |
+
"id": "298de8ab",
|
| 76 |
+
"metadata": {},
|
| 77 |
+
"outputs": [
|
| 78 |
+
{
|
| 79 |
+
"name": "stdout",
|
| 80 |
+
"output_type": "stream",
|
| 81 |
+
"text": [
|
| 82 |
+
"How would you implement a binary tree in Python that includes methods for insertion, deletion, traversal (in-order, pre-order, post-order), and searching for a specific value, while also ensuring balanced height after each insertion?\n"
|
| 83 |
+
]
|
| 84 |
+
}
|
| 85 |
+
],
|
| 86 |
+
"source": [
|
| 87 |
+
"openai = OpenAI()\n",
|
| 88 |
+
"response = openai.chat.completions.create(\n",
|
| 89 |
+
" model=\"gpt-4o-mini\",\n",
|
| 90 |
+
" messages=messages,\n",
|
| 91 |
+
")\n",
|
| 92 |
+
"question = response.choices[0].message.content\n",
|
| 93 |
+
"print(question)"
|
| 94 |
+
]
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"cell_type": "code",
|
| 98 |
+
"execution_count": 7,
|
| 99 |
+
"id": "b26c539a",
|
| 100 |
+
"metadata": {},
|
| 101 |
+
"outputs": [],
|
| 102 |
+
"source": [
|
| 103 |
+
"competitors = []\n",
|
| 104 |
+
"answers = []\n",
|
| 105 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"cell_type": "code",
|
| 110 |
+
"execution_count": null,
|
| 111 |
+
"id": "cdd1c225",
|
| 112 |
+
"metadata": {},
|
| 113 |
+
"outputs": [],
|
| 114 |
+
"source": [
|
| 115 |
+
"model_name = \"gpt-5-mini\"\n",
|
| 116 |
+
"\n",
|
| 117 |
+
"openai = OpenAI()\n",
|
| 118 |
+
"response = openai.chat.completions.create(\n",
|
| 119 |
+
" model=\"gpt-5-mini\",\n",
|
| 120 |
+
" messages=messages,\n",
|
| 121 |
+
")\n",
|
| 122 |
+
"answer = response.choices[0].message.content\n",
|
| 123 |
+
"\n",
|
| 124 |
+
"display(Markdown(answer))\n",
|
| 125 |
+
"answers.append(answer)\n",
|
| 126 |
+
"competitors.append(model_name)\n"
|
| 127 |
+
]
|
| 128 |
+
},
|
| 129 |
+
{
|
| 130 |
+
"cell_type": "code",
|
| 131 |
+
"execution_count": null,
|
| 132 |
+
"id": "ad9ccdb4",
|
| 133 |
+
"metadata": {},
|
| 134 |
+
"outputs": [],
|
| 135 |
+
"source": [
|
| 136 |
+
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 137 |
+
"model_name = \"gemini-2.5-flash\"\n",
|
| 138 |
+
"\n",
|
| 139 |
+
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 140 |
+
"answer = response.choices[0].message.content\n",
|
| 141 |
+
"\n",
|
| 142 |
+
"display(Markdown(answer))\n",
|
| 143 |
+
"competitors.append(model_name)\n",
|
| 144 |
+
"answers.append(answer)"
|
| 145 |
+
]
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"cell_type": "code",
|
| 149 |
+
"execution_count": null,
|
| 150 |
+
"id": "14709041",
|
| 151 |
+
"metadata": {},
|
| 152 |
+
"outputs": [],
|
| 153 |
+
"source": [
|
| 154 |
+
"ollama = OpenAI(base_url=\"http://localhost:11434/v1\")\n",
|
| 155 |
+
"model_name = \"phi3:latest\"\n",
|
| 156 |
+
"\n",
|
| 157 |
+
"response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 158 |
+
"answer = response.choices[0].message.content\n",
|
| 159 |
+
"\n",
|
| 160 |
+
"display(Markdown(answer))\n",
|
| 161 |
+
"competitors.append(model_name)\n",
|
| 162 |
+
"answers.append(answer)"
|
| 163 |
+
]
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"cell_type": "code",
|
| 167 |
+
"execution_count": null,
|
| 168 |
+
"id": "dd5e23f2",
|
| 169 |
+
"metadata": {},
|
| 170 |
+
"outputs": [],
|
| 171 |
+
"source": [
|
| 172 |
+
"print(competitors)\n",
|
| 173 |
+
"print(answers)"
|
| 174 |
+
]
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"cell_type": "code",
|
| 178 |
+
"execution_count": null,
|
| 179 |
+
"id": "96a5c917",
|
| 180 |
+
"metadata": {},
|
| 181 |
+
"outputs": [],
|
| 182 |
+
"source": [
|
| 183 |
+
"# It's nice to know how to use \"zip\"\n",
|
| 184 |
+
"for competitor, answer in zip(competitors, answers):\n",
|
| 185 |
+
" print(f\"Competitor: {competitor}\\n\\n{answer}\")\n"
|
| 186 |
+
]
|
| 187 |
+
},
|
| 188 |
+
{
|
| 189 |
+
"cell_type": "code",
|
| 190 |
+
"execution_count": 25,
|
| 191 |
+
"id": "4e71c1c5",
|
| 192 |
+
"metadata": {},
|
| 193 |
+
"outputs": [],
|
| 194 |
+
"source": [
|
| 195 |
+
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 196 |
+
"\n",
|
| 197 |
+
"together = \"\"\n",
|
| 198 |
+
"for index, answer in enumerate(answers):\n",
|
| 199 |
+
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 200 |
+
" together += answer + \"\\n\\n\""
|
| 201 |
+
]
|
| 202 |
+
},
|
| 203 |
+
{
|
| 204 |
+
"cell_type": "code",
|
| 205 |
+
"execution_count": null,
|
| 206 |
+
"id": "db4b67c4",
|
| 207 |
+
"metadata": {},
|
| 208 |
+
"outputs": [],
|
| 209 |
+
"source": [
|
| 210 |
+
"print(together)"
|
| 211 |
+
]
|
| 212 |
+
},
|
| 213 |
+
{
|
| 214 |
+
"cell_type": "code",
|
| 215 |
+
"execution_count": 26,
|
| 216 |
+
"id": "dbf92ba2",
|
| 217 |
+
"metadata": {},
|
| 218 |
+
"outputs": [],
|
| 219 |
+
"source": [
|
| 220 |
+
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 221 |
+
"Each model has been given this question:\n",
|
| 222 |
+
"\n",
|
| 223 |
+
"{question}\n",
|
| 224 |
+
"\n",
|
| 225 |
+
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 226 |
+
"Respond with JSON, and only JSON, with the following format:\n",
|
| 227 |
+
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 228 |
+
"\n",
|
| 229 |
+
"Here are the responses from each competitor:\n",
|
| 230 |
+
"\n",
|
| 231 |
+
"{together}\n",
|
| 232 |
+
"\n",
|
| 233 |
+
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 234 |
+
]
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"cell_type": "code",
|
| 238 |
+
"execution_count": null,
|
| 239 |
+
"id": "3eebf961",
|
| 240 |
+
"metadata": {},
|
| 241 |
+
"outputs": [],
|
| 242 |
+
"source": [
|
| 243 |
+
"print(judge)"
|
| 244 |
+
]
|
| 245 |
+
},
|
| 246 |
+
{
|
| 247 |
+
"cell_type": "code",
|
| 248 |
+
"execution_count": 27,
|
| 249 |
+
"id": "5953feb5",
|
| 250 |
+
"metadata": {},
|
| 251 |
+
"outputs": [],
|
| 252 |
+
"source": [
|
| 253 |
+
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 254 |
+
]
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"cell_type": "code",
|
| 258 |
+
"execution_count": null,
|
| 259 |
+
"id": "8bde0152",
|
| 260 |
+
"metadata": {},
|
| 261 |
+
"outputs": [],
|
| 262 |
+
"source": [
|
| 263 |
+
"# Judgement time!\n",
|
| 264 |
+
"\n",
|
| 265 |
+
"openai = OpenAI()\n",
|
| 266 |
+
"response = openai.chat.completions.create(\n",
|
| 267 |
+
" model=\"gpt-5-mini\",\n",
|
| 268 |
+
" messages=judge_messages,\n",
|
| 269 |
+
")\n",
|
| 270 |
+
"results = response.choices[0].message.content\n",
|
| 271 |
+
"print(results)\n"
|
| 272 |
+
]
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"cell_type": "code",
|
| 276 |
+
"execution_count": null,
|
| 277 |
+
"id": "2c8f1410",
|
| 278 |
+
"metadata": {},
|
| 279 |
+
"outputs": [],
|
| 280 |
+
"source": [
|
| 281 |
+
"# OK let's turn this into results!\n",
|
| 282 |
+
"\n",
|
| 283 |
+
"results_dict = json.loads(results)\n",
|
| 284 |
+
"ranks = results_dict[\"results\"]\n",
|
| 285 |
+
"for index, result in enumerate(ranks):\n",
|
| 286 |
+
" competitor = competitors[int(result)-1]\n",
|
| 287 |
+
" print(f\"Rank {index+1}: {competitor}\")"
|
| 288 |
+
]
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"cell_type": "code",
|
| 292 |
+
"execution_count": null,
|
| 293 |
+
"id": "e5e6f540",
|
| 294 |
+
"metadata": {},
|
| 295 |
+
"outputs": [],
|
| 296 |
+
"source": []
|
| 297 |
+
}
|
| 298 |
+
],
|
| 299 |
+
"metadata": {
|
| 300 |
+
"kernelspec": {
|
| 301 |
+
"display_name": ".venv",
|
| 302 |
+
"language": "python",
|
| 303 |
+
"name": "python3"
|
| 304 |
+
},
|
| 305 |
+
"language_info": {
|
| 306 |
+
"codemirror_mode": {
|
| 307 |
+
"name": "ipython",
|
| 308 |
+
"version": 3
|
| 309 |
+
},
|
| 310 |
+
"file_extension": ".py",
|
| 311 |
+
"mimetype": "text/x-python",
|
| 312 |
+
"name": "python",
|
| 313 |
+
"nbconvert_exporter": "python",
|
| 314 |
+
"pygments_lexer": "ipython3",
|
| 315 |
+
"version": "3.12.8"
|
| 316 |
+
}
|
| 317 |
+
},
|
| 318 |
+
"nbformat": 4,
|
| 319 |
+
"nbformat_minor": 5
|
| 320 |
+
}
|
community_contributions/2_lab2_reflection_pattern.ipynb
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "markdown",
|
| 14 |
+
"metadata": {},
|
| 15 |
+
"source": [
|
| 16 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
+
" <tr>\n",
|
| 18 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
+
" </td>\n",
|
| 21 |
+
" <td>\n",
|
| 22 |
+
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
+
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
+
" </span>\n",
|
| 25 |
+
" </td>\n",
|
| 26 |
+
" </tr>\n",
|
| 27 |
+
"</table>"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"cell_type": "markdown",
|
| 32 |
+
"metadata": {},
|
| 33 |
+
"source": [
|
| 34 |
+
"This version adds Reflection pattern where we ask each model to critique and improve its own answer."
|
| 35 |
+
]
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"cell_type": "code",
|
| 39 |
+
"execution_count": 9,
|
| 40 |
+
"metadata": {},
|
| 41 |
+
"outputs": [],
|
| 42 |
+
"source": [
|
| 43 |
+
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 44 |
+
"\n",
|
| 45 |
+
"import os\n",
|
| 46 |
+
"import json\n",
|
| 47 |
+
"from dotenv import load_dotenv\n",
|
| 48 |
+
"from openai import OpenAI\n",
|
| 49 |
+
"from anthropic import Anthropic\n",
|
| 50 |
+
"from IPython.display import Markdown, display"
|
| 51 |
+
]
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"cell_type": "markdown",
|
| 55 |
+
"metadata": {},
|
| 56 |
+
"source": []
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"cell_type": "code",
|
| 60 |
+
"execution_count": 12,
|
| 61 |
+
"metadata": {},
|
| 62 |
+
"outputs": [],
|
| 63 |
+
"source": [
|
| 64 |
+
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 65 |
+
"request += \"Answer only with the question, no explanation.\"\n",
|
| 66 |
+
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 67 |
+
]
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"cell_type": "code",
|
| 71 |
+
"execution_count": null,
|
| 72 |
+
"metadata": {},
|
| 73 |
+
"outputs": [],
|
| 74 |
+
"source": [
|
| 75 |
+
"messages"
|
| 76 |
+
]
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"cell_type": "code",
|
| 80 |
+
"execution_count": 14,
|
| 81 |
+
"metadata": {},
|
| 82 |
+
"outputs": [],
|
| 83 |
+
"source": [
|
| 84 |
+
"competitors = []\n",
|
| 85 |
+
"answers = []\n",
|
| 86 |
+
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 87 |
+
]
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"cell_type": "code",
|
| 91 |
+
"execution_count": null,
|
| 92 |
+
"metadata": {},
|
| 93 |
+
"outputs": [],
|
| 94 |
+
"source": [
|
| 95 |
+
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 96 |
+
"model_name = \"gemini-2.0-flash\"\n",
|
| 97 |
+
"\n",
|
| 98 |
+
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 99 |
+
"answer = response.choices[0].message.content\n",
|
| 100 |
+
"\n",
|
| 101 |
+
"display(Markdown(answer))\n",
|
| 102 |
+
"competitors.append(model_name)\n",
|
| 103 |
+
"answers.append(answer)"
|
| 104 |
+
]
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"cell_type": "code",
|
| 108 |
+
"execution_count": null,
|
| 109 |
+
"metadata": {},
|
| 110 |
+
"outputs": [],
|
| 111 |
+
"source": [
|
| 112 |
+
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 113 |
+
"model_name = \"deepseek-chat\"\n",
|
| 114 |
+
"\n",
|
| 115 |
+
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 116 |
+
"answer = response.choices[0].message.content\n",
|
| 117 |
+
"\n",
|
| 118 |
+
"display(Markdown(answer))\n",
|
| 119 |
+
"competitors.append(model_name)\n",
|
| 120 |
+
"answers.append(answer)"
|
| 121 |
+
]
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"cell_type": "code",
|
| 125 |
+
"execution_count": null,
|
| 126 |
+
"metadata": {},
|
| 127 |
+
"outputs": [],
|
| 128 |
+
"source": [
|
| 129 |
+
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 130 |
+
"model_name = \"llama-3.3-70b-versatile\"\n",
|
| 131 |
+
"\n",
|
| 132 |
+
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 133 |
+
"answer = response.choices[0].message.content\n",
|
| 134 |
+
"\n",
|
| 135 |
+
"display(Markdown(answer))\n",
|
| 136 |
+
"competitors.append(model_name)\n",
|
| 137 |
+
"answers.append(answer)\n"
|
| 138 |
+
]
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"cell_type": "markdown",
|
| 142 |
+
"metadata": {},
|
| 143 |
+
"source": [
|
| 144 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 145 |
+
" <tr>\n",
|
| 146 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 147 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 148 |
+
" </td>\n",
|
| 149 |
+
" <td>\n",
|
| 150 |
+
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 151 |
+
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See the <A href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 152 |
+
" </span>\n",
|
| 153 |
+
" </td>\n",
|
| 154 |
+
" </tr>\n",
|
| 155 |
+
"</table>"
|
| 156 |
+
]
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"cell_type": "code",
|
| 160 |
+
"execution_count": null,
|
| 161 |
+
"metadata": {},
|
| 162 |
+
"outputs": [],
|
| 163 |
+
"source": [
|
| 164 |
+
"!ollama pull llama3.2"
|
| 165 |
+
]
|
| 166 |
+
},
|
| 167 |
+
{
|
| 168 |
+
"cell_type": "code",
|
| 169 |
+
"execution_count": 33,
|
| 170 |
+
"metadata": {},
|
| 171 |
+
"outputs": [],
|
| 172 |
+
"source": [
|
| 173 |
+
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 174 |
+
"\n",
|
| 175 |
+
"together = \"\"\n",
|
| 176 |
+
"for index, answer in enumerate(answers):\n",
|
| 177 |
+
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 178 |
+
" together += answer + \"\\n\\n\""
|
| 179 |
+
]
|
| 180 |
+
},
|
| 181 |
+
{
|
| 182 |
+
"cell_type": "code",
|
| 183 |
+
"execution_count": 36,
|
| 184 |
+
"metadata": {},
|
| 185 |
+
"outputs": [],
|
| 186 |
+
"source": [
|
| 187 |
+
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 188 |
+
"Each model has been given this question:\n",
|
| 189 |
+
"\n",
|
| 190 |
+
"{question}\n",
|
| 191 |
+
"\n",
|
| 192 |
+
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 193 |
+
"Respond with JSON, and only JSON, with the following format:\n",
|
| 194 |
+
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 195 |
+
"\n",
|
| 196 |
+
"Here are the responses from each competitor:\n",
|
| 197 |
+
"\n",
|
| 198 |
+
"{together}\n",
|
| 199 |
+
"\n",
|
| 200 |
+
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 201 |
+
]
|
| 202 |
+
},
|
| 203 |
+
{
|
| 204 |
+
"cell_type": "code",
|
| 205 |
+
"execution_count": 38,
|
| 206 |
+
"metadata": {},
|
| 207 |
+
"outputs": [],
|
| 208 |
+
"source": [
|
| 209 |
+
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 210 |
+
]
|
| 211 |
+
},
|
| 212 |
+
{
|
| 213 |
+
"cell_type": "markdown",
|
| 214 |
+
"metadata": {},
|
| 215 |
+
"source": [
|
| 216 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 217 |
+
" <tr>\n",
|
| 218 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 219 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 220 |
+
" </td>\n",
|
| 221 |
+
" <td>\n",
|
| 222 |
+
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 223 |
+
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 224 |
+
" </span>\n",
|
| 225 |
+
" </td>\n",
|
| 226 |
+
" </tr>\n",
|
| 227 |
+
"</table>"
|
| 228 |
+
]
|
| 229 |
+
},
|
| 230 |
+
{
|
| 231 |
+
"cell_type": "markdown",
|
| 232 |
+
"metadata": {},
|
| 233 |
+
"source": [
|
| 234 |
+
"1. Ensemble (Model Competition) Pattern\n",
|
| 235 |
+
"Description: The same prompt/question is sent to multiple different LLMs (OpenAI, Anthropic, Ollama, etc.).\n",
|
| 236 |
+
"Purpose: To compare the quality, style, and content of responses from different models.\n",
|
| 237 |
+
"Where in notebook:\n",
|
| 238 |
+
"The code sends the same question to several models and collects their answers in the competitors and answers lists.\n",
|
| 239 |
+
"\n",
|
| 240 |
+
"2. Judging/Evaluator Pattern\n",
|
| 241 |
+
"Description: After collecting responses from all models, another LLM is used as a “judge” to evaluate and rank the responses.\n",
|
| 242 |
+
"Purpose: To automate the assessment of which model gave the best answer, based on clarity and strength of argument.\n",
|
| 243 |
+
"Where in notebook:\n",
|
| 244 |
+
"The judge prompt is constructed, and an LLM is asked to rank the responses in JSON format.\n",
|
| 245 |
+
"\n",
|
| 246 |
+
"3. Self-Improvement/Meta-Reasoning Pattern\n",
|
| 247 |
+
"Description: The system not only generates answers but also reflects on and evaluates its own outputs (or those of its peers).\n",
|
| 248 |
+
"Purpose: To iteratively improve or select the best output, often used in advanced agentic systems.\n",
|
| 249 |
+
"Where in notebook:\n",
|
| 250 |
+
"The “judge” LLM is an example of meta-reasoning, as it reasons about the quality of other LLMs’ outputs.\n",
|
| 251 |
+
"\n",
|
| 252 |
+
"4. Chain-of-Thought/Decomposition Pattern (to a lesser extent)\n",
|
| 253 |
+
"Description: Breaking down a complex task into subtasks (e.g., generate question → get answers → evaluate answers).\n",
|
| 254 |
+
"Purpose: To improve reliability and interpretability by structuring the workflow.\n",
|
| 255 |
+
"Where in notebook:\n",
|
| 256 |
+
"The workflow is decomposed into:\n",
|
| 257 |
+
"Generating a challenging question\n",
|
| 258 |
+
"Getting answers from multiple models\n",
|
| 259 |
+
"Judging the answers\n",
|
| 260 |
+
"\n",
|
| 261 |
+
"In short:\n",
|
| 262 |
+
"This notebook uses the Ensemble/Competition, Judging/Evaluator, and Meta-Reasoning agentic patterns, and also demonstrates a simple form of Decomposition by structuring the workflow into clear stages.\n",
|
| 263 |
+
"If you want to add more agentic patterns, you could try things like:\n",
|
| 264 |
+
"Reflexion (let models critique and revise their own answers)\n",
|
| 265 |
+
"Tool Use (let models call external tools or APIs)\n",
|
| 266 |
+
"Planning (let a model plan the steps before answering)"
|
| 267 |
+
]
|
| 268 |
+
},
|
| 269 |
+
{
|
| 270 |
+
"cell_type": "markdown",
|
| 271 |
+
"metadata": {},
|
| 272 |
+
"source": [
|
| 273 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 274 |
+
" <tr>\n",
|
| 275 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 276 |
+
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 277 |
+
" </td>\n",
|
| 278 |
+
" <td>\n",
|
| 279 |
+
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 280 |
+
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 281 |
+
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 282 |
+
" to business projects where accuracy is critical.\n",
|
| 283 |
+
" </span>\n",
|
| 284 |
+
" </td>\n",
|
| 285 |
+
" </tr>\n",
|
| 286 |
+
"</table>"
|
| 287 |
+
]
|
| 288 |
+
}
|
| 289 |
+
],
|
| 290 |
+
"metadata": {
|
| 291 |
+
"kernelspec": {
|
| 292 |
+
"display_name": ".venv",
|
| 293 |
+
"language": "python",
|
| 294 |
+
"name": "python3"
|
| 295 |
+
},
|
| 296 |
+
"language_info": {
|
| 297 |
+
"codemirror_mode": {
|
| 298 |
+
"name": "ipython",
|
| 299 |
+
"version": 3
|
| 300 |
+
},
|
| 301 |
+
"file_extension": ".py",
|
| 302 |
+
"mimetype": "text/x-python",
|
| 303 |
+
"name": "python",
|
| 304 |
+
"nbconvert_exporter": "python",
|
| 305 |
+
"pygments_lexer": "ipython3",
|
| 306 |
+
"version": "3.12.8"
|
| 307 |
+
}
|
| 308 |
+
},
|
| 309 |
+
"nbformat": 4,
|
| 310 |
+
"nbformat_minor": 2
|
| 311 |
+
}
|
community_contributions/2_lab2_reflection_pattern2.ipynb
ADDED
|
@@ -0,0 +1,999 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## Welcome to the Second Lab - Exercise: Advanced Agentic Design Patterns\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"This notebook extends the previous lab by adding the **Reflection Pattern** to improve response quality.\n",
|
| 10 |
+
"\n",
|
| 11 |
+
"### Patterns used in the original lab:\n",
|
| 12 |
+
"1. **Multi-Model Comparison Pattern** - Comparing multiple models\n",
|
| 13 |
+
"2. **Judge/Evaluator Pattern** - Evaluation by a judge model\n",
|
| 14 |
+
"\n",
|
| 15 |
+
"### New pattern added:\n",
|
| 16 |
+
"3. **Reflection Pattern** - Self-improvement of responses"
|
| 17 |
+
]
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"cell_type": "markdown",
|
| 21 |
+
"metadata": {},
|
| 22 |
+
"source": [
|
| 23 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 24 |
+
" <tr>\n",
|
| 25 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 26 |
+
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 27 |
+
" </td>\n",
|
| 28 |
+
" <td>\n",
|
| 29 |
+
" <h2 style=\"color:#ff7800;\">New Pattern: Reflection</h2>\n",
|
| 30 |
+
" <span style=\"color:#ff7800;\">The Reflection Pattern allows a model to critique and improve its own response. This is particularly useful for complex tasks requiring nuance and precision.</span>\n",
|
| 31 |
+
" </td>\n",
|
| 32 |
+
" </tr>\n",
|
| 33 |
+
"</table>"
|
| 34 |
+
]
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"cell_type": "code",
|
| 38 |
+
"execution_count": 1,
|
| 39 |
+
"metadata": {},
|
| 40 |
+
"outputs": [
|
| 41 |
+
{
|
| 42 |
+
"data": {
|
| 43 |
+
"text/plain": [
|
| 44 |
+
"True"
|
| 45 |
+
]
|
| 46 |
+
},
|
| 47 |
+
"execution_count": 1,
|
| 48 |
+
"metadata": {},
|
| 49 |
+
"output_type": "execute_result"
|
| 50 |
+
}
|
| 51 |
+
],
|
| 52 |
+
"source": [
|
| 53 |
+
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 54 |
+
"\n",
|
| 55 |
+
"import os\n",
|
| 56 |
+
"import json\n",
|
| 57 |
+
"from dotenv import load_dotenv\n",
|
| 58 |
+
"from openai import OpenAI\n",
|
| 59 |
+
"from anthropic import Anthropic\n",
|
| 60 |
+
"from IPython.display import Markdown, display\n",
|
| 61 |
+
"\n",
|
| 62 |
+
"# Always remember to do this!\n",
|
| 63 |
+
"load_dotenv(override=True)"
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"cell_type": "code",
|
| 68 |
+
"execution_count": 2,
|
| 69 |
+
"metadata": {},
|
| 70 |
+
"outputs": [
|
| 71 |
+
{
|
| 72 |
+
"name": "stdout",
|
| 73 |
+
"output_type": "stream",
|
| 74 |
+
"text": [
|
| 75 |
+
"OpenAI API Key exists and begins sk-1kYcH\n",
|
| 76 |
+
"Anthropic API Key exists and begins sk-ant-\n",
|
| 77 |
+
"Google API Key not set (and this is optional)\n",
|
| 78 |
+
"DeepSeek API Key not set (and this is optional)\n",
|
| 79 |
+
"Groq API Key not set (and this is optional)\n"
|
| 80 |
+
]
|
| 81 |
+
}
|
| 82 |
+
],
|
| 83 |
+
"source": [
|
| 84 |
+
"# Print the key prefixes to help with any debugging\n",
|
| 85 |
+
"\n",
|
| 86 |
+
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 87 |
+
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 88 |
+
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 89 |
+
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 90 |
+
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 91 |
+
"\n",
|
| 92 |
+
"if openai_api_key:\n",
|
| 93 |
+
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 94 |
+
"else:\n",
|
| 95 |
+
" print(\"OpenAI API Key not set\")\n",
|
| 96 |
+
" \n",
|
| 97 |
+
"if anthropic_api_key:\n",
|
| 98 |
+
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 99 |
+
"else:\n",
|
| 100 |
+
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 101 |
+
"\n",
|
| 102 |
+
"if google_api_key:\n",
|
| 103 |
+
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 104 |
+
"else:\n",
|
| 105 |
+
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 106 |
+
"\n",
|
| 107 |
+
"if deepseek_api_key:\n",
|
| 108 |
+
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 109 |
+
"else:\n",
|
| 110 |
+
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 111 |
+
"\n",
|
| 112 |
+
"if groq_api_key:\n",
|
| 113 |
+
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 114 |
+
"else:\n",
|
| 115 |
+
" print(\"Groq API Key not set (and this is optional)\")"
|
| 116 |
+
]
|
| 117 |
+
},
|
| 118 |
+
{
|
| 119 |
+
"cell_type": "markdown",
|
| 120 |
+
"metadata": {},
|
| 121 |
+
"source": [
|
| 122 |
+
"## Step 1: Generate Initial Question (Multi-Model Pattern)"
|
| 123 |
+
]
|
| 124 |
+
},
|
| 125 |
+
{
|
| 126 |
+
"cell_type": "code",
|
| 127 |
+
"execution_count": 3,
|
| 128 |
+
"metadata": {},
|
| 129 |
+
"outputs": [
|
| 130 |
+
{
|
| 131 |
+
"name": "stdout",
|
| 132 |
+
"output_type": "stream",
|
| 133 |
+
"text": [
|
| 134 |
+
"Generated Question:\n",
|
| 135 |
+
"A wealthy philanthropist has developed a new drug that can cure a rare but fatal disease affecting a small population. However, the drug is expensive to produce and the philanthropist only has enough resources to manufacture a limited supply. At the same time, a competing pharmaceutical company has discovered the cure but plans to charge exorbitant prices, making it inaccessible for most patients. \n",
|
| 136 |
+
"\n",
|
| 137 |
+
"The philanthropist learns that if they invest their resources into manufacturing the drug, it can be distributed at a lower cost but only to a select few who are already on a waiting list, prioritizing those who are most likely to recover. Alternatively, the philanthropist could sell the formula to the competing company for a substantial profit, ensuring that a broader population can access the cure, albeit at high prices that many cannot afford.\n",
|
| 138 |
+
"\n",
|
| 139 |
+
"The dilemma: Should the philanthropist prioritize the immediate health of a few individuals by providing the cure at a lower cost, or should they consider the greater good by allowing the competitive company to distribute the cure to a wider audience at a higher price?\n"
|
| 140 |
+
]
|
| 141 |
+
}
|
| 142 |
+
],
|
| 143 |
+
"source": [
|
| 144 |
+
"# Generate a challenging question for the models to answer\n",
|
| 145 |
+
"\n",
|
| 146 |
+
"request = \"Please come up with a challenging ethical dilemma that requires careful moral reasoning and consideration of multiple perspectives. \"\n",
|
| 147 |
+
"request += \"The dilemma should involve conflicting values and have no clear-cut answer. Answer only with the dilemma, no explanation.\"\n",
|
| 148 |
+
"\n",
|
| 149 |
+
"messages = [{\"role\": \"user\", \"content\": request}]\n",
|
| 150 |
+
"\n",
|
| 151 |
+
"openai = OpenAI()\n",
|
| 152 |
+
"response = openai.chat.completions.create(\n",
|
| 153 |
+
" model=\"gpt-4o-mini\",\n",
|
| 154 |
+
" messages=messages,\n",
|
| 155 |
+
")\n",
|
| 156 |
+
"\n",
|
| 157 |
+
"question = response.choices[0].message.content\n",
|
| 158 |
+
"print(\"Generated Question:\")\n",
|
| 159 |
+
"print(question)"
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"cell_type": "markdown",
|
| 164 |
+
"metadata": {},
|
| 165 |
+
"source": [
|
| 166 |
+
"## Step 2: Get Initial Responses from Multiple Models"
|
| 167 |
+
]
|
| 168 |
+
},
|
| 169 |
+
{
|
| 170 |
+
"cell_type": "code",
|
| 171 |
+
"execution_count": 4,
|
| 172 |
+
"metadata": {},
|
| 173 |
+
"outputs": [],
|
| 174 |
+
"source": [
|
| 175 |
+
"def get_initial_response(client, model_name, question, is_anthropic=False):\n",
|
| 176 |
+
" \"\"\"Get initial response from a model\"\"\"\n",
|
| 177 |
+
" messages = [{\"role\": \"user\", \"content\": question}]\n",
|
| 178 |
+
" \n",
|
| 179 |
+
" if is_anthropic:\n",
|
| 180 |
+
" response = client.messages.create(\n",
|
| 181 |
+
" model=model_name, \n",
|
| 182 |
+
" messages=messages, \n",
|
| 183 |
+
" max_tokens=1000\n",
|
| 184 |
+
" )\n",
|
| 185 |
+
" return response.content[0].text\n",
|
| 186 |
+
" else:\n",
|
| 187 |
+
" response = client.chat.completions.create(\n",
|
| 188 |
+
" model=model_name, \n",
|
| 189 |
+
" messages=messages\n",
|
| 190 |
+
" )\n",
|
| 191 |
+
" return response.choices[0].message.content"
|
| 192 |
+
]
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"cell_type": "code",
|
| 196 |
+
"execution_count": 5,
|
| 197 |
+
"metadata": {},
|
| 198 |
+
"outputs": [],
|
| 199 |
+
"source": [
|
| 200 |
+
"# Configure clients\n",
|
| 201 |
+
"openai_client = OpenAI()\n",
|
| 202 |
+
"claude_client = Anthropic() if anthropic_api_key else None\n",
|
| 203 |
+
"gemini_client = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\") if google_api_key else None\n",
|
| 204 |
+
"deepseek_client = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\") if deepseek_api_key else None\n",
|
| 205 |
+
"groq_client = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\") if groq_api_key else None"
|
| 206 |
+
]
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"cell_type": "code",
|
| 210 |
+
"execution_count": 6,
|
| 211 |
+
"metadata": {},
|
| 212 |
+
"outputs": [
|
| 213 |
+
{
|
| 214 |
+
"name": "stdout",
|
| 215 |
+
"output_type": "stream",
|
| 216 |
+
"text": [
|
| 217 |
+
"\n",
|
| 218 |
+
"=== INITIAL RESPONSES ===\n",
|
| 219 |
+
"\n",
|
| 220 |
+
"**gpt-4o-mini:**\n"
|
| 221 |
+
]
|
| 222 |
+
},
|
| 223 |
+
{
|
| 224 |
+
"data": {
|
| 225 |
+
"text/markdown": [
|
| 226 |
+
"This ethical dilemma presents a challenging decision for the philanthropist, who must weigh the immediate health needs of a few individuals against the broader societal implications of drug distribution and access.\n",
|
| 227 |
+
"\n",
|
| 228 |
+
"### Option 1: Prioritizing Immediate Health\n",
|
| 229 |
+
"\n",
|
| 230 |
+
"If the philanthropist chooses to manufacture the drug and distribute it at a lower cost to those on the waiting list, they are directly addressing the pressing health needs of a select few individuals who are already vulnerable. This action prioritizes compassion and the moral obligation to help those who are suffering. By ensuring that the drug is available to those with the highest likelihood of recovery, the philanthropist demonstrates an ethical commitment to saving lives and reducing suffering in the short term.\n",
|
| 231 |
+
"\n",
|
| 232 |
+
"However, this approach has limitations. By distributing the drug to only a small number of patients, the philanthropist may overlook other individuals who could benefit from the cure. Additionally, this solution does not address the systemic issue of access to healthcare and affordable medications for the larger population suffering from the disease.\n",
|
| 233 |
+
"\n",
|
| 234 |
+
"### Option 2: Considering the Greater Good\n",
|
| 235 |
+
"\n",
|
| 236 |
+
"On the other hand, selling the formula to the competing pharmaceutical company for a substantial profit could lead to a wider distribution of the drug, although at a higher price point that may make it inaccessible to many patients. In this scenario, the philanthropist uses their financial gain to potentially invest in other healthcare initiatives or research, thus contributing to the long-term improvement of medical care or addressing related health issues.\n",
|
| 237 |
+
"\n",
|
| 238 |
+
"This choice raises ethical concerns regarding the prioritization of profit over compassion and the risk that many individuals will remain unable to afford the life-saving treatment. It also creates a tension between the ideals of philanthropy and the realities of the pharmaceutical industry, which often operates on profit motives rather than altruistic goals.\n",
|
| 239 |
+
"\n",
|
| 240 |
+
"### Balancing the Two Options\n",
|
| 241 |
+
"\n",
|
| 242 |
+
"A possible compromise could be for the philanthropist to negotiate a deal with the pharmaceutical company that ensures a tiered pricing structure, where those who can afford the drug pay more while discounts or alternative funding are provided for low-income patients. This could help bridge the gap between immediate health needs and wider access.\n",
|
| 243 |
+
"\n",
|
| 244 |
+
"Ultimately, the decision comes down to the philanthropist's values and vision for their impact on public health. Do they prioritize saving a few lives in the short term or seek a more sustainable, albeit imperfect, solution that aims at broader access over a longer timeframe? The complexity of the dilemma emphasizes the need for thoughtful deliberation on how best to serve both individual health needs and the greater public good."
|
| 245 |
+
],
|
| 246 |
+
"text/plain": [
|
| 247 |
+
"<IPython.core.display.Markdown object>"
|
| 248 |
+
]
|
| 249 |
+
},
|
| 250 |
+
"metadata": {},
|
| 251 |
+
"output_type": "display_data"
|
| 252 |
+
},
|
| 253 |
+
{
|
| 254 |
+
"name": "stdout",
|
| 255 |
+
"output_type": "stream",
|
| 256 |
+
"text": [
|
| 257 |
+
"\n",
|
| 258 |
+
"==================================================\n",
|
| 259 |
+
"\n",
|
| 260 |
+
"**claude-3-7-sonnet-latest:**\n"
|
| 261 |
+
]
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"data": {
|
| 265 |
+
"text/markdown": [
|
| 266 |
+
"# The Philanthropist's Dilemma\n",
|
| 267 |
+
"\n",
|
| 268 |
+
"This is a complex ethical dilemma that involves several important considerations:\n",
|
| 269 |
+
"\n",
|
| 270 |
+
"## Key Ethical Tensions\n",
|
| 271 |
+
"\n",
|
| 272 |
+
"- **Limited access at affordable prices** vs. **wider access at unaffordable prices**\n",
|
| 273 |
+
"- **Immediate relief for a few** vs. **potential long-term access for many**\n",
|
| 274 |
+
"- **Direct control over distribution** vs. **surrendering control to profit-motivated actors**\n",
|
| 275 |
+
"\n",
|
| 276 |
+
"## Considerations for Manufacturing the Drug Directly\n",
|
| 277 |
+
"\n",
|
| 278 |
+
"**Benefits:**\n",
|
| 279 |
+
"- Ensures the most vulnerable patients receive treatment based on medical need rather than ability to pay\n",
|
| 280 |
+
"- Maintains the philanthropist's ethical vision and control over distribution\n",
|
| 281 |
+
"- Sets a precedent for compassionate drug pricing\n",
|
| 282 |
+
"\n",
|
| 283 |
+
"**Drawbacks:**\n",
|
| 284 |
+
"- Limited overall reach due to resource constraints\n",
|
| 285 |
+
"- Potentially slower scaling of production\n",
|
| 286 |
+
"- Many patients may receive no treatment at all\n",
|
| 287 |
+
"\n",
|
| 288 |
+
"## Considerations for Selling to the Pharmaceutical Company\n",
|
| 289 |
+
"\n",
|
| 290 |
+
"**Benefits:**\n",
|
| 291 |
+
"- Potentially greater production capacity and distribution reach\n",
|
| 292 |
+
"- The philanthropist could use profits to subsidize costs for those who cannot afford it\n",
|
| 293 |
+
"- Might accelerate further research and development\n",
|
| 294 |
+
"\n",
|
| 295 |
+
"**Drawbacks:**\n",
|
| 296 |
+
"- Many patients would be excluded based on financial means\n",
|
| 297 |
+
"- Surrenders control over an essential medicine to profit-motivated decision-making\n",
|
| 298 |
+
"- Could establish a problematic precedent for pricing life-saving medications\n",
|
| 299 |
+
"\n",
|
| 300 |
+
"This dilemma reflects broader tensions in healthcare ethics between utilitarian approaches (helping the most people) and justice-based approaches (ensuring fair access based on need rather than wealth).\n",
|
| 301 |
+
"\n",
|
| 302 |
+
"There might be creative third options worth exploring, such as licensing agreements with price caps, creating a non-profit manufacturing entity, or partnering with governments to ensure broader affordable access."
|
| 303 |
+
],
|
| 304 |
+
"text/plain": [
|
| 305 |
+
"<IPython.core.display.Markdown object>"
|
| 306 |
+
]
|
| 307 |
+
},
|
| 308 |
+
"metadata": {},
|
| 309 |
+
"output_type": "display_data"
|
| 310 |
+
},
|
| 311 |
+
{
|
| 312 |
+
"name": "stdout",
|
| 313 |
+
"output_type": "stream",
|
| 314 |
+
"text": [
|
| 315 |
+
"\n",
|
| 316 |
+
"==================================================\n",
|
| 317 |
+
"\n"
|
| 318 |
+
]
|
| 319 |
+
}
|
| 320 |
+
],
|
| 321 |
+
"source": [
|
| 322 |
+
"# Collect initial responses\n",
|
| 323 |
+
"initial_responses = {}\n",
|
| 324 |
+
"competitors = []\n",
|
| 325 |
+
"\n",
|
| 326 |
+
"models = [\n",
|
| 327 |
+
" (\"gpt-4o-mini\", openai_client, False),\n",
|
| 328 |
+
" (\"claude-3-7-sonnet-latest\", claude_client, True),\n",
|
| 329 |
+
" (\"gemini-2.0-flash\", gemini_client, False),\n",
|
| 330 |
+
" (\"deepseek-chat\", deepseek_client, False),\n",
|
| 331 |
+
" (\"llama-3.3-70b-versatile\", groq_client, False),\n",
|
| 332 |
+
"]\n",
|
| 333 |
+
"\n",
|
| 334 |
+
"print(\"\\n=== INITIAL RESPONSES ===\\n\")\n",
|
| 335 |
+
"\n",
|
| 336 |
+
"for model_name, client, is_anthropic in models:\n",
|
| 337 |
+
" if client:\n",
|
| 338 |
+
" try:\n",
|
| 339 |
+
" response = get_initial_response(client, model_name, question, is_anthropic)\n",
|
| 340 |
+
" initial_responses[model_name] = response\n",
|
| 341 |
+
" competitors.append(model_name)\n",
|
| 342 |
+
" \n",
|
| 343 |
+
" print(f\"**{model_name}:**\")\n",
|
| 344 |
+
" display(Markdown(response))\n",
|
| 345 |
+
" print(\"\\n\" + \"=\"*50 + \"\\n\")\n",
|
| 346 |
+
" except Exception as e:\n",
|
| 347 |
+
" print(f\"Error with {model_name}: {e}\")"
|
| 348 |
+
]
|
| 349 |
+
},
|
| 350 |
+
{
|
| 351 |
+
"cell_type": "markdown",
|
| 352 |
+
"metadata": {},
|
| 353 |
+
"source": [
|
| 354 |
+
"## Step 3: NEW PATTERN - Reflection Pattern"
|
| 355 |
+
]
|
| 356 |
+
},
|
| 357 |
+
{
|
| 358 |
+
"cell_type": "code",
|
| 359 |
+
"execution_count": 7,
|
| 360 |
+
"metadata": {},
|
| 361 |
+
"outputs": [],
|
| 362 |
+
"source": [
|
| 363 |
+
"def apply_reflection_pattern(client, model_name, original_question, initial_response, is_anthropic=False):\n",
|
| 364 |
+
" \"\"\"Apply the Reflection Pattern to improve a response\"\"\"\n",
|
| 365 |
+
" \n",
|
| 366 |
+
" reflection_prompt = f\"\"\"\n",
|
| 367 |
+
"You previously received this question:\n",
|
| 368 |
+
"{original_question}\n",
|
| 369 |
+
"\n",
|
| 370 |
+
"Here was your initial response:\n",
|
| 371 |
+
"{initial_response}\n",
|
| 372 |
+
"\n",
|
| 373 |
+
"Now, as a critical expert, analyze your own response:\n",
|
| 374 |
+
"1. What are the strengths of this response?\n",
|
| 375 |
+
"2. What important perspectives are missing?\n",
|
| 376 |
+
"3. Are there any biases or blind spots in the analysis?\n",
|
| 377 |
+
"4. How could you improve this response?\n",
|
| 378 |
+
"\n",
|
| 379 |
+
"After this self-critique, provide an IMPROVED response that takes into account your observations.\n",
|
| 380 |
+
"\n",
|
| 381 |
+
"Response format:\n",
|
| 382 |
+
"## Self-Critique\n",
|
| 383 |
+
"[Your critical analysis of the initial response]\n",
|
| 384 |
+
"\n",
|
| 385 |
+
"## Improved Response\n",
|
| 386 |
+
"[Your revised and improved response]\n",
|
| 387 |
+
"\"\"\"\n",
|
| 388 |
+
" \n",
|
| 389 |
+
" messages = [{\"role\": \"user\", \"content\": reflection_prompt}]\n",
|
| 390 |
+
" \n",
|
| 391 |
+
" if is_anthropic:\n",
|
| 392 |
+
" response = client.messages.create(\n",
|
| 393 |
+
" model=model_name, \n",
|
| 394 |
+
" messages=messages, \n",
|
| 395 |
+
" max_tokens=1500\n",
|
| 396 |
+
" )\n",
|
| 397 |
+
" return response.content[0].text\n",
|
| 398 |
+
" else:\n",
|
| 399 |
+
" response = client.chat.completions.create(\n",
|
| 400 |
+
" model=model_name, \n",
|
| 401 |
+
" messages=messages\n",
|
| 402 |
+
" )\n",
|
| 403 |
+
" return response.choices[0].message.content"
|
| 404 |
+
]
|
| 405 |
+
},
|
| 406 |
+
{
|
| 407 |
+
"cell_type": "code",
|
| 408 |
+
"execution_count": 8,
|
| 409 |
+
"metadata": {},
|
| 410 |
+
"outputs": [
|
| 411 |
+
{
|
| 412 |
+
"name": "stdout",
|
| 413 |
+
"output_type": "stream",
|
| 414 |
+
"text": [
|
| 415 |
+
"\n",
|
| 416 |
+
"=== RESPONSES AFTER REFLECTION ===\n",
|
| 417 |
+
"\n",
|
| 418 |
+
"**gpt-4o-mini - After Reflection:**\n"
|
| 419 |
+
]
|
| 420 |
+
},
|
| 421 |
+
{
|
| 422 |
+
"data": {
|
| 423 |
+
"text/markdown": [
|
| 424 |
+
"## Self-Critique\n",
|
| 425 |
+
"1. **Strengths of this Response:**\n",
|
| 426 |
+
" - The response thoroughly outlines both options available to the philanthropist, providing a balanced view of the ethical implications of each choice.\n",
|
| 427 |
+
" - It acknowledges the immediate health needs of affected individuals as well as the broader societal implications of drug distribution.\n",
|
| 428 |
+
" - It introduces a potential compromise solution, which adds depth to the analysis and suggests a more nuanced approach to the dilemma.\n",
|
| 429 |
+
"\n",
|
| 430 |
+
"2. **Important Perspectives Missing:**\n",
|
| 431 |
+
" - The response does not adequately consider the potential operational and logistical challenges in manufacturing and distributing the drug at a lower cost, including regulatory hurdles and the scalability of production.\n",
|
| 432 |
+
" - There is limited discussion on the emotional impact of the decision on the patients and their families, which could influence the philanthropist's considerations.\n",
|
| 433 |
+
" - The perspective of other stakeholders, such as healthcare providers and ethicists, is not introduced.\n",
|
| 434 |
+
"\n",
|
| 435 |
+
"3. **Biases or Blind Spots in the Analysis:**\n",
|
| 436 |
+
" - The response may lean towards prioritizing compassion over economic pragmatism, possibly downplaying the complexities involved in pharmaceutical economics and the realities that arise from selling to a corporation with profit motives.\n",
|
| 437 |
+
" - It assumes a binary choice rather than considering other stakeholder impacts and longer-term systemic solutions.\n",
|
| 438 |
+
"\n",
|
| 439 |
+
"4. **How to Improve This Response:**\n",
|
| 440 |
+
" - Include more contextual factors that might affect the decision, such as regulatory considerations, patient demographics, and healthcare infrastructure.\n",
|
| 441 |
+
" - Expand on the emotional and psychological aspects of the decision-making process for both the philanthropist and the patients involved.\n",
|
| 442 |
+
" - Address the potential for future societal implications if the competing company monopolizes the market after acquiring the formula.\n",
|
| 443 |
+
"\n",
|
| 444 |
+
"## Improved Response\n",
|
| 445 |
+
"This ethical dilemma presents the philanthropist with a complex decision regarding how best to utilize limited resources to maximize the benefit for individuals suffering from a rare but fatal disease. The two primary options – providing a low-cost supply to a select few or selling the formula for broader but costly distribution – both highlight significant ethical considerations.\n",
|
| 446 |
+
"\n",
|
| 447 |
+
"### Option 1: Prioritizing Immediate Health\n",
|
| 448 |
+
"By choosing to manufacture the drug at a lower cost for those on the waiting list, the philanthropist opts to directly address the urgent health needs of vulnerable individuals. This approach reflects a moral obligation to alleviate suffering and save lives in the short term. Prioritizing individuals with the highest likelihood of recovery can lead to tangible, immediate outcomes for those patients and their families.\n",
|
| 449 |
+
"\n",
|
| 450 |
+
"However, there are operational challenges associated with this choice. Limited production capabilities may mean that only a fraction of those in need can actually receive the drug, leaving many others without hope. Additionally, this decision doesn't resolve the systemic issues within healthcare, such as overall treatment accessibility and drug pricing, which may persist if not tackled holistically.\n",
|
| 451 |
+
"\n",
|
| 452 |
+
"### Option 2: Considering the Greater Good\n",
|
| 453 |
+
"Alternatively, selling the formula to the competing pharmaceutical company could result in wider distribution of the drug and potentially more patients benefiting from the cure, albeit at higher prices. This choice could finance further philanthropic efforts or investments in healthcare that might ultimately lead to broader long-term improvements in public health.\n",
|
| 454 |
+
"\n",
|
| 455 |
+
"However, ethical concerns arise when considering the high pricing of the cure. The decision may disproportionately disadvantage lower-income patients, perpetuating healthcare inequities. Furthermore, there is the risk that this choice could enable the pharmaceutical company to monopolize treatment options, furthering exploitation in the industry.\n",
|
| 456 |
+
"\n",
|
| 457 |
+
"### A Balanced Approach\n",
|
| 458 |
+
"To navigate this complex dilemma more thoughtfully, the philanthropist could explore a compromise by negotiating with the pharmaceutical company to establish a tiered pricing structure. This could create a system where the drug is offered at a reduced price for low-income patients, while ensuring sustainability for the company through higher prices for those who can afford them. Additionally, the philanthropist might advocate for a commitment from the company to invest in generics or alternative distribution methods to enhance accessibility.\n",
|
| 459 |
+
"\n",
|
| 460 |
+
"### Conclusion\n",
|
| 461 |
+
"The choice ultimately hinges on the philanthropist's values and vision for their impact on public health. This decision requires careful consideration of immediate health benefits, long-term accessibility, and the emotional ramifications for affected individuals. By weighing the implications of each option and considering collaborative solutions, the philanthropist can work towards an outcome that promotes both individual care and broader societal well-being."
|
| 462 |
+
],
|
| 463 |
+
"text/plain": [
|
| 464 |
+
"<IPython.core.display.Markdown object>"
|
| 465 |
+
]
|
| 466 |
+
},
|
| 467 |
+
"metadata": {},
|
| 468 |
+
"output_type": "display_data"
|
| 469 |
+
},
|
| 470 |
+
{
|
| 471 |
+
"name": "stdout",
|
| 472 |
+
"output_type": "stream",
|
| 473 |
+
"text": [
|
| 474 |
+
"\n",
|
| 475 |
+
"==================================================\n",
|
| 476 |
+
"\n",
|
| 477 |
+
"**claude-3-7-sonnet-latest - After Reflection:**\n"
|
| 478 |
+
]
|
| 479 |
+
},
|
| 480 |
+
{
|
| 481 |
+
"data": {
|
| 482 |
+
"text/markdown": [
|
| 483 |
+
"## Self-Critique\n",
|
| 484 |
+
"\n",
|
| 485 |
+
"### Strengths of the initial response:\n",
|
| 486 |
+
"- Well-structured analysis that clearly outlines the ethical tensions\n",
|
| 487 |
+
"- Presents balanced considerations for both options\n",
|
| 488 |
+
"- Mentions potential third options beyond the binary choice\n",
|
| 489 |
+
"- Identifies the broader ethical frameworks at play (utilitarian vs. justice-based approaches)\n",
|
| 490 |
+
"\n",
|
| 491 |
+
"### Missing perspectives:\n",
|
| 492 |
+
"1. **Stakeholder analysis**: The response lacks a thorough examination of all affected parties (patients, healthcare systems, future patients, etc.)\n",
|
| 493 |
+
"2. **Timeline considerations**: No discussion of short-term vs. long-term consequences beyond immediate access\n",
|
| 494 |
+
"3. **Public health impact**: Limited analysis of how each option affects overall public health outcomes\n",
|
| 495 |
+
"4. **Precedent-setting effects**: Inadequate exploration of how this decision might influence future pharmaceutical development and pricing\n",
|
| 496 |
+
"5. **Regulatory context**: No mention of potential government intervention, price controls, or other regulatory factors\n",
|
| 497 |
+
"6. **Global justice perspective**: No consideration of how this decision affects different regions/countries\n",
|
| 498 |
+
"\n",
|
| 499 |
+
"### Biases and blind spots:\n",
|
| 500 |
+
"1. **False dichotomy**: Despite mentioning \"third options,\" the analysis primarily treats this as a binary choice\n",
|
| 501 |
+
"2. **Western/developed-world bias**: Assumes a market-based healthcare system without considering different global contexts\n",
|
| 502 |
+
"3. **Individual-focused ethics**: Overemphasizes individual choice rather than institutional or systemic responsibilities\n",
|
| 503 |
+
"4. **Overly abstract**: The analysis lacks concrete examples or case studies that might inform the decision\n",
|
| 504 |
+
"5. **Neglect of power dynamics**: Doesn't address the power imbalance between corporations, individuals, and patients\n",
|
| 505 |
+
"\n",
|
| 506 |
+
"### Improvement opportunities:\n",
|
| 507 |
+
"1. Provide a more nuanced spectrum of options beyond the binary choice\n",
|
| 508 |
+
"2. Include more stakeholder perspectives, particularly patient voices\n",
|
| 509 |
+
"3. Consider real-world case studies of similar pharmaceutical dilemmas\n",
|
| 510 |
+
"4. Address systemic issues in drug development and pharmaceutical pricing\n",
|
| 511 |
+
"5. Explore collaborative approaches that leverage multiple institutions\n",
|
| 512 |
+
"6. Discuss intellectual property rights and their ethical implications\n",
|
| 513 |
+
"\n",
|
| 514 |
+
"## Improved Response\n",
|
| 515 |
+
"\n",
|
| 516 |
+
"# The Philanthropist's Dilemma: A Multidimensional Ethical Analysis\n",
|
| 517 |
+
"\n",
|
| 518 |
+
"This scenario presents not simply a binary choice but a complex ethical landscape involving multiple stakeholders, systemic factors, and competing values.\n",
|
| 519 |
+
"\n",
|
| 520 |
+
"## Stakeholder Analysis\n",
|
| 521 |
+
"\n",
|
| 522 |
+
"**Patients and families:**\n",
|
| 523 |
+
"- Those currently suffering need immediate access regardless of mechanism\n",
|
| 524 |
+
"- Future patients have interests in sustainable development of treatments\n",
|
| 525 |
+
"- Economic diversity among patients means affordability affects different groups unequally\n",
|
| 526 |
+
"\n",
|
| 527 |
+
"**Healthcare systems:**\n",
|
| 528 |
+
"- Must allocate limited resources across competing priorities\n",
|
| 529 |
+
"- High-priced drugs can strain budgets and force difficult coverage decisions\n",
|
| 530 |
+
"- Precedents set now affect future negotiations with pharmaceutical companies\n",
|
| 531 |
+
"\n",
|
| 532 |
+
"**Research community:**\n",
|
| 533 |
+
"- Incentives for developing treatments for rare diseases are influenced by such cases\n",
|
| 534 |
+
"- How intellectual property is handled affects future research priorities\n",
|
| 535 |
+
"\n",
|
| 536 |
+
"## Ethical Frameworks Worth Considering\n",
|
| 537 |
+
"\n",
|
| 538 |
+
"1. **Distributive justice**: Who should receive limited resources? What constitutes fair allocation?\n",
|
| 539 |
+
"2. **Rights-based approach**: Do patients have a right to life-saving medication regardless of cost?\n",
|
| 540 |
+
"3. **Consequentialist assessment**: Which option produces the best outcomes for the most people over time?\n",
|
| 541 |
+
"4. **Virtue ethics**: What would a virtuous philanthropist do in this situation?\n",
|
| 542 |
+
"5. **Global justice**: How does this decision affect healthcare equity across different regions?\n",
|
| 543 |
+
"\n",
|
| 544 |
+
"## Spectrum of Options\n",
|
| 545 |
+
"\n",
|
| 546 |
+
"Rather than two mutually exclusive choices, consider a spectrum of possibilities:\n",
|
| 547 |
+
"\n",
|
| 548 |
+
"1. **Direct manufacturing with tiered pricing**: Manufacture independently but implement income-based pricing to maximize access while maintaining sustainability\n",
|
| 549 |
+
"\n",
|
| 550 |
+
"2. **Conditional licensing**: License the formula with contractual price controls, distribution requirements, and accessibility guarantees\n",
|
| 551 |
+
"\n",
|
| 552 |
+
"3. **Public-private partnership**: Collaborate with governments, NGOs, and selected pharmaceutical partners to ensure broad, affordable access\n",
|
| 553 |
+
"\n",
|
| 554 |
+
"4. **Open-source approach**: Release the formula publicly with certain patent protections waived, while establishing a foundation to support manufacturing\n",
|
| 555 |
+
"\n",
|
| 556 |
+
"5. **Hybrid distribution model**: Manufacture for highest-need populations while licensing to reach others, using licensing revenues to subsidize direct manufacturing\n",
|
| 557 |
+
"\n",
|
| 558 |
+
"## Case Study Context\n",
|
| 559 |
+
"\n",
|
| 560 |
+
"Similar dilemmas have occurred with treatments for HIV/AIDS, hepatitis C, and rare genetic disorders. The outcomes suggest:\n",
|
| 561 |
+
"\n",
|
| 562 |
+
"- Maintaining some control over intellectual property while ensuring broad access often yields better public health outcomes than either extreme option\n",
|
| 563 |
+
"- Patient advocacy can significantly influence corporate behavior and pricing\n",
|
| 564 |
+
"- International differences in pricing and patent enforcement create complex dynamics\n",
|
| 565 |
+
"- Government intervention through negotiation, compulsory licensing, or regulation often becomes necessary\n",
|
| 566 |
+
"\n",
|
| 567 |
+
"## Systems-Level Considerations\n",
|
| 568 |
+
"\n",
|
| 569 |
+
"This dilemma exists within broader systemic issues:\n",
|
| 570 |
+
"\n",
|
| 571 |
+
"- The current pharmaceutical development model creates inherent tensions between innovation, access, and affordability\n",
|
| 572 |
+
"- Rare disease treatments highlight market failures in drug development\n",
|
| 573 |
+
"- Healthcare financing systems vary globally, affecting how we should evaluate \"accessibility\"\n",
|
| 574 |
+
"- Intellectual property regimes may require reform to better balance innovation incentives with public health needs\n",
|
| 575 |
+
"\n",
|
| 576 |
+
"## Recommended Approach\n",
|
| 577 |
+
"\n",
|
| 578 |
+
"The philanthropist should pursue a hybrid strategy that:\n",
|
| 579 |
+
"\n",
|
| 580 |
+
"1. Maintains sufficient control to ensure the most vulnerable patients receive treatment regardless of ability to pay\n",
|
| 581 |
+
"\n",
|
| 582 |
+
"2. Leverages partnerships with multiple entities (pharmaceutical companies, governments, NGOs) to maximize production scale and geographic reach\n",
|
| 583 |
+
"\n",
|
| 584 |
+
"3. Implements contractual safeguards on pricing, with particular attention to low and middle-income regions\n",
|
| 585 |
+
"\n",
|
| 586 |
+
"4. Establishes a patient assistance foundation using a portion of any licensing revenues\n",
|
| 587 |
+
"\n",
|
| 588 |
+
"5. Advocates for systemic reforms that would prevent such dilemmas in the future\n",
|
| 589 |
+
"\n",
|
| 590 |
+
"This approach recognizes that the philanthropist's responsibility extends beyond the immediate distribution decision to include consideration of precedent-setting effects, stakeholder equity, and systemic change—balancing immediate needs with long-term public health impact."
|
| 591 |
+
],
|
| 592 |
+
"text/plain": [
|
| 593 |
+
"<IPython.core.display.Markdown object>"
|
| 594 |
+
]
|
| 595 |
+
},
|
| 596 |
+
"metadata": {},
|
| 597 |
+
"output_type": "display_data"
|
| 598 |
+
},
|
| 599 |
+
{
|
| 600 |
+
"name": "stdout",
|
| 601 |
+
"output_type": "stream",
|
| 602 |
+
"text": [
|
| 603 |
+
"\n",
|
| 604 |
+
"==================================================\n",
|
| 605 |
+
"\n"
|
| 606 |
+
]
|
| 607 |
+
}
|
| 608 |
+
],
|
| 609 |
+
"source": [
|
| 610 |
+
"# Apply Reflection Pattern\n",
|
| 611 |
+
"reflected_responses = {}\n",
|
| 612 |
+
"\n",
|
| 613 |
+
"print(\"\\n=== RESPONSES AFTER REFLECTION ===\\n\")\n",
|
| 614 |
+
"\n",
|
| 615 |
+
"for model_name, client, is_anthropic in models:\n",
|
| 616 |
+
" if client and model_name in initial_responses:\n",
|
| 617 |
+
" try:\n",
|
| 618 |
+
" reflected = apply_reflection_pattern(\n",
|
| 619 |
+
" client, model_name, question, \n",
|
| 620 |
+
" initial_responses[model_name], is_anthropic\n",
|
| 621 |
+
" )\n",
|
| 622 |
+
" reflected_responses[model_name] = reflected\n",
|
| 623 |
+
" \n",
|
| 624 |
+
" print(f\"**{model_name} - After Reflection:**\")\n",
|
| 625 |
+
" display(Markdown(reflected))\n",
|
| 626 |
+
" print(\"\\n\" + \"=\"*50 + \"\\n\")\n",
|
| 627 |
+
" except Exception as e:\n",
|
| 628 |
+
" print(f\"Error with reflection for {model_name}: {e}\")"
|
| 629 |
+
]
|
| 630 |
+
},
|
| 631 |
+
{
|
| 632 |
+
"cell_type": "markdown",
|
| 633 |
+
"metadata": {},
|
| 634 |
+
"source": [
|
| 635 |
+
"## Step 4: Comparative Evaluation (Extended Judge Pattern)"
|
| 636 |
+
]
|
| 637 |
+
},
|
| 638 |
+
{
|
| 639 |
+
"cell_type": "code",
|
| 640 |
+
"execution_count": 9,
|
| 641 |
+
"metadata": {},
|
| 642 |
+
"outputs": [],
|
| 643 |
+
"source": [
|
| 644 |
+
"def create_comparative_evaluation(question, initial_responses, reflected_responses):\n",
|
| 645 |
+
" \"\"\"Create a comparative evaluation of responses before/after reflection\"\"\"\n",
|
| 646 |
+
" \n",
|
| 647 |
+
" evaluation_prompt = f\"\"\"\n",
|
| 648 |
+
"You are evaluating the effectiveness of the \"Reflection Pattern\" on the following question:\n",
|
| 649 |
+
"{question}\n",
|
| 650 |
+
"\n",
|
| 651 |
+
"For each model, you have:\n",
|
| 652 |
+
"1. An initial response\n",
|
| 653 |
+
"2. A response after self-reflection\n",
|
| 654 |
+
"\n",
|
| 655 |
+
"Analyze and compare:\n",
|
| 656 |
+
"- Depth of analysis\n",
|
| 657 |
+
"- Consideration of multiple perspectives\n",
|
| 658 |
+
"- Nuance and sophistication of reasoning\n",
|
| 659 |
+
"- Improvement brought by reflection\n",
|
| 660 |
+
"\n",
|
| 661 |
+
"MODELS TO EVALUATE:\n",
|
| 662 |
+
"\"\"\"\n",
|
| 663 |
+
" \n",
|
| 664 |
+
" for model_name in initial_responses:\n",
|
| 665 |
+
" if model_name in reflected_responses:\n",
|
| 666 |
+
" evaluation_prompt += f\"\"\"\n",
|
| 667 |
+
"## {model_name}\n",
|
| 668 |
+
"\n",
|
| 669 |
+
"### Initial response:\n",
|
| 670 |
+
"{initial_responses[model_name][:500]}...\n",
|
| 671 |
+
"\n",
|
| 672 |
+
"### Response after reflection:\n",
|
| 673 |
+
"{reflected_responses[model_name][:800]}...\n",
|
| 674 |
+
"\n",
|
| 675 |
+
"\"\"\"\n",
|
| 676 |
+
" \n",
|
| 677 |
+
" evaluation_prompt += \"\"\"\n",
|
| 678 |
+
"Respond with structured JSON:\n",
|
| 679 |
+
"{\n",
|
| 680 |
+
" \"general_analysis\": \"Your analysis of the Reflection Pattern's effectiveness\",\n",
|
| 681 |
+
" \"initial_ranking\": [\"best initially ranked model\", \"second\", \"third\"],\n",
|
| 682 |
+
" \"post_reflection_ranking\": [\"best ranked model after reflection\", \"second\", \"third\"],\n",
|
| 683 |
+
" \"most_improved\": \"Which model improved the most\",\n",
|
| 684 |
+
" \"insights\": \"Insights about the usefulness of the Reflection Pattern\"\n",
|
| 685 |
+
"}\n",
|
| 686 |
+
"\"\"\"\n",
|
| 687 |
+
" \n",
|
| 688 |
+
" return evaluation_prompt"
|
| 689 |
+
]
|
| 690 |
+
},
|
| 691 |
+
{
|
| 692 |
+
"cell_type": "code",
|
| 693 |
+
"execution_count": 10,
|
| 694 |
+
"metadata": {},
|
| 695 |
+
"outputs": [
|
| 696 |
+
{
|
| 697 |
+
"name": "stdout",
|
| 698 |
+
"output_type": "stream",
|
| 699 |
+
"text": [
|
| 700 |
+
"\n",
|
| 701 |
+
"=== FINAL EVALUATION ===\n",
|
| 702 |
+
"\n",
|
| 703 |
+
"```json\n",
|
| 704 |
+
"{\n",
|
| 705 |
+
" \"general_analysis\": \"The Reflection Pattern effectively enhanced the depth of analysis and consideration of multiple perspectives in both models. However, the results differ in terms of sophistication and detail. The GPT-4 model provided initial observations that were relatively shallow but improved by incorporating logistical challenges and suggesting compromises during reflection. In contrast, Claude-3's initial response was more structured and sophisticated, covering a broader range of ethical frameworks, but still showed room for improvement regarding stakeholder analysis and long-term impacts.\",\n",
|
| 706 |
+
" \"initial_ranking\": [\"claude-3-7-sonnet-latest\", \"gpt-4o-mini\"],\n",
|
| 707 |
+
" \"post_reflection_ranking\": [\"claude-3-7-sonnet-latest\", \"gpt-4o-mini\"],\n",
|
| 708 |
+
" \"most_improved\": \"gpt-4o-mini\",\n",
|
| 709 |
+
" \"insights\": \"The Reflection Pattern revealed significant gaps in both models' initial analyses, encouraging deeper engagement with ethical implications and stakeholder considerations. It highlighted the importance of reflecting on logistical realities and the real-world impacts of decisions, marking it as a worthwhile practice for ethical dilemmas.\"\n",
|
| 710 |
+
"}\n",
|
| 711 |
+
"```\n",
|
| 712 |
+
"Could not parse JSON, raw output shown above\n"
|
| 713 |
+
]
|
| 714 |
+
}
|
| 715 |
+
],
|
| 716 |
+
"source": [
|
| 717 |
+
"# Final evaluation\n",
|
| 718 |
+
"if initial_responses and reflected_responses:\n",
|
| 719 |
+
" evaluation_prompt = create_comparative_evaluation(question, initial_responses, reflected_responses)\n",
|
| 720 |
+
" \n",
|
| 721 |
+
" judge_messages = [{\"role\": \"user\", \"content\": evaluation_prompt}]\n",
|
| 722 |
+
" \n",
|
| 723 |
+
" try:\n",
|
| 724 |
+
" judge_response = openai_client.chat.completions.create(\n",
|
| 725 |
+
" model=\"gpt-4o-mini\",\n",
|
| 726 |
+
" messages=judge_messages,\n",
|
| 727 |
+
" )\n",
|
| 728 |
+
" \n",
|
| 729 |
+
" evaluation_result = judge_response.choices[0].message.content\n",
|
| 730 |
+
" print(\"\\n=== FINAL EVALUATION ===\\n\")\n",
|
| 731 |
+
" print(evaluation_result)\n",
|
| 732 |
+
" \n",
|
| 733 |
+
" # Try to parse JSON for structured display\n",
|
| 734 |
+
" try:\n",
|
| 735 |
+
" eval_json = json.loads(evaluation_result)\n",
|
| 736 |
+
" print(\"\\n=== STRUCTURED RESULTS ===\\n\")\n",
|
| 737 |
+
" for key, value in eval_json.items():\n",
|
| 738 |
+
" print(f\"{key.replace('_', ' ').title()}: {value}\")\n",
|
| 739 |
+
" except:\n",
|
| 740 |
+
" print(\"Could not parse JSON, raw output shown above\")\n",
|
| 741 |
+
" \n",
|
| 742 |
+
" except Exception as e:\n",
|
| 743 |
+
" print(f\"Error during final evaluation: {e}\")"
|
| 744 |
+
]
|
| 745 |
+
},
|
| 746 |
+
{
|
| 747 |
+
"cell_type": "markdown",
|
| 748 |
+
"metadata": {},
|
| 749 |
+
"source": [
|
| 750 |
+
"## Simple Before/After Comparison"
|
| 751 |
+
]
|
| 752 |
+
},
|
| 753 |
+
{
|
| 754 |
+
"cell_type": "code",
|
| 755 |
+
"execution_count": 11,
|
| 756 |
+
"metadata": {},
|
| 757 |
+
"outputs": [
|
| 758 |
+
{
|
| 759 |
+
"name": "stdout",
|
| 760 |
+
"output_type": "stream",
|
| 761 |
+
"text": [
|
| 762 |
+
"\n",
|
| 763 |
+
"=== BEFORE vs AFTER COMPARISON ===\n",
|
| 764 |
+
"\n",
|
| 765 |
+
"\n",
|
| 766 |
+
"==================== GPT-4O-MINI ====================\n",
|
| 767 |
+
"\n",
|
| 768 |
+
"BEFORE REFLECTION:\n",
|
| 769 |
+
"--------------------------------------------------\n",
|
| 770 |
+
"This ethical dilemma presents a challenging decision for the philanthropist, who must weigh the immediate health needs of a few individuals against the broader societal implications of drug distribution and access.\n",
|
| 771 |
+
"\n",
|
| 772 |
+
"### Option 1: Prioritizing Immediate Health\n",
|
| 773 |
+
"\n",
|
| 774 |
+
"If the philanthropist chooses to manufa...\n",
|
| 775 |
+
"\n",
|
| 776 |
+
"AFTER REFLECTION:\n",
|
| 777 |
+
"--------------------------------------------------\n",
|
| 778 |
+
"This ethical dilemma presents the philanthropist with a complex decision regarding how best to utilize limited resources to maximize the benefit for individuals suffering from a rare but fatal disease. The two primary options – providing a low-cost supply to a select few or selling the formula for broader but costly distribution – both highlight significant ethical considerations.\n",
|
| 779 |
+
"\n",
|
| 780 |
+
"### Option 1: P...\n",
|
| 781 |
+
"\n",
|
| 782 |
+
"======================================================================\n",
|
| 783 |
+
"\n",
|
| 784 |
+
"\n",
|
| 785 |
+
"==================== CLAUDE-3-7-SONNET-LATEST ====================\n",
|
| 786 |
+
"\n",
|
| 787 |
+
"BEFORE REFLECTION:\n",
|
| 788 |
+
"--------------------------------------------------\n",
|
| 789 |
+
"# The Philanthropist's Dilemma\n",
|
| 790 |
+
"\n",
|
| 791 |
+
"This is a complex ethical dilemma that involves several important considerations:\n",
|
| 792 |
+
"\n",
|
| 793 |
+
"## Key Ethical Tensions\n",
|
| 794 |
+
"\n",
|
| 795 |
+
"- **Limited access at affordable prices** vs. **wider access at unaffordable prices**\n",
|
| 796 |
+
"- **Immediate relief for a few** vs. **potential long-term access for many...\n",
|
| 797 |
+
"\n",
|
| 798 |
+
"AFTER REFLECTION:\n",
|
| 799 |
+
"--------------------------------------------------\n",
|
| 800 |
+
"# The Philanthropist's Dilemma: A Multidimensional Ethical Analysis\n",
|
| 801 |
+
"\n",
|
| 802 |
+
"This scenario presents not simply a binary choice but a complex ethical landscape involving multiple stakeholders, systemic factors, and competing values.\n",
|
| 803 |
+
"\n",
|
| 804 |
+
"## Stakeholder Analysis\n",
|
| 805 |
+
"\n",
|
| 806 |
+
"**Patients and families:**\n",
|
| 807 |
+
"- Those currently suffering need immediate access regardless of mechanism\n",
|
| 808 |
+
"- Future patients have interests in sustainable d...\n",
|
| 809 |
+
"\n",
|
| 810 |
+
"======================================================================\n",
|
| 811 |
+
"\n"
|
| 812 |
+
]
|
| 813 |
+
}
|
| 814 |
+
],
|
| 815 |
+
"source": [
|
| 816 |
+
"# Display a truncated before/after comparison for each model (excerpts printed one after the other)\n",
|
| 817 |
+
"print(\"\\n=== BEFORE vs AFTER COMPARISON ===\\n\")\n",
|
| 818 |
+
"\n",
|
| 819 |
+
"for model_name in initial_responses:\n",
|
| 820 |
+
" if model_name in reflected_responses:\n",
|
| 821 |
+
" print(f\"\\n{'='*20} {model_name.upper()} {'='*20}\\n\")\n",
|
| 822 |
+
" \n",
|
| 823 |
+
" print(\"BEFORE REFLECTION:\")\n",
|
| 824 |
+
" print(\"-\" * 50)\n",
|
| 825 |
+
" print(initial_responses[model_name][:300] + \"...\")\n",
|
| 826 |
+
" \n",
|
| 827 |
+
" print(\"\\nAFTER REFLECTION:\")\n",
|
| 828 |
+
" print(\"-\" * 50)\n",
|
| 829 |
+
" # Extract just the \"Improved Response\" section if it exists\n",
|
| 830 |
+
" reflected = reflected_responses[model_name]\n",
|
| 831 |
+
" if \"## Improved Response\" in reflected:\n",
|
| 832 |
+
" improved_section = reflected.split(\"## Improved Response\")[1].strip()\n",
|
| 833 |
+
" print(improved_section[:400] + \"...\")\n",
|
| 834 |
+
" else:\n",
|
| 835 |
+
" print(reflected[:400] + \"...\")\n",
|
| 836 |
+
" \n",
|
| 837 |
+
" print(\"\\n\" + \"=\"*70 + \"\\n\")"
|
| 838 |
+
]
|
| 839 |
+
},
|
| 840 |
+
{
|
| 841 |
+
"cell_type": "markdown",
|
| 842 |
+
"metadata": {},
|
| 843 |
+
"source": [
|
| 844 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 845 |
+
" <tr>\n",
|
| 846 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 847 |
+
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 848 |
+
" </td>\n",
|
| 849 |
+
" <td>\n",
|
| 850 |
+
" <h2 style=\"color:#ff7800;\">Pattern Analysis</h2>\n",
|
| 851 |
+
" <span style=\"color:#ff7800;\">\n",
|
| 852 |
+
" <b>Patterns used:</b><br/>\n",
|
| 853 |
+
" 1. <b>Multi-Model Comparison:</b> Comparing multiple models on the same task<br/>\n",
|
| 854 |
+
" 2. <b>Judge/Evaluator:</b> Using a model to evaluate each model's performance<br/>\n",
|
| 855 |
+
" 3. <b>Reflection (NEW):</b> Self-critique and improvement of responses<br/><br/>\n",
|
| 856 |
+
" <b>Possible experiments:</b><br/>\n",
|
| 857 |
+
" - Iterate the Reflection Pattern multiple times<br/>\n",
|
| 858 |
+
" - Add a \"Debate Pattern\" between models<br/>\n",
|
| 859 |
+
" - Implement a \"Consensus Pattern\"\n",
|
| 860 |
+
" </span>\n",
|
| 861 |
+
" </td>\n",
|
| 862 |
+
" </tr>\n",
|
| 863 |
+
"</table>"
|
| 864 |
+
]
|
| 865 |
+
},
|
| 866 |
+
{
|
| 867 |
+
"cell_type": "markdown",
|
| 868 |
+
"metadata": {},
|
| 869 |
+
"source": [
|
| 870 |
+
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 871 |
+
" <tr>\n",
|
| 872 |
+
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 873 |
+
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 874 |
+
" </td>\n",
|
| 875 |
+
" <td>\n",
|
| 876 |
+
" <h2 style=\"color:#00bfff;\">Commercial Applications</h2>\n",
|
| 877 |
+
" <span style=\"color:#00bfff;\">\n",
|
| 878 |
+
" The <b>Reflection Pattern</b> is particularly valuable for:<br/>\n",
|
| 879 |
+
" • Improving quality of complex analyses<br/>\n",
|
| 880 |
+
" • Reducing bias in AI recommendations<br/>\n",
|
| 881 |
+
" • Creating self-improving systems<br/>\n",
|
| 882 |
+
" • Developing more robust AI for critical decisions<br/><br/>\n",
|
| 883 |
+
" Use cases: Strategic consulting, risk analysis, ethical evaluation, medical diagnosis\n",
|
| 884 |
+
" </span>\n",
|
| 885 |
+
" </td>\n",
|
| 886 |
+
" </tr>\n",
|
| 887 |
+
"</table>"
|
| 888 |
+
]
|
| 889 |
+
},
|
| 890 |
+
{
|
| 891 |
+
"cell_type": "markdown",
|
| 892 |
+
"metadata": {},
|
| 893 |
+
"source": [
|
| 894 |
+
"## Additional Pattern Ideas for Future Implementation"
|
| 895 |
+
]
|
| 896 |
+
},
|
| 897 |
+
{
|
| 898 |
+
"cell_type": "code",
|
| 899 |
+
"execution_count": 12,
|
| 900 |
+
"metadata": {},
|
| 901 |
+
"outputs": [
|
| 902 |
+
{
|
| 903 |
+
"name": "stdout",
|
| 904 |
+
"output_type": "stream",
|
| 905 |
+
"text": [
|
| 906 |
+
"Exercise completed! Analyze the results to see the impact of the Reflection Pattern.\n"
|
| 907 |
+
]
|
| 908 |
+
}
|
| 909 |
+
],
|
| 910 |
+
"source": [
|
| 911 |
+
"# 1. Chain of Thought Pattern\n",
|
| 912 |
+
"\"\"\"\n",
|
| 913 |
+
"Add a pattern that asks models to show their reasoning step by step:\n",
|
| 914 |
+
"\n",
|
| 915 |
+
"def apply_chain_of_thought_pattern(client, question):\n",
|
| 916 |
+
" prompt = f\\\"\n",
|
| 917 |
+
" Question: {question}\n",
|
| 918 |
+
" \n",
|
| 919 |
+
" Please think through this step by step:\n",
|
| 920 |
+
" Step 1: [Identify the key issues]\n",
|
| 921 |
+
" Step 2: [Consider different perspectives]\n",
|
| 922 |
+
" Step 3: [Evaluate potential consequences]\n",
|
| 923 |
+
" Step 4: [Provide reasoned conclusion]\n",
|
| 924 |
+
" \\\"\n",
|
| 925 |
+
" return get_response(client, prompt)\n",
|
| 926 |
+
"\"\"\"\n",
|
| 927 |
+
"\n",
|
| 928 |
+
"# 2. Iterative Refinement Pattern\n",
|
| 929 |
+
"\"\"\"\n",
|
| 930 |
+
"Create a loop that progressively improves the response over multiple iterations:\n",
|
| 931 |
+
"\n",
|
| 932 |
+
"def iterative_refinement(client, question, iterations=3):\n",
|
| 933 |
+
" response = get_initial_response(client, question)\n",
|
| 934 |
+
" for i in range(iterations):\n",
|
| 935 |
+
" critique_prompt = f\\\"Improve this response: {response}\\\"\n",
|
| 936 |
+
" response = get_response(client, critique_prompt)\n",
|
| 937 |
+
" return response\n",
|
| 938 |
+
"\"\"\"\n",
|
| 939 |
+
"\n",
|
| 940 |
+
"# 3. Debate Pattern\n",
|
| 941 |
+
"\"\"\"\n",
|
| 942 |
+
"Make two models debate their respective responses:\n",
|
| 943 |
+
"\n",
|
| 944 |
+
"def create_debate(client1, client2, question):\n",
|
| 945 |
+
" response1 = get_response(client1, question)\n",
|
| 946 |
+
" response2 = get_response(client2, question)\n",
|
| 947 |
+
" \n",
|
| 948 |
+
" debate_prompt1 = f\\\"Argue against this position: {response2}\\\"\n",
|
| 949 |
+
" debate_prompt2 = f\\\"Argue against this position: {response1}\\\"\n",
|
| 950 |
+
" \n",
|
| 951 |
+
" counter1 = get_response(client1, debate_prompt1)\n",
|
| 952 |
+
" counter2 = get_response(client2, debate_prompt2)\n",
|
| 953 |
+
" \n",
|
| 954 |
+
" return counter1, counter2\n",
|
| 955 |
+
"\"\"\"\n",
|
| 956 |
+
"\n",
|
| 957 |
+
"# 4. Consensus Building Pattern\n",
|
| 958 |
+
"\"\"\"\n",
|
| 959 |
+
"Attempt to create a consensus response based on all individual responses:\n",
|
| 960 |
+
"\n",
|
| 961 |
+
"def build_consensus(all_responses, question):\n",
|
| 962 |
+
" consensus_prompt = f\\\"\n",
|
| 963 |
+
" Original question: {question}\n",
|
| 964 |
+
" \n",
|
| 965 |
+
" Here are multiple expert responses:\n",
|
| 966 |
+
" {all_responses}\n",
|
| 967 |
+
" \n",
|
| 968 |
+
" Create a consensus response that incorporates the best insights from all responses\n",
|
| 969 |
+
" while resolving contradictions.\n",
|
| 970 |
+
" \\\"\n",
|
| 971 |
+
" return get_response(openai_client, consensus_prompt)\n",
|
| 972 |
+
"\"\"\"\n",
|
| 973 |
+
"\n",
|
| 974 |
+
"print(\"Exercise completed! Analyze the results to see the impact of the Reflection Pattern.\")"
|
| 975 |
+
]
|
| 976 |
+
}
|
| 977 |
+
],
|
| 978 |
+
"metadata": {
|
| 979 |
+
"kernelspec": {
|
| 980 |
+
"display_name": ".venv",
|
| 981 |
+
"language": "python",
|
| 982 |
+
"name": "python3"
|
| 983 |
+
},
|
| 984 |
+
"language_info": {
|
| 985 |
+
"codemirror_mode": {
|
| 986 |
+
"name": "ipython",
|
| 987 |
+
"version": 3
|
| 988 |
+
},
|
| 989 |
+
"file_extension": ".py",
|
| 990 |
+
"mimetype": "text/x-python",
|
| 991 |
+
"name": "python",
|
| 992 |
+
"nbconvert_exporter": "python",
|
| 993 |
+
"pygments_lexer": "ipython3",
|
| 994 |
+
"version": "3.12.11"
|
| 995 |
+
}
|
| 996 |
+
},
|
| 997 |
+
"nbformat": 4,
|
| 998 |
+
"nbformat_minor": 4
|
| 999 |
+
}
|