From 021d6af35a9cf7198cc0b0b8390f10417399cf7d Mon Sep 17 00:00:00 2001
From: Ilia Manolov <Ilia Manolov>
Date: Wed, 21 May 2025 15:43:57 +0100
Subject: [PATCH 1/6] Sync forge build demo

---
 global_config/global_config.yaml |   6 +-
 pyproject.toml                   |   4 +-
 requirements-dev.lock            |  34 ++++++
 requirements.lock                |   2 +
 utils/llm/demo.py                | 188 +++++++++++++++++++++++++++++++
 utils/llm/demo2.py               |  30 +++++
 6 files changed, 260 insertions(+), 4 deletions(-)
 create mode 100644 utils/llm/demo.py
 create mode 100644 utils/llm/demo2.py
diff --git a/global_config/global_config.yaml b/global_config/global_config.yaml
index afeba8a..77e5129 100644
--- a/global_config/global_config.yaml
+++ b/global_config/global_config.yaml
@@ -5,9 +5,9 @@ dot_global_config_health_check: true
 # LLMs
 ########################################################
 default_llm:
-  default_model: gemini/gemini-2.0-flash
-  default_temperature: 0.5
-  default_max_tokens: 100000
+  default_model: openai/gpt-4.1
+  default_temperature: 0.3
+  default_max_tokens: 32000
 
 llm_config:
   cache_enabled: false
diff --git a/pyproject.toml b/pyproject.toml
index 7b6700a..a6ebf95 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,7 +30,9 @@ build-backend = "hatchling.build"
 
 [tool.rye]
 managed = true
-dev-dependencies = []
+dev-dependencies = [
+    "ipython>=9.2.0",
+]
 
 [tool.hatch.metadata]
 allow-direct-references = true
diff --git a/requirements-dev.lock b/requirements-dev.lock
index 6741a3c..e7be393 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -28,6 +28,8 @@ anyio==4.9.0
     # via httpx
     # via langfuse
     # via openai
+asttokens==3.0.0
+    # via stack-data
 asyncer==0.0.8
     # via dspy
 attrs==25.3.0
@@ -58,6 +60,8 @@ colorlog==6.9.0
     # via optuna
 datasets==3.6.0
     # via dspy
+decorator==5.2.1
+    # via ipython
 dill==0.3.8
     # via datasets
     # via multiprocess
@@ -67,6 +71,8 @@ distro==1.9.0
     # via openai
 dspy==2.6.24
     # via python-template
+executing==2.2.0
+    # via stack-data
 filelock==3.18.0
     # via datasets
     # via huggingface-hub
@@ -80,6 +86,8 @@ google-auth==2.40.1
     # via google-genai
 google-genai==1.15.0
     # via python-template
+greenlet==3.2.2
+    # via sqlalchemy
 h11==0.16.0
     # via httpcore
 httpcore==1.0.9
@@ -104,6 +112,11 @@ importlib-metadata==8.7.0
     # via litellm
 iniconfig==2.0.0
     # via pytest
+ipython==9.2.0
+ipython-pygments-lexers==1.1.1
+    # via ipython
+jedi==0.19.2
+    # via ipython
 jinja2==3.1.6
     # via litellm
 jiter==0.9.0
@@ -132,6 +145,8 @@ markdown-it-py==3.0.0
 markupsafe==3.0.2
     # via jinja2
     # via mako
+matplotlib-inline==0.1.7
+    # via ipython
 mdurl==0.1.2
     # via markdown-it-py
 multidict==6.4.3
@@ -161,17 +176,27 @@ packaging==24.1
 pandas==2.2.3
     # via datasets
     # via dspy
+parso==0.8.4
+    # via jedi
 pathspec==0.12.1
     # via black
+pexpect==4.9.0
+    # via ipython
 pillow==11.2.1
     # via python-template
 platformdirs==4.3.6
     # via black
 pluggy==1.5.0
     # via pytest
+prompt-toolkit==3.0.51
+    # via ipython
 propcache==0.3.1
     # via aiohttp
     # via yarl
+ptyprocess==0.7.0
+    # via pexpect
+pure-eval==0.2.3
+    # via stack-data
 pyarrow==20.0.0
     # via datasets
 pyasn1==0.6.1
@@ -188,6 +213,8 @@ pydantic==2.11.4
 pydantic-core==2.33.2
     # via pydantic
 pygments==2.19.1
+    # via ipython
+    # via ipython-pygments-lexers
     # via rich
 pytest==8.3.3
     # via python-template
@@ -231,6 +258,8 @@ sniffio==1.3.1
 sqlalchemy==2.0.41
     # via alembic
     # via optuna
+stack-data==0.6.3
+    # via ipython
 tenacity==9.1.2
     # via dspy
     # via python-template
@@ -246,6 +275,9 @@ tqdm==4.67.1
     # via huggingface-hub
     # via openai
     # via optuna
+traitlets==5.14.3
+    # via ipython
+    # via matplotlib-inline
 typing-extensions==4.13.2
     # via alembic
     # via anyio
@@ -267,6 +299,8 @@ urllib3==2.4.0
     # via requests
 vulture==2.14
     # via python-template
+wcwidth==0.2.13
+    # via prompt-toolkit
 websockets==15.0.1
     # via google-genai
 wrapt==1.17.2
diff --git a/requirements.lock b/requirements.lock
index 6741a3c..ec14121 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -80,6 +80,8 @@ google-auth==2.40.1
     # via google-genai
 google-genai==1.15.0
     # via python-template
+greenlet==3.2.2
+    # via sqlalchemy
 h11==0.16.0
     # via httpcore
 httpcore==1.0.9
diff --git a/utils/llm/demo.py b/utils/llm/demo.py
new file mode 100644
index 0000000..67ca7cd
--- /dev/null
+++ b/utils/llm/demo.py
@@ -0,0 +1,188 @@
+from utils.llm.dspy_inference import DSPYInference
+import dspy
+import asyncio
+import os
+import subprocess
+from pathlib import Path
+approved_commands = [
+    'forge',
+    'npm',
+    'solc',
+    'foundryup',
+    'pnpm',
+    'yarn',
+    'git submodule',
+    'nvm'
+]
+
+repo_dir = Path('/home/imanolov/git/Python-Template/resources/v2-core-public-cantina')
+
+
+def list_files_tool(relative_path: str | None = None) -> list[str] | str:
+    """List all files in the given relative path."""
+
+    if relative_path is None:
+        relative_path = '.'
+
+    if '..' in relative_path:
+        print("LLM tried to access parent directory! Returning empty list.")
+        return 'Restricted: Tried to access parent directory!'
+
+    absolute_path = repo_dir / relative_path
+
+    if not os.path.exists(absolute_path):
+        print(f"Directory {relative_path} does not exist! Returning empty list.")
+        return 'Error: Directory {relative_path} does not exist!'
+
+    print("Listing files in: " + relative_path)
+    return [f for f in os.listdir(absolute_path) if os.path.isfile(os.path.join(absolute_path, f))]
+
+def list_folders_tool(relative_path: str | None = None) -> list[str] | str:
+    """List all folders in the given relative path."""
+
+    if relative_path is None:
+        relative_path = '.'
+
+    if '..' in relative_path:
+        print("LLM tried to access parent directory! Returning empty list.")
+        return 'Restricted: Tried to access parent directory!'
+
+    absolute_path = repo_dir / relative_path
+
+    if not os.path.exists(absolute_path):
+        print(f"Directory {relative_path} does not exist! Returning empty list.")
+        return 'Error: Directory {relative_path} does not exist!'
+
+    print("Listing folders in: " + relative_path)
+    return [f for f in os.listdir(absolute_path) if os.path.isdir(os.path.join(absolute_path, f))]
+
+def read_file_tool(relative_path: str) -> str:
+    """Return the text of the file at relative_path (relative to repo_dir), or a 'Restricted:'/'Error:' message string if the path contains '..' or does not exist."""
+
+    if '..' in relative_path:
+        print("LLM tried to access parent directory!")
+        return 'Restricted: Tried to access parent directory!'
+    
+    absolute_path = repo_dir / relative_path
+
+    if not os.path.exists(absolute_path):
+        print(f"File {relative_path} does not exist!")
+        return f'Error: File {relative_path} does not exist!'
+    
+    with open(absolute_path, 'r') as file:
+        print("File read successfully: " + relative_path)
+        return file.read()
+    
+def execute_command_tool(command: str, relative_path: str = '.') -> tuple[str, int]:
+    """Execute the given command inside of the given relative path and return the output, "done" if the command was successful and the return code."""
+
+    if not any(command.startswith(cmd) for cmd in approved_commands):
+        print("Command must start with one of the following: " + ', '.join(approved_commands) + "!")
+        return 'Error: Command must start with one of the following: ' + ', '.join(approved_commands), 1
+    
+    if '..' in relative_path:
+        print("LLM tried to access parent directory!")
+        return 'Restricted: Tried to access parent directory!'
+    
+    absolute_path = repo_dir / relative_path
+
+    if not os.path.exists(absolute_path):
+        print(f"Directory {relative_path} does not exist!")
+        return 'Error: Directory {relative_path} does not exist!'
+
+    try:
+        print(f"Executing command: {command} in {relative_path}")
+        result = subprocess.run(command, shell=True, cwd=absolute_path, capture_output=True, text=True)
+        print("Command executed: " + command)
+        if result.returncode != 0:
+            return 'Error: ' + result.stderr, result.returncode
+        return result.stdout + '\ndone', result.returncode
+    except subprocess.CalledProcessError as e:
+        return 'Error: ' + str(e), 1
+    
+def prompt_modify_file_tool(relative_path: str, new_content: str, oneline_modification_reason: str) -> str:
+    """Modify the given file using the given prompt."""
+
+    if '..' in relative_path:
+        print("LLM tried to access parent directory! Returning empty string.")
+        return 'Restricted: Tried to access parent directory!'
+    
+    absolute_path = repo_dir / relative_path
+    
+    if not os.path.exists(absolute_path):
+        print(f"File {relative_path} does not exist! Returning empty string.")
+        return 'Error: File {relative_path} does not exist!'
+    
+    print(f"LLM wants to modify file {relative_path}")
+    print(f"Reason: {oneline_modification_reason}")
+    print("Do you want to continue? (y/n)")
+    response = input().lower()
+    if response != 'y':
+        print("Modification cancelled by user.")
+        return 'Failure: Modification cancelled by user!'
+    
+    with open(absolute_path, 'w') as file:
+        file.write(new_content)
+        
+    return 'Success: File modified!'
+
+def final_check_before_completion() -> str:
+    """Always run this tool before finishing. This tool will tell you whether you are done or not."""
+
+    print('Trying to finish up...')
+
+    result1, returncode1 = execute_command_tool('forge build')
+    if returncode1 != 0:
+        return 'Failure: Forge build failed! Please continue getting the repo to build. Here is your output: ' + result1
+    
+    print('Forge build passed! Trying to run tests...')
+
+    result2, returncode2 = execute_command_tool('forge test')
+
+    if returncode2 != 0:
+        return 'Failure: Forge test failed! Please continue getting the repo tests to run. Here is your output: ' + result2
+    
+    print('Forge test passed! Should exit now...')
+    
+    return 'Success: Forge build and test passed!'
+
+class RunCommands(dspy.Signature):
+    """Run commands in the given directory."""
+
+    high_level_instructions: str = dspy.InputField()
+    forge_build_runs: bool = dspy.OutputField()
+    forge_test_runs: bool = dspy.OutputField()
+    ran_commands: list[str] = dspy.OutputField()
+
+
+subprocess.run(['git', 'reset', '--hard'], cwd=repo_dir)
+subprocess.run(['git', 'clean', '-fd'], cwd=repo_dir)
+
+inf_module = DSPYInference(
+    pred_signature=RunCommands,
+    tools=[list_files_tool, list_folders_tool, read_file_tool, execute_command_tool, prompt_modify_file_tool, final_check_before_completion],
+)
+
+result = asyncio.run(inf_module.run(
+    high_level_instructions=f"""
+        You are a professional smart contract security engineer.
+        Your current directory is inside of a git repository of a Foundry Forge Solidity project.
+        Your job is to build the project and run the tests.
+        You are allowed to read through files and folders in the project to understand it.
+        You can additionally only run the following commands at the root of the project:
+        {', '.join(approved_commands)}
+        You will not combine commands with && or ||.
+        You will always try to use the non-interactive or json versions of the commands.
+        You are allowed to use the help parameter of the commands if you want to understand them better.
+        When supplying any relative path to a tool you will never use `..`. This is forbidden and will cause the tool to fail.
+        You are not allowed to move freely using `cd`.
+        You can additionally modify any file in the project that you have already read.
+        Your goal is to run `forge build` successfully and to run `forge test` (which must complete but may have failing tests).
+        You will not give up until these commands run successfully.
+        You only have one shot at this so make sure to do it right.
+        You should start by finding and opening the readme. Then, you should see what other files and folders are in the project.
+        Your response will always finish with a call to `final_check_before_completion` that returns a success.
+    """
+))
+
+print(result.ran_commands)
diff --git a/utils/llm/demo2.py b/utils/llm/demo2.py
new file mode 100644
index 0000000..f33ae90
--- /dev/null
+++ b/utils/llm/demo2.py
@@ -0,0 +1,30 @@
+from utils.llm.dspy_inference import DSPYInference
+import dspy
+import asyncio
+
+class ExtractInfo(dspy.Signature):
+    """Extract structured information from text."""
+    text: str = dspy.InputField(desc="The text to extract information from")
+    title: str = dspy.OutputField(desc="The title of the text")
+    headings: list[str] = dspy.OutputField(desc="A list of headings in the text")
+    entities: list[dict[str, str]] = dspy.OutputField(
+        desc="a list of entities and their metadata"
+    )
+
+def web_search(query: str) -> str:
+    """Search the web for information."""
+    return "This is a test"
+
+inf_module = DSPYInference(
+    pred_signature=ExtractInfo,
+    tools=[web_search]
+)
+
+result = asyncio.run(inf_module.run(
+    text="Apple Inc. announced its latest iPhone 14 today."
+    "The CEO, Tim Cook, highlighted its new features in a press release."
+))
+
+print(result.title)
+print(result.headings)
+print(result.entities)
\ No newline at end of file

From 1e298921a3cbb41e166f7f4a3cd957bc2e049c7f Mon Sep 17 00:00:00 2001
From: Ilia Manolov <Ilia Manolov>
Date: Wed, 21 May 2025 17:23:44 +0100
Subject: [PATCH 2/6] Working demo!

---
 global_config/global_config.yaml |  6 +++---
 utils/llm/demo.py                | 30 ++++++++++++++++--------------
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/global_config/global_config.yaml b/global_config/global_config.yaml
index 77e5129..81b440a 100644
--- a/global_config/global_config.yaml
+++ b/global_config/global_config.yaml
@@ -5,9 +5,9 @@ dot_global_config_health_check: true
 # LLMs
 ########################################################
 default_llm:
-  default_model: openai/gpt-4.1
-  default_temperature: 0.3
-  default_max_tokens: 32000
+  default_model: gemini/gemini-2.5-flash-preview-05-20
+  default_temperature: 0.5
+  default_max_tokens: 1000000
 
 llm_config:
   cache_enabled: false
diff --git a/utils/llm/demo.py b/utils/llm/demo.py
index 67ca7cd..80a3e5e 100644
--- a/utils/llm/demo.py
+++ b/utils/llm/demo.py
@@ -73,35 +73,35 @@ def read_file_tool(relative_path: str) -> str:
         print("File read successfully: " + relative_path)
         return file.read()
     
-def execute_command_tool(command: str, relative_path: str = '.') -> tuple[str, int]:
-    """Execute the given command inside of the given relative path and return the output, "done" if the command was successful and the return code."""
+def execute_command_tool(command: str, relative_path: str = '.', accept_nonzero_return_code: bool = False) -> tuple[str, int]:
+    """Execute the given command inside of the given relative path. Set accept_nonzero_return_code to True if a non-zero return code is acceptable. Returns the output with "done" appended if the command was successful, together with the return code."""
 
+    print(f"Attempting to execute command: {command} in {relative_path}")
     if not any(command.startswith(cmd) for cmd in approved_commands):
         print("Command must start with one of the following: " + ', '.join(approved_commands) + "!")
         return 'Error: Command must start with one of the following: ' + ', '.join(approved_commands), 1
     
     if '..' in relative_path:
         print("LLM tried to access parent directory!")
-        return 'Restricted: Tried to access parent directory!'
+        return 'Restricted: Tried to access parent directory!', 1
     
     absolute_path = repo_dir / relative_path
 
     if not os.path.exists(absolute_path):
         print(f"Directory {relative_path} does not exist!")
-        return 'Error: Directory {relative_path} does not exist!'
+        return 'Error: Directory {relative_path} does not exist!', 1
 
     try:
-        print(f"Executing command: {command} in {relative_path}")
         result = subprocess.run(command, shell=True, cwd=absolute_path, capture_output=True, text=True)
         print("Command executed: " + command)
-        if result.returncode != 0:
-            return 'Error: ' + result.stderr, result.returncode
-        return result.stdout + '\ndone', result.returncode
+        if not accept_nonzero_return_code and result.returncode != 0:
+            return 'Error: ' + result.stderr + '\nOutput: ' + result.stdout, result.returncode
+        return 'Output: ' + result.stdout + '\ndone', result.returncode
     except subprocess.CalledProcessError as e:
         return 'Error: ' + str(e), 1
     
 def prompt_modify_file_tool(relative_path: str, new_content: str, oneline_modification_reason: str) -> str:
-    """Modify the given file using the given prompt."""
+    """Overwrite an existing file with the given new content. The file will be modified in place."""
 
     if '..' in relative_path:
         print("LLM tried to access parent directory! Returning empty string.")
@@ -111,7 +111,7 @@ def prompt_modify_file_tool(relative_path: str, new_content: str, oneline_modifi
     
     if not os.path.exists(absolute_path):
         print(f"File {relative_path} does not exist! Returning empty string.")
-        return 'Error: File {relative_path} does not exist!'
+        return f'Error: File {relative_path} does not exist!'
     
     print(f"LLM wants to modify file {relative_path}")
     print(f"Reason: {oneline_modification_reason}")
@@ -137,9 +137,9 @@ def final_check_before_completion() -> str:
     
     print('Forge build passed! Trying to run tests...')
 
-    result2, returncode2 = execute_command_tool('forge test')
+    result2, returncode2 = execute_command_tool('forge test', accept_nonzero_return_code=True)
 
-    if returncode2 != 0:
+    if result2.startswith('Error:') or result2.startswith('Failure:') or result2.startswith('Restricted:'):
         return 'Failure: Forge test failed! Please continue getting the repo tests to run. Here is your output: ' + result2
     
     print('Forge test passed! Should exit now...')
@@ -156,11 +156,12 @@ class RunCommands(dspy.Signature):
 
 
 subprocess.run(['git', 'reset', '--hard'], cwd=repo_dir)
-subprocess.run(['git', 'clean', '-fd'], cwd=repo_dir)
+subprocess.run(['git', 'clean', '-fdx'], cwd=repo_dir)
 
 inf_module = DSPYInference(
-    pred_signature=RunCommands,
+    pred_signature=RunCommands(),
     tools=[list_files_tool, list_folders_tool, read_file_tool, execute_command_tool, prompt_modify_file_tool, final_check_before_completion],
+    max_iters=20,
 )
 
 result = asyncio.run(inf_module.run(
@@ -179,6 +180,7 @@ class RunCommands(dspy.Signature):
         You can additionally modify any file in the project that you have already read.
         Your goal is to run `forge build` successfully and to run `forge test` (which must complete but may have failing tests).
         You will not give up until these commands run successfully.
+        You will never run any commands that open ports or a shell.
         You only have one shot at this so make sure to do it right.
         You should start by finding and opening the readme. Then, you should see what other files and folders are in the project.
         Your response will always finish with a call to `final_check_before_completion` that returns a success.

From 57588ae20cb004ac7570144f8f154974f8f25fd2 Mon Sep 17 00:00:00 2001
From: Ilia Manolov <Ilia Manolov>
Date: Wed, 21 May 2025 17:24:37 +0100
Subject: [PATCH 3/6] Some type fixes

---
 .gitignore                     |  3 +++
 global_config/global_config.py |  4 ++--
 utils/llm/demo2.py             | 30 ------------------------------
 utils/llm/dspy_inference.py    |  8 +++++---
 utils/llm/dspy_langfuse.py     |  6 ++++--
 5 files changed, 14 insertions(+), 37 deletions(-)
 delete mode 100644 utils/llm/demo2.py

diff --git a/.gitignore b/.gitignore
index 26eab8e..3325bb3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -165,3 +165,6 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+
+.vscode/
\ No newline at end of file
diff --git a/global_config/global_config.py b/global_config/global_config.py
index f8bd0ad..1080055 100644
--- a/global_config/global_config.py
+++ b/global_config/global_config.py
@@ -100,7 +100,7 @@ def unwrap(obj):
 
         return {k: unwrap(v) for k, v in self.__dict__.items()}
 
-    def llm_api_key(self, model_name: str = None) -> str:
+    def llm_api_key(self, model_name: str | None = None) -> str:
         """Returns the appropriate API key based on the model name."""
 
         model_identifier = model_name or self.model_name
@@ -122,7 +122,7 @@ def llm_api_key(self, model_name: str = None) -> str:
         else:
             raise ValueError(f"No API key configured for model: {model_identifier}")
 
-    def api_base(self, model_name: str) -> str:
+    def api_base(self, model_name: str) -> str | None:
         """Returns the Helicone link for the model."""
         if "gpt" in model_name.lower() or re.match(
             OPENAI_O_SERIES_PATTERN, model_name.lower()
diff --git a/utils/llm/demo2.py b/utils/llm/demo2.py
deleted file mode 100644
index f33ae90..0000000
--- a/utils/llm/demo2.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from utils.llm.dspy_inference import DSPYInference
-import dspy
-import asyncio
-
-class ExtractInfo(dspy.Signature):
-    """Extract structured information from text."""
-    text: str = dspy.InputField(desc="The text to extract information from")
-    title: str = dspy.OutputField(desc="The title of the text")
-    headings: list[str] = dspy.OutputField(desc="A list of headings in the text")
-    entities: list[dict[str, str]] = dspy.OutputField(
-        desc="a list of entities and their metadata"
-    )
-
-def web_search(query: str) -> str:
-    """Search the web for information."""
-    return "This is a test"
-
-inf_module = DSPYInference(
-    pred_signature=ExtractInfo,
-    tools=[web_search]
-)
-
-result = asyncio.run(inf_module.run(
-    text="Apple Inc. announced its latest iPhone 14 today."
-    "The CEO, Tim Cook, highlighted its new features in a press release."
-))
-
-print(result.title)
-print(result.headings)
-print(result.entities)
\ No newline at end of file
diff --git a/utils/llm/dspy_inference.py b/utils/llm/dspy_inference.py
index 3f0b8dc..422d22f 100644
--- a/utils/llm/dspy_inference.py
+++ b/utils/llm/dspy_inference.py
@@ -1,4 +1,4 @@
-from typing import Callable
+from typing import Callable, Awaitable, Any
 import dspy
 from global_config import global_config
 
@@ -10,7 +10,7 @@
     retry_if_exception_type,
 )
 from utils.llm.dspy_langfuse import LangFuseDSPYCallback
-from litellm import ServiceUnavailableError
+from litellm.exceptions import ServiceUnavailableError
 from langfuse.decorators import observe
 
 
@@ -23,6 +23,7 @@ def __init__(
         model_name: str = global_config.default_llm.default_model,
         temperature: float = global_config.default_llm.default_temperature,
         max_tokens: int = global_config.default_llm.default_max_tokens,
+        max_iters: int = 5,
     ):
         api_key = global_config.llm_api_key(model_name)
         self.lm = dspy.LM(
@@ -44,10 +45,11 @@ def __init__(
             self.inference_module = dspy.ReAct(
                 pred_signature,
                 tools=tools,  # Uses tools as passed, no longer appends read_memory
+                max_iters=max_iters,
             )
         else:
             self.inference_module = dspy.Predict(pred_signature)
-        self.inference_module_async = dspy.asyncify(self.inference_module)
+        self.inference_module_async: Callable[..., Awaitable[Any]] = dspy.asyncify(self.inference_module)
 
     @observe()
     @retry(
diff --git a/utils/llm/dspy_langfuse.py b/utils/llm/dspy_langfuse.py
index af5d417..f3f4470 100644
--- a/utils/llm/dspy_langfuse.py
+++ b/utils/llm/dspy_langfuse.py
@@ -1,8 +1,7 @@
 from dspy.utils.callback import BaseCallback
 from langfuse.decorators import langfuse_context
 from langfuse import Langfuse
-from litellm import completion_cost
-from langfuse.media import LangfuseMedia
+from litellm.cost_calculator import completion_cost
 from typing import Optional
 import dspy
 import contextvars
@@ -38,6 +37,7 @@ def __init__(self, signature: dspy.Signature):
 
     def on_module_start(self, call_id, *args, **kwargs):
         inputs = kwargs.get("inputs")
+        assert inputs is not None, "Inputs must be provided"
         extracted_args = inputs["kwargs"]
         input_field_values = {}
         for input_field_name in self.input_field_names:
@@ -74,6 +74,7 @@ def on_lm_start(self, call_id, *args, **kwargs):
         temperature = lm_dict.get("kwargs", {}).get("temperature")
         max_tokens = lm_dict.get("kwargs", {}).get("max_tokens")
         inputs = kwargs.get("inputs")
+        assert inputs is not None, "Inputs must be provided"
         messages = inputs.get("messages")
         assert messages[0].get("role") == "system"
         system_prompt = messages[0].get("content")
@@ -153,6 +154,7 @@ def on_lm_end(self, call_id, outputs, exception, **kwargs):
         ):
             try:
                 if hasattr(outputs, "usage"):
+                    assert outputs.usage is not None, "Usage must be provided"
                     prompt_tokens = outputs.usage.prompt_tokens
                     completion_tokens = outputs.usage.completion_tokens
                     total_tokens = outputs.usage.total_tokens

From c1675b6c7afae93bea5383bc1775f5a4ef44b194 Mon Sep 17 00:00:00 2001
From: Ilia Manolov <Ilia Manolov>
Date: Wed, 21 May 2025 17:27:10 +0100
Subject: [PATCH 4/6] Make the demo take a repo directory from the CLI

---
 utils/llm/demo.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/utils/llm/demo.py b/utils/llm/demo.py
index 80a3e5e..bddfcec 100644
--- a/utils/llm/demo.py
+++ b/utils/llm/demo.py
@@ -4,6 +4,14 @@
 import os
 import subprocess
 from pathlib import Path
+import sys
+
+if len(sys.argv) < 2:
+    print("Error: Please provide a repository path as the first argument. Example: python demo.py /home/*username*/git/*forge-project-name*/")
+    sys.exit(1)
+    
+repo_dir = Path(sys.argv[1])
+
 approved_commands = [
     'forge',
     'npm',
@@ -15,8 +23,6 @@
     'nvm'
 ]
 
-repo_dir = Path('/home/imanolov/git/Python-Template/resources/v2-core-public-cantina')
-
 
 def list_files_tool(relative_path: str | None = None) -> list[str] | str:
     """List all files in the given relative path."""

From 371ce5d6a48b1b45e5749deab19243edd62ab7cf Mon Sep 17 00:00:00 2001
From: Ilia Manolov <Ilia Manolov>
Date: Thu, 22 May 2025 12:06:39 +0100
Subject: [PATCH 5/6] Some small fixes

---
 utils/llm/demo.py           | 19 +++++++++++--------
 utils/llm/dspy_inference.py |  2 +-
 utils/llm/dspy_langfuse.py  |  2 +-
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/utils/llm/demo.py b/utils/llm/demo.py
index bddfcec..b6e1537 100644
--- a/utils/llm/demo.py
+++ b/utils/llm/demo.py
@@ -38,7 +38,7 @@ def list_files_tool(relative_path: str | None = None) -> list[str] | str:
 
     if not os.path.exists(absolute_path):
         print(f"Directory {relative_path} does not exist! Returning empty list.")
-        return 'Error: Directory {relative_path} does not exist!'
+        return f'Error: Directory {relative_path} does not exist!'
 
     print("Listing files in: " + relative_path)
     return [f for f in os.listdir(absolute_path) if os.path.isfile(os.path.join(absolute_path, f))]
@@ -57,7 +57,7 @@ def list_folders_tool(relative_path: str | None = None) -> list[str] | str:
 
     if not os.path.exists(absolute_path):
         print(f"Directory {relative_path} does not exist! Returning empty list.")
-        return 'Error: Directory {relative_path} does not exist!'
+        return f'Error: Directory {relative_path} does not exist!'
 
     print("Listing folders in: " + relative_path)
     return [f for f in os.listdir(absolute_path) if os.path.isdir(os.path.join(absolute_path, f))]
@@ -95,7 +95,7 @@ def execute_command_tool(command: str, relative_path: str = '.', accept_nonzero_
 
     if not os.path.exists(absolute_path):
         print(f"Directory {relative_path} does not exist!")
-        return 'Error: Directory {relative_path} does not exist!', 1
+        return f'Error: Directory {relative_path} does not exist!', 1
 
     try:
         result = subprocess.run(command, shell=True, cwd=absolute_path, capture_output=True, text=True)
@@ -107,7 +107,7 @@ def execute_command_tool(command: str, relative_path: str = '.', accept_nonzero_
         return 'Error: ' + str(e), 1
     
 def prompt_modify_file_tool(relative_path: str, new_content: str, oneline_modification_reason: str) -> str:
-    """Overwrite an existing file with the given new content. The file will be modified in place."""
+    """Overwrite an existing file with the given new content. The file will be modified in place. Use this tool as a last resort."""
 
     if '..' in relative_path:
         print("LLM tried to access parent directory! Returning empty string.")
@@ -162,12 +162,12 @@ class RunCommands(dspy.Signature):
 
 
 subprocess.run(['git', 'reset', '--hard'], cwd=repo_dir)
-subprocess.run(['git', 'clean', '-fdx'], cwd=repo_dir)
+subprocess.run(['git', 'clean', '-ffdx'], cwd=repo_dir)
 
 inf_module = DSPYInference(
-    pred_signature=RunCommands(),
+    pred_signature=RunCommands,
     tools=[list_files_tool, list_folders_tool, read_file_tool, execute_command_tool, prompt_modify_file_tool, final_check_before_completion],
-    max_iters=20,
+    max_iters=50,
 )
 
 result = asyncio.run(inf_module.run(
@@ -180,7 +180,10 @@ class RunCommands(dspy.Signature):
         {', '.join(approved_commands)}
         You will not combine commands with && or ||.
         You will always try to use the non-interactive or json versions of the commands.
-        You are allowed to use the help parameter of the commands if you want to understand them better.
+        Sometimes repos fail to give instructions on how to set up dependencies. In this case you should figure out how to pull them from one of the supplied commands.
+        For example, if you find out that forge-std doesn't exist, you can pull it in via `forge install foundry-rs/forge-std`.
+        Please be mindful of required versions and stick to them. Modify `remappings.txt` only as a last resort.
+        You are allowed to use the help parameter of the commands if you get confused or are uncertain about their usage.
         When supplying any relative path to a tool you will never use `..`. This is forbidden and will cause the tool to fail.
         You are not allowed to move freely using `cd`.
         You can additionally modify any file in the project that you have already read.
diff --git a/utils/llm/dspy_inference.py b/utils/llm/dspy_inference.py
index 422d22f..6f6448a 100644
--- a/utils/llm/dspy_inference.py
+++ b/utils/llm/dspy_inference.py
@@ -17,7 +17,7 @@
 class DSPYInference:
     def __init__(
         self,
-        pred_signature: dspy.Signature,
+        pred_signature: type[dspy.Signature],
         tools: list[Callable] = [],
         observe: bool = True,
         model_name: str = global_config.default_llm.default_model,
diff --git a/utils/llm/dspy_langfuse.py b/utils/llm/dspy_langfuse.py
index f3f4470..5dd0043 100644
--- a/utils/llm/dspy_langfuse.py
+++ b/utils/llm/dspy_langfuse.py
@@ -14,7 +14,7 @@
 
 # 1. Define a custom callback class that extends BaseCallback class
 class LangFuseDSPYCallback(BaseCallback):
-    def __init__(self, signature: dspy.Signature):
+    def __init__(self, signature: type[dspy.Signature]):
         super().__init__()
         # Use contextvars for per-call state
         self.current_system_prompt = contextvars.ContextVar("current_system_prompt")

From 5e0a28ca0ac57c133c159bfe70ebafb3f7c66eda Mon Sep 17 00:00:00 2001
From: Ilia Manolov <Ilia Manolov>
Date: Thu, 22 May 2025 13:55:11 +0100
Subject: [PATCH 6/6] Some prompt improvements

---
 utils/llm/demo.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/utils/llm/demo.py b/utils/llm/demo.py
index b6e1537..9af73af 100644
--- a/utils/llm/demo.py
+++ b/utils/llm/demo.py
@@ -107,7 +107,7 @@ def execute_command_tool(command: str, relative_path: str = '.', accept_nonzero_
         return 'Error: ' + str(e), 1
     
 def prompt_modify_file_tool(relative_path: str, new_content: str, oneline_modification_reason: str) -> str:
-    """Overwrite an existing file with the given new content. The file will be modified in place. Use this tool as a last resort."""
+    """Overwrite an existing file with the given new content. The file will be modified in place. In the vast majority of cases you should not use this tool and should instead consult the --help of the commands available to you."""
 
     if '..' in relative_path:
         print("LLM tried to access parent directory! Returning empty string.")
@@ -158,7 +158,7 @@ class RunCommands(dspy.Signature):
     high_level_instructions: str = dspy.InputField()
     forge_build_runs: bool = dspy.OutputField()
     forge_test_runs: bool = dspy.OutputField()
-    ran_commands: list[str] = dspy.OutputField()
+    shell_command_ran: list[str] = dspy.OutputField()
 
 
 subprocess.run(['git', 'reset', '--hard'], cwd=repo_dir)
@@ -179,21 +179,23 @@ class RunCommands(dspy.Signature):
         You can additionally only run the following commands at the root of the project:
         {', '.join(approved_commands)}
         You will not combine commands with && or ||.
-        You will always try to use the non-interactive or json versions of the commands.
+        You will always try to use the non-interactive or json versions of the commands. This is because the output is passed back to you in the same format.
         Sometimes repos fail to give instructions on how to set up dependencies. In this case you should figure out how to pull them from one of the supplied commands.
         For example, if you find out that forge-std doesn't exist, you can pull it in via `forge install foundry-rs/forge-std`.
-        Please be mindful of required versions and stick to them. Modify `remappings.txt` only as a last resort.
-        You are allowed to use the help parameter of the commands if you get confused or are uncertain about their usage.
+        Please be mindful of required versions and stick to them.
+        You should run a command with `--help` if you are unsure about its capabilities and don't have any ideas of how to proceed.
         When supplying any relative path to a tool you will never use `..`. This is forbidden and will cause the tool to fail.
         You are not allowed to move freely using `cd`.
         You can additionally modify any file in the project that you have already read.
-        Your goal is to run `forge build` successfully and to run `forge test` (which must complete but may have failing tests).
+        Your goal is to run `forge build` successfully and to run `forge test` (which must complete but may have failing tests so it may not necessarily "succeed").
+        Keep in mind that the `forge soldeer` subcommand exists and is the dependency manager for some projects. You should expect to see a `soldeer.lock` in these cases.
         You will not give up until these commands run successfully.
         You will never run any commands that open ports or a shell.
         You only have one shot at this so make sure to do it right.
         You should start by finding and opening the readme. Then, you should see what other files and folders are in the project.
         Your response will always finish with a call to `final_check_before_completion` that returns a success.
+        You will not execute any further commands after you have verified that `forge build` and `forge test` work.
     """
 ))
 
-print(result.ran_commands)
+print(result.shell_command_ran)