Edison-Watch · IliaMManolov · May 21, 2025 · May 21, 2025 · May 21, 2025 · May 21, 2025
diff --git a/.gitignore b/.gitignore
@@ -165,3 +165,6 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+
+.vscode/
diff --git a/global_config/global_config.py b/global_config/global_config.py
@@ -100,7 +100,7 @@ def unwrap(obj):
 
         return {k: unwrap(v) for k, v in self.__dict__.items()}
 
-    def llm_api_key(self, model_name: str = None) -> str:
+    def llm_api_key(self, model_name: str | None = None) -> str:
         """Returns the appropriate API key based on the model name."""
 
         model_identifier = model_name or self.model_name
@@ -122,7 +122,7 @@ def llm_api_key(self, model_name: str = None) -> str:
         else:
             raise ValueError(f"No API key configured for model: {model_identifier}")
 
-    def api_base(self, model_name: str) -> str:
+    def api_base(self, model_name: str) -> str | None:
         """Returns the Helicone link for the model."""
         if "gpt" in model_name.lower() or re.match(
             OPENAI_O_SERIES_PATTERN, model_name.lower()

diff --git a/global_config/global_config.yaml b/global_config/global_config.yaml
@@ -5,9 +5,9 @@ dot_global_config_health_check: true
 # LLMs
 ########################################################
 default_llm:
-  default_model: gemini/gemini-2.0-flash
+  default_model: gemini/gemini-2.5-flash-preview-05-20
   default_temperature: 0.5
-  default_max_tokens: 100000
+  default_max_tokens: 1000000
 
 llm_config:
   cache_enabled: false

diff --git a/pyproject.toml b/pyproject.toml
@@ -30,7 +30,9 @@ build-backend = "hatchling.build"
 
 [tool.rye]
 managed = true
-dev-dependencies = []
+dev-dependencies = [
+    "ipython>=9.2.0",
+]
 
 [tool.hatch.metadata]
 allow-direct-references = true

diff --git a/requirements-dev.lock b/requirements-dev.lock
@@ -28,6 +28,8 @@ anyio==4.9.0
     # via httpx
     # via langfuse
     # via openai
+asttokens==3.0.0
+    # via stack-data
 asyncer==0.0.8
     # via dspy
 attrs==25.3.0
@@ -58,6 +60,8 @@ colorlog==6.9.0
     # via optuna
 datasets==3.6.0
     # via dspy
+decorator==5.2.1
+    # via ipython
 dill==0.3.8
     # via datasets
     # via multiprocess
@@ -67,6 +71,8 @@ distro==1.9.0
     # via openai
 dspy==2.6.24
     # via python-template
+executing==2.2.0
+    # via stack-data
 filelock==3.18.0
     # via datasets
     # via huggingface-hub
@@ -80,6 +86,8 @@ google-auth==2.40.1
     # via google-genai
 google-genai==1.15.0
     # via python-template
+greenlet==3.2.2
+    # via sqlalchemy
 h11==0.16.0
     # via httpcore
 httpcore==1.0.9
@@ -104,6 +112,11 @@ importlib-metadata==8.7.0
     # via litellm
 iniconfig==2.0.0
     # via pytest
+ipython==9.2.0
+ipython-pygments-lexers==1.1.1
+    # via ipython
+jedi==0.19.2
+    # via ipython
 jinja2==3.1.6
     # via litellm
 jiter==0.9.0
@@ -132,6 +145,8 @@ markdown-it-py==3.0.0
 markupsafe==3.0.2
     # via jinja2
     # via mako
+matplotlib-inline==0.1.7
+    # via ipython
 mdurl==0.1.2
     # via markdown-it-py
 multidict==6.4.3
@@ -161,17 +176,27 @@ packaging==24.1
 pandas==2.2.3
     # via datasets
     # via dspy
+parso==0.8.4
+    # via jedi
 pathspec==0.12.1
     # via black
+pexpect==4.9.0
+    # via ipython
 pillow==11.2.1
     # via python-template
 platformdirs==4.3.6
     # via black
 pluggy==1.5.0
     # via pytest
+prompt-toolkit==3.0.51
+    # via ipython
 propcache==0.3.1
     # via aiohttp
     # via yarl
+ptyprocess==0.7.0
+    # via pexpect
+pure-eval==0.2.3
+    # via stack-data
 pyarrow==20.0.0
     # via datasets
 pyasn1==0.6.1
@@ -188,6 +213,8 @@ pydantic==2.11.4
 pydantic-core==2.33.2
     # via pydantic
 pygments==2.19.1
+    # via ipython
+    # via ipython-pygments-lexers
     # via rich
 pytest==8.3.3
     # via python-template
@@ -231,6 +258,8 @@ sniffio==1.3.1
 sqlalchemy==2.0.41
     # via alembic
     # via optuna
+stack-data==0.6.3
+    # via ipython
 tenacity==9.1.2
     # via dspy
     # via python-template
@@ -246,6 +275,9 @@ tqdm==4.67.1
     # via huggingface-hub
     # via openai
     # via optuna
+traitlets==5.14.3
+    # via ipython
+    # via matplotlib-inline
 typing-extensions==4.13.2
     # via alembic
     # via anyio
@@ -267,6 +299,8 @@ urllib3==2.4.0
     # via requests
 vulture==2.14
     # via python-template
+wcwidth==0.2.13
+    # via prompt-toolkit
 websockets==15.0.1
     # via google-genai
 wrapt==1.17.2

diff --git a/requirements.lock b/requirements.lock
@@ -80,6 +80,8 @@ google-auth==2.40.1
     # via google-genai
 google-genai==1.15.0
     # via python-template
+greenlet==3.2.2
+    # via sqlalchemy
 h11==0.16.0
     # via httpcore
 httpcore==1.0.9

diff --git a/utils/llm/demo.py b/utils/llm/demo.py
@@ -0,0 +1,201 @@
+from utils.llm.dspy_inference import DSPYInference
+import dspy
+import asyncio
+import os
+import subprocess
+from pathlib import Path
+import sys
+
+# The repository to operate on is a mandatory first command-line argument.
+if not sys.argv[1:]:
+    print(
+        "Error: Please provide a repository path as the first argument. "
+        "Example: python demo.py /home/*username*/git/*forge-project-name*/"
+    )
+    sys.exit(1)
+
+# Root directory that every tool below joins its relative paths onto.
+repo_dir = Path(sys.argv[1])
+
+# Prefixes a command string must start with before execute_command_tool runs it.
+approved_commands = ['forge', 'npm', 'solc', 'foundryup', 'pnpm', 'yarn', 'git submodule', 'nvm']
+
+
+def list_files_tool(relative_path: str | None = None) -> list[str] | str:
+    """List all files in the given relative path."""
+    # NOTE(review): the docstring is presumably surfaced to the LLM as the tool
+    # description, so it is left unchanged; maintainer notes live in comments.
+    #
+    # Returns the sorted names of the regular files directly inside the directory,
+    # or a string starting with 'Restricted:' / 'Error:' when the request is refused.
+
+    if relative_path is None:
+        relative_path = '.'
+
+    if '..' in relative_path:
+        print("LLM tried to access parent directory! Returning empty list.")
+        return 'Restricted: Tried to access parent directory!'
+
+    # Joining an absolute path onto repo_dir silently discards repo_dir, and a
+    # symlink may point anywhere, so verify the resolved target is still inside.
+    repo_root = repo_dir.resolve()
+    absolute_path = (repo_root / relative_path).resolve()
+    if not absolute_path.is_relative_to(repo_root):
+        print("LLM tried to access a path outside of the repository! Returning empty list.")
+        return 'Restricted: Tried to access a path outside of the repository!'
+
+    # is_dir() instead of exists(): listing a regular file would raise NotADirectoryError.
+    if not absolute_path.is_dir():
+        print(f"Directory {relative_path} does not exist! Returning empty list.")
+        return f'Error: Directory {relative_path} does not exist!'
+
+    print("Listing files in: " + relative_path)
+    # Sorted so the listing is deterministic across runs and filesystems.
+    return sorted(entry.name for entry in absolute_path.iterdir() if entry.is_file())
+
+def list_folders_tool(relative_path: str | None = None) -> list[str] | str:
+    """List all folders in the given relative path."""
+    # NOTE(review): the docstring is presumably surfaced to the LLM as the tool
+    # description, so it is left unchanged; maintainer notes live in comments.
+    #
+    # Returns the sorted names of the sub-directories directly inside the directory,
+    # or a string starting with 'Restricted:' / 'Error:' when the request is refused.
+
+    if relative_path is None:
+        relative_path = '.'
+
+    if '..' in relative_path:
+        print("LLM tried to access parent directory! Returning empty list.")
+        return 'Restricted: Tried to access parent directory!'
+
+    # Joining an absolute path onto repo_dir silently discards repo_dir, and a
+    # symlink may point anywhere, so verify the resolved target is still inside.
+    repo_root = repo_dir.resolve()
+    absolute_path = (repo_root / relative_path).resolve()
+    if not absolute_path.is_relative_to(repo_root):
+        print("LLM tried to access a path outside of the repository! Returning empty list.")
+        return 'Restricted: Tried to access a path outside of the repository!'
+
+    # is_dir() instead of exists(): listing a regular file would raise NotADirectoryError.
+    if not absolute_path.is_dir():
+        print(f"Directory {relative_path} does not exist! Returning empty list.")
+        return f'Error: Directory {relative_path} does not exist!'
+
+    print("Listing folders in: " + relative_path)
+    # Sorted so the listing is deterministic across runs and filesystems.
+    return sorted(entry.name for entry in absolute_path.iterdir() if entry.is_dir())
+
+def read_file_tool(relative_path: str) -> str:
+    """Read the contents of the given file."""
+    # NOTE(review): the docstring is presumably surfaced to the LLM as the tool
+    # description, so it is left unchanged; maintainer notes live in comments.
+    #
+    # Returns the file's text, or a string starting with 'Restricted:' / 'Error:'
+    # when the request is refused.
+
+    if '..' in relative_path:
+        print("LLM tried to access parent directory!")
+        return 'Restricted: Tried to access parent directory!'
+
+    # Joining an absolute path onto repo_dir silently discards repo_dir, and a
+    # symlink may point anywhere, so verify the resolved target is still inside.
+    repo_root = repo_dir.resolve()
+    absolute_path = (repo_root / relative_path).resolve()
+    if not absolute_path.is_relative_to(repo_root):
+        print("LLM tried to access a path outside of the repository!")
+        return 'Restricted: Tried to access a path outside of the repository!'
+
+    # is_file() instead of exists(): opening a directory would raise IsADirectoryError.
+    if not absolute_path.is_file():
+        print(f"File {relative_path} does not exist!")
+        # Fixed: this message was missing its f-prefix and returned the literal
+        # text "{relative_path}" instead of the requested path.
+        return f'Error: File {relative_path} does not exist!'
+
+    # Explicit encoding plus errors='replace' so a stray non-UTF-8 byte yields a
+    # readable result instead of an unhandled UnicodeDecodeError.
+    with open(absolute_path, 'r', encoding='utf-8', errors='replace') as file:
+        contents = file.read()
+
+    print("File read successfully: " + relative_path)
+    return contents
+
+def execute_command_tool(command: str, relative_path: str = '.', accept_nonzero_return_code: bool = False) -> tuple[str, int]:
+    """Execute the given command inside of the given relative path and whether a non-zero return code is acceptable. Returns the output with "done" appended if the command was successful and the return code."""
+    # NOTE(review): the docstring is presumably surfaced to the LLM as the tool
+    # description, so it is left unchanged; maintainer notes live in comments.
+    #
+    # Returns (message, return_code). The message starts with 'Output:' when the
+    # command ran and its status was acceptable, otherwise with 'Error:' or
+    # 'Restricted:'. Refused or unstartable commands report return code 1.
+    import shlex  # function-scope import keeps this block self-contained
+
+    print(f"Attempting to execute command: {command} in {relative_path}")
+
+    # Tokenise the command and run it WITHOUT a shell. The previous
+    # startswith() + shell=True combination accepted strings such as
+    # "forge; <anything>" or "npmfoo", defeating the allow-list.
+    try:
+        argv = shlex.split(command)
+    except ValueError as e:  # e.g. unbalanced quotes
+        print("Command could not be parsed: " + str(e))
+        return 'Error: Command could not be parsed: ' + str(e), 1
+
+    # Compare whole leading tokens so that multi-word entries like
+    # 'git submodule' match exactly and 'forgery' does not match 'forge'.
+    approved_prefixes = [cmd.split() for cmd in approved_commands]
+    if not any(argv[:len(prefix)] == prefix for prefix in approved_prefixes):
+        print("Command must start with one of the following: " + ', '.join(approved_commands) + "!")
+        return 'Error: Command must start with one of the following: ' + ', '.join(approved_commands), 1
+
+    if '..' in relative_path:
+        print("LLM tried to access parent directory!")
+        return 'Restricted: Tried to access parent directory!', 1
+
+    # Joining an absolute path onto repo_dir silently discards repo_dir, and a
+    # symlink may point anywhere, so verify the resolved target is still inside.
+    repo_root = repo_dir.resolve()
+    absolute_path = (repo_root / relative_path).resolve()
+    if not absolute_path.is_relative_to(repo_root):
+        print("LLM tried to access a path outside of the repository!")
+        return 'Restricted: Tried to access a path outside of the repository!', 1
+
+    # The working directory must be a directory, not merely an existing path.
+    if not absolute_path.is_dir():
+        print(f"Directory {relative_path} does not exist!")
+        return f'Error: Directory {relative_path} does not exist!', 1
+
+    try:
+        result = subprocess.run(argv, cwd=absolute_path, capture_output=True, text=True)
+    except OSError as e:
+        # Raised when the executable is missing or not runnable. The old
+        # CalledProcessError handler could never fire because check= was not set.
+        print("Command could not be started: " + command)
+        return 'Error: ' + str(e), 1
+
+    print("Command executed: " + command)
+    if not accept_nonzero_return_code and result.returncode != 0:
+        return 'Error: ' + result.stderr + '\nOutput: ' + result.stdout, result.returncode
+    return 'Output: ' + result.stdout + '\ndone', result.returncode
+
+def prompt_modify_file_tool(relative_path: str, new_content: str, oneline_modification_reason: str) -> str:
+    """Overwrite an existing file with the given new content. The file will be modified in place. In the vast majority of cases you should not use this tool and should instead consult the --help of the commands available to you."""
+    # NOTE(review): the docstring is presumably surfaced to the LLM as the tool
+    # description, so it is left unchanged; maintainer notes live in comments.
+    #
+    # Asks the human operator on stdin before writing. Returns a string starting
+    # with 'Success:', 'Failure:', 'Error:' or 'Restricted:'.
+
+    if '..' in relative_path:
+        print("LLM tried to access parent directory! Returning empty string.")
+        return 'Restricted: Tried to access parent directory!'
+
+    # Joining an absolute path onto repo_dir silently discards repo_dir, and a
+    # symlink may point anywhere, so verify the resolved target is still inside
+    # before anything gets overwritten.
+    repo_root = repo_dir.resolve()
+    absolute_path = (repo_root / relative_path).resolve()
+    if not absolute_path.is_relative_to(repo_root):
+        print("LLM tried to access a path outside of the repository! Returning empty string.")
+        return 'Restricted: Tried to access a path outside of the repository!'
+
+    # is_file() instead of exists(): opening a directory for writing would raise.
+    if not absolute_path.is_file():
+        print(f"File {relative_path} does not exist! Returning empty string.")
+        return f'Error: File {relative_path} does not exist!'
+
+    print(f"LLM wants to modify file {relative_path}")
+    print(f"Reason: {oneline_modification_reason}")
+    print("Do you want to continue? (y/n)")
+    try:
+        response = input().strip().lower()
+    except EOFError:
+        # No interactive stdin available: treat it as a refusal rather than crash.
+        response = ''
+    if response != 'y':
+        print("Modification cancelled by user.")
+        return 'Failure: Modification cancelled by user!'
+
+    with open(absolute_path, 'w', encoding='utf-8') as file:
+        file.write(new_content)
+
+    return 'Success: File modified!'
+
+def final_check_before_completion() -> str:
+    """Always run this tool before finishing. This tool will tell you whether you are done or not."""
+    # Runs `forge build` then `forge test` through execute_command_tool and returns
+    # a message starting with 'Success:' or 'Failure:'.
+
+    print('Trying to finish up...')
+
+    # Gate 1: the build has to exit with status 0.
+    build_output, build_status = execute_command_tool('forge build')
+    if build_status != 0:
+        return 'Failure: Forge build failed! Please continue getting the repo to build. Here is your output: ' + build_output
+
+    print('Forge build passed! Trying to run tests...')
+
+    # Gate 2: the test run may exit non-zero; only the message prefix decides
+    # whether the run counts as a failure.
+    test_output, _ = execute_command_tool('forge test', accept_nonzero_return_code=True)
+    if test_output.startswith(('Error:', 'Failure:', 'Restricted:')):
+        return 'Failure: Forge test failed! Please continue getting the repo tests to run. Here is your output: ' + test_output
+
+    print('Forge test passed! Should exit now...')
+
+    return 'Success: Forge build and test passed!'
+
+# DSPy signature handed to DSPYInference below as `pred_signature`.
+# NOTE(review): the class docstring and field names are presumably part of the
+# prompt DSPy builds — treat them as runtime text, not as free-form documentation.
+class RunCommands(dspy.Signature):
+    """Run commands in the given directory."""
+
+    # Single input: the free-text task description passed to `inf_module.run(...)`.
+    high_level_instructions: str = dspy.InputField()
+    # Outputs the model is asked to produce: two booleans and a list of strings.
+    # Presumably: whether `forge build` / `forge test` ran, and which shell
+    # commands were executed — confirm against how the result is consumed.
+    forge_build_runs: bool = dspy.OutputField()
+    forge_test_runs: bool = dspy.OutputField()
+    shell_command_ran: list[str] = dspy.OutputField()
+
+
+# Start from a pristine checkout. WARNING: this is destructive — `git reset --hard`
+# discards uncommitted changes and `git clean -ffdx` deletes untracked and ignored
+# files and directories (including nested repositories) under repo_dir.
+# The return codes are not checked, so a failure here does not stop the script.
+subprocess.run(['git', 'reset', '--hard'], cwd=repo_dir)
+subprocess.run(['git', 'clean', '-ffdx'], cwd=repo_dir)
+
+# Build the inference module with the RunCommands signature and the tool functions
+# defined above; max_iters=50 is passed through to DSPYInference.
+# NOTE(review): presumably an upper bound on agent iterations — confirm in DSPYInference.
+inf_module = DSPYInference(
+    pred_signature=RunCommands,
+    tools=[list_files_tool, list_folders_tool, read_file_tool, execute_command_tool, prompt_modify_file_tool, final_check_before_completion],
+    max_iters=50,
+)
+
+# Run the module to completion on a fresh event loop. The f-string below is the
+# full task prompt; the only interpolated value is the comma-joined
+# approved_commands list, so the prompt follows that list automatically.
+result = asyncio.run(inf_module.run(
+    high_level_instructions=f"""
+        You are a professional smart contract security engineer.
+        Your current directory is inside of a git repository of a Foundry Forge Solidity project.
+        Your job is to build the project and run the tests.
+        You are allowed to read through files and folders in the project to understand it.
+        You can additionally only run the following commands at the root of the project:
+        {', '.join(approved_commands)}
+        You will not combine commands with && or ||.
+        You will always try to use the non-interactive or json versions of the commands. This is because the output is passed back to you in the same format.
+        Sometimes repos fail to give instructions on how to set up dependencies. In this case you should figure out how to pull them from one of the supplied commands.
+        For example, if you find out that forge-std doesn't exist, you can pull it in via `forge install foundry-rs/forge-std`.
+        Please be mindful of required versions and stick to them.
+        You should run a command with `--help` if you are unsure about its capabilities and don't have any ideas of how to proceed.
+        When supplying any relative path to a tool you will never use `..`. This is forbidden and will cause the tool to fail.
+        You are not allowed to move freely using `cd`.
+        You can additionally modify any file in the project that you have already read.
+        Your goal is to run `forge build` successfully and to run `forge test` (which must complete but may have failing tests so it may not necessarily "succeed").
+        Keep in mind that the `forge soldeer` subcommand exists and is the dependency manager for some projects. You should expect to see a `soldeer.lock` in these cases.
+        You will not give up until these commands run successfully.
+        You will never run any commands that open ports or a shell.
+        You only have one shot at this so make sure to do it right.
+        You should start by finding and opening the readme. Then, you should see what other files and folders are in the project.
+        Your response will always finish with a call to `final_check_before_completion` that returns a success.
+        You will not execute any further commands after you have verified that `forge build` and `forge test` work.
+    """
+))
+
+# Print the `shell_command_ran` output field of the RunCommands signature.
+print(result.shell_command_ran)