Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,6 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/


.vscode/
4 changes: 2 additions & 2 deletions global_config/global_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def unwrap(obj):

return {k: unwrap(v) for k, v in self.__dict__.items()}

def llm_api_key(self, model_name: str = None) -> str:
def llm_api_key(self, model_name: str | None = None) -> str:
"""Returns the appropriate API key based on the model name."""

model_identifier = model_name or self.model_name
Expand All @@ -122,7 +122,7 @@ def llm_api_key(self, model_name: str = None) -> str:
else:
raise ValueError(f"No API key configured for model: {model_identifier}")

def api_base(self, model_name: str) -> str:
def api_base(self, model_name: str) -> str | None:
"""Returns the Helicone link for the model."""
if "gpt" in model_name.lower() or re.match(
OPENAI_O_SERIES_PATTERN, model_name.lower()
Expand Down
4 changes: 2 additions & 2 deletions global_config/global_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ dot_global_config_health_check: true
# LLMs
########################################################
default_llm:
default_model: gemini/gemini-2.0-flash
default_model: gemini/gemini-2.5-flash-preview-05-20
default_temperature: 0.5
default_max_tokens: 100000
default_max_tokens: 1000000

llm_config:
cache_enabled: false
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ build-backend = "hatchling.build"

[tool.rye]
managed = true
dev-dependencies = []
dev-dependencies = [
"ipython>=9.2.0",
]

[tool.hatch.metadata]
allow-direct-references = true
Expand Down
34 changes: 34 additions & 0 deletions requirements-dev.lock
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ anyio==4.9.0
# via httpx
# via langfuse
# via openai
asttokens==3.0.0
# via stack-data
asyncer==0.0.8
# via dspy
attrs==25.3.0
Expand Down Expand Up @@ -58,6 +60,8 @@ colorlog==6.9.0
# via optuna
datasets==3.6.0
# via dspy
decorator==5.2.1
# via ipython
dill==0.3.8
# via datasets
# via multiprocess
Expand All @@ -67,6 +71,8 @@ distro==1.9.0
# via openai
dspy==2.6.24
# via python-template
executing==2.2.0
# via stack-data
filelock==3.18.0
# via datasets
# via huggingface-hub
Expand All @@ -80,6 +86,8 @@ google-auth==2.40.1
# via google-genai
google-genai==1.15.0
# via python-template
greenlet==3.2.2
# via sqlalchemy
h11==0.16.0
# via httpcore
httpcore==1.0.9
Expand All @@ -104,6 +112,11 @@ importlib-metadata==8.7.0
# via litellm
iniconfig==2.0.0
# via pytest
ipython==9.2.0
ipython-pygments-lexers==1.1.1
# via ipython
jedi==0.19.2
# via ipython
jinja2==3.1.6
# via litellm
jiter==0.9.0
Expand Down Expand Up @@ -132,6 +145,8 @@ markdown-it-py==3.0.0
markupsafe==3.0.2
# via jinja2
# via mako
matplotlib-inline==0.1.7
# via ipython
mdurl==0.1.2
# via markdown-it-py
multidict==6.4.3
Expand Down Expand Up @@ -161,17 +176,27 @@ packaging==24.1
pandas==2.2.3
# via datasets
# via dspy
parso==0.8.4
# via jedi
pathspec==0.12.1
# via black
pexpect==4.9.0
# via ipython
pillow==11.2.1
# via python-template
platformdirs==4.3.6
# via black
pluggy==1.5.0
# via pytest
prompt-toolkit==3.0.51
# via ipython
propcache==0.3.1
# via aiohttp
# via yarl
ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.3
# via stack-data
pyarrow==20.0.0
# via datasets
pyasn1==0.6.1
Expand All @@ -188,6 +213,8 @@ pydantic==2.11.4
pydantic-core==2.33.2
# via pydantic
pygments==2.19.1
# via ipython
# via ipython-pygments-lexers
# via rich
pytest==8.3.3
# via python-template
Expand Down Expand Up @@ -231,6 +258,8 @@ sniffio==1.3.1
sqlalchemy==2.0.41
# via alembic
# via optuna
stack-data==0.6.3
# via ipython
tenacity==9.1.2
# via dspy
# via python-template
Expand All @@ -246,6 +275,9 @@ tqdm==4.67.1
# via huggingface-hub
# via openai
# via optuna
traitlets==5.14.3
# via ipython
# via matplotlib-inline
typing-extensions==4.13.2
# via alembic
# via anyio
Expand All @@ -267,6 +299,8 @@ urllib3==2.4.0
# via requests
vulture==2.14
# via python-template
wcwidth==0.2.13
# via prompt-toolkit
websockets==15.0.1
# via google-genai
wrapt==1.17.2
Expand Down
2 changes: 2 additions & 0 deletions requirements.lock
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ google-auth==2.40.1
# via google-genai
google-genai==1.15.0
# via python-template
greenlet==3.2.2
# via sqlalchemy
h11==0.16.0
# via httpcore
httpcore==1.0.9
Expand Down
201 changes: 201 additions & 0 deletions utils/llm/demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
from utils.llm.dspy_inference import DSPYInference
import dspy
import asyncio
import os
import subprocess
from pathlib import Path
import sys

# The repository to operate on is the first command-line argument; bail out
# with a usage hint when it is missing.
if not sys.argv[1:]:
    print("Error: Please provide a repository path as the first argument. Example: python demo.py /home/*username*/git/*forge-project-name*/")
    sys.exit(1)

# Root directory that every tool below resolves its relative paths against.
repo_dir = Path(sys.argv[1])

# Command prefixes the model is allowed to run through execute_command_tool.
# The same list is interpolated into the prompt so the model knows its limits.
approved_commands = [
    'forge', 'npm', 'solc', 'foundryup',
    'pnpm', 'yarn', 'git submodule', 'nvm',
]


def list_files_tool(relative_path: str | None = None) -> list[str] | str:
"""List all files in the given relative path."""

if relative_path is None:
relative_path = '.'

if '..' in relative_path:
print("LLM tried to access parent directory! Returning empty list.")
return 'Restricted: Tried to access parent directory!'

absolute_path = repo_dir / relative_path

if not os.path.exists(absolute_path):
print(f"Directory {relative_path} does not exist! Returning empty list.")
return f'Error: Directory {relative_path} does not exist!'

print("Listing files in: " + relative_path)
return [f for f in os.listdir(absolute_path) if os.path.isfile(os.path.join(absolute_path, f))]

def list_folders_tool(relative_path: str | None = None) -> list[str] | str:
"""List all folders in the given relative path."""

if relative_path is None:
relative_path = '.'

if '..' in relative_path:
print("LLM tried to access parent directory! Returning empty list.")
return 'Restricted: Tried to access parent directory!'

absolute_path = repo_dir / relative_path

if not os.path.exists(absolute_path):
print(f"Directory {relative_path} does not exist! Returning empty list.")
return f'Error: Directory {relative_path} does not exist!'

print("Listing folders in: " + relative_path)
return [f for f in os.listdir(absolute_path) if os.path.isdir(os.path.join(absolute_path, f))]

def read_file_tool(relative_path: str) -> str:
    """Read the contents of the given file.

    Args:
        relative_path: File to read, relative to the repository root
            (``repo_dir``).

    Returns:
        The file's text, or a message starting with ``Restricted:`` or
        ``Error:`` when the request is refused.
    """
    if '..' in relative_path:
        print("LLM tried to access parent directory!")
        return 'Restricted: Tried to access parent directory!'

    # An absolute path replaces repo_dir when joined, and symlinks may lead
    # outside the repo; confirm the resolved target is still contained.
    root = repo_dir.resolve()
    target = (root / relative_path).resolve()
    if not target.is_relative_to(root):
        print("LLM tried to access a path outside of the repository!")
        return 'Restricted: Tried to access a path outside of the repository!'

    if not target.exists():
        print(f"File {relative_path} does not exist!")
        # The original was missing the f-prefix here, so the model received the
        # literal text "{relative_path}" instead of the offending path.
        return f'Error: File {relative_path} does not exist!'

    # Opening a directory would raise IsADirectoryError; report it instead.
    if not target.is_file():
        print(f"{relative_path} is not a file!")
        return f'Error: {relative_path} is not a file!'

    with open(target, 'r') as file:
        contents = file.read()

    # Only announce success once the read has actually completed.
    print("File read successfully: " + relative_path)
    return contents

def execute_command_tool(command: str, relative_path: str = '.', accept_nonzero_return_code: bool = False) -> tuple[str, int]:
    """Execute the given command inside of the given relative path and whether a non-zero return code is acceptable. Returns the output with "done" appended if the command was successful and the return code.

    The command is split into arguments with shell-style quoting rules and run
    directly, without a shell, so operators such as ``;``, ``&&``, ``|``,
    ``>`` and ``$(...)`` are NOT interpreted; they are passed to the program
    as plain arguments.

    Args:
        command: Command line to run. Its leading word(s) must exactly match
            one of ``approved_commands`` (e.g. ``forge`` or ``git submodule``).
        relative_path: Working directory, relative to the repository root
            (``repo_dir``).
        accept_nonzero_return_code: When true, a non-zero exit status is
            reported as normal output rather than as an error.

    Returns:
        ``(message, return_code)``. ``message`` starts with ``Output:`` on
        success and with ``Error:`` or ``Restricted:`` otherwise; refusals and
        launch failures use return code ``1``.
    """
    # Function-scope import keeps this block self-contained.
    import shlex

    print(f"Attempting to execute command: {command} in {relative_path}")

    try:
        argv = shlex.split(command)
    except ValueError as e:
        # Raised for unbalanced quotes and similar malformed input.
        print("Command could not be parsed: " + str(e))
        return 'Error: Command could not be parsed: ' + str(e), 1

    # Compare whole tokens rather than a raw string prefix: a prefix test
    # would accept "forgery ..." and, together with a shell, "forge; rm -rf ~".
    is_approved = any(
        argv[:len(prefix)] == prefix
        for prefix in (approved.split() for approved in approved_commands)
    )
    if not is_approved:
        print("Command must start with one of the following: " + ', '.join(approved_commands) + "!")
        return 'Error: Command must start with one of the following: ' + ', '.join(approved_commands), 1

    if '..' in relative_path:
        print("LLM tried to access parent directory!")
        return 'Restricted: Tried to access parent directory!', 1

    # An absolute path replaces repo_dir when joined, and symlinks may lead
    # outside the repo; confirm the resolved working directory is contained.
    root = repo_dir.resolve()
    target = (root / relative_path).resolve()
    if not target.is_relative_to(root):
        print("LLM tried to access a path outside of the repository!")
        return 'Restricted: Tried to access a path outside of the repository!', 1

    if not target.exists():
        print(f"Directory {relative_path} does not exist!")
        return f'Error: Directory {relative_path} does not exist!', 1

    if not target.is_dir():
        print(f"{relative_path} is not a directory!")
        return f'Error: {relative_path} is not a directory!', 1

    try:
        # Argument list + no shell: the model-supplied text cannot inject
        # additional commands.
        result = subprocess.run(argv, cwd=target, capture_output=True, text=True)
    except OSError as e:
        # subprocess.run without check=True never raises CalledProcessError;
        # what can actually fail is launching the program (e.g. not installed).
        print("Command could not be started: " + str(e))
        return 'Error: ' + str(e), 1

    print("Command executed: " + command)
    if not accept_nonzero_return_code and result.returncode != 0:
        return 'Error: ' + result.stderr + '\nOutput: ' + result.stdout, result.returncode
    return 'Output: ' + result.stdout + '\ndone', result.returncode

def prompt_modify_file_tool(relative_path: str, new_content: str, oneline_modification_reason: str) -> str:
    """Overwrite an existing file with the given new content. The file will be modified in place. In the vast majority of cases you should not use this tool and should instead consult the --help of the commands available to you.

    The human operator is asked on the terminal to confirm every modification;
    anything other than ``y`` cancels it.

    Args:
        relative_path: Existing file to overwrite, relative to the repository
            root (``repo_dir``).
        new_content: Full replacement text for the file.
        oneline_modification_reason: Short justification shown to the operator
            before they decide.

    Returns:
        A message starting with ``Success:``, ``Failure:``, ``Error:`` or
        ``Restricted:`` describing the outcome.
    """
    if '..' in relative_path:
        print("LLM tried to access parent directory! Returning empty string.")
        return 'Restricted: Tried to access parent directory!'

    # An absolute path replaces repo_dir when joined, and symlinks may lead
    # outside the repo; never write to anything that resolves outside of it.
    root = repo_dir.resolve()
    target = (root / relative_path).resolve()
    if not target.is_relative_to(root):
        print("LLM tried to access a path outside of the repository!")
        return 'Restricted: Tried to access a path outside of the repository!'

    if not target.exists():
        print(f"File {relative_path} does not exist! Returning empty string.")
        return f'Error: File {relative_path} does not exist!'

    # Opening a directory for writing would raise IsADirectoryError.
    if not target.is_file():
        print(f"{relative_path} is not a file!")
        return f'Error: {relative_path} is not a file!'

    print(f"LLM wants to modify file {relative_path}")
    print(f"Reason: {oneline_modification_reason}")
    print("Do you want to continue? (y/n)")
    try:
        # Tolerate stray whitespace around the answer (e.g. "y ").
        response = input().strip().lower()
    except EOFError:
        # No interactive stdin means nobody can approve; treat as a refusal.
        response = ''
    if response != 'y':
        print("Modification cancelled by user.")
        return 'Failure: Modification cancelled by user!'

    with open(target, 'w') as file:
        file.write(new_content)

    return 'Success: File modified!'

def final_check_before_completion() -> str:
    """Always run this tool before finishing. This tool will tell you whether you are done or not."""
    # NOTE(review): the docstring above is kept verbatim because it is
    # presumably surfaced to the model as this tool's description - confirm.

    print('Trying to finish up...')

    # Step 1: the build must exit with status 0.
    build_output, build_status = execute_command_tool('forge build')
    if build_status != 0:
        return 'Failure: Forge build failed! Please continue getting the repo to build. Here is your output: ' + build_output

    print('Forge build passed! Trying to run tests...')

    # Step 2: the test run only has to be launched; a non-zero exit status is
    # accepted, so only tool-level refusals/errors count as failure here.
    test_output, _ = execute_command_tool('forge test', accept_nonzero_return_code=True)
    failure_prefixes = ('Error:', 'Failure:', 'Restricted:')
    if test_output.startswith(failure_prefixes):
        return 'Failure: Forge test failed! Please continue getting the repo tests to run. Here is your output: ' + test_output

    print('Forge test passed! Should exit now...')
    return 'Success: Forge build and test passed!'

class RunCommands(dspy.Signature):
    """Run commands in the given directory."""

    # NOTE(review): DSPy presumably feeds the docstring above to the model as
    # the task instruction, so treat its wording as prompt text - confirm
    # before editing it.

    # Single input: the free-form task description supplied by the caller.
    high_level_instructions: str = dspy.InputField()
    # Outputs produced by the model. Judging by the names, the two booleans
    # report whether `forge build` / `forge test` ran - verify against usage.
    forge_build_runs: bool = dspy.OutputField()
    forge_test_runs: bool = dspy.OutputField()
    # Output list of strings; the script prints this field after the run.
    shell_command_ran: list[str] = dspy.OutputField()


# Put the target repository back into a pristine state before the agent runs.
# WARNING: this is destructive - `git reset --hard` drops uncommitted changes
# to tracked files and `git clean -ffdx` deletes untracked and ignored files
# and directories. The return codes are not checked, so a failure here (e.g.
# repo_dir is not a git repository) does not stop the script.
subprocess.run(['git', 'reset', '--hard'], cwd=repo_dir)
subprocess.run(['git', 'clean', '-ffdx'], cwd=repo_dir)

# Build the inference module from the RunCommands signature and the tool
# functions defined in this file. max_iters=50 is passed through to
# DSPYInference; NOTE(review): presumably an upper bound on agent/tool
# iterations - confirm in utils.llm.dspy_inference.
inf_module = DSPYInference(
pred_signature=RunCommands,
tools=[list_files_tool, list_folders_tool, read_file_tool, execute_command_tool, prompt_modify_file_tool, final_check_before_completion],
max_iters=50,
)

# Drive the async `run` coroutine to completion. The prompt is an f-string so
# that the approved command list is interpolated into the instructions; every
# character inside the triple quotes is part of the text given to the model.
result = asyncio.run(inf_module.run(
high_level_instructions=f"""
You are a professional smart contract security engineer.
Your current directory is inside of a git repository of a Foundry Forge Solidity project.
Your job is to build the project and run the tests.
You are allowed to read through files and folders in the project to understand it.
You can additionally only run the following commands at the root of the project:
{', '.join(approved_commands)}
You will not combine commands with && or ||.
You will always try to use the non-interactive or json versions of the commands. This is because the output is passed back to you in the same format.
Sometimes repos fail to give instructions on how to set up dependencies. In this case you should figure out how to pull them from one of the supplied commands.
For example, if you find out that forge-std doesn't exist, you can pull it in via `forge install foundry-rs/forge-std`.
Please be mindful of required versions and stick to them.
You should run a command with `--help` if you are unsure about its capabilities and don't have any ideas of how to proceed.
When supplying any relative path to a tool you will never use `..`. This is forbidden and will cause the tool to fail.
You are not allowed to move freely using `cd`.
You can additionally modify any file in the project that you have already read.
Your goal is to run `forge build` successfully and to run `forge test` (which must complete but may have failing tests so it may not necessarily "succeed").
Keep in mind that the `forge soldeer` subcommand exists and is the dependency manager for some projects. You should expect to see a `soldeer.lock` in these cases.
You will not give up until these commands run successfully.
You will never run any commands that open ports or a shell.
You only have one shot at this so make sure to do it right.
You should start by finding and opening the readme. Then, you should see what other files and folders are in the project.
Your response will always finish with a call to `final_check_before_completion` that returns a success.
You will not execute any further commands after you have verified that `forge build` and `forge test` work.
"""
))

# Show the `shell_command_ran` output field of the result. NOTE(review): this
# is a signature output field, so it is presumably what the model reports
# having run rather than a log recorded by execute_command_tool - confirm.
print(result.shell_command_ran)
Loading