New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add integration test with dummy agent #1316
Merged
Merged
Changes from 8 commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
6dff4e3
first pass at dummy
rbren 57b45ed
add assertion to dummy
rbren ff449dc
add dummy workflow
rbren cb7d7c5
beef up tests
rbren 048e625
try and fix huggingface issue
rbren 889bbd6
remove newlines
rbren 0e29791
rename test
rbren de8121c
move to pytest
rbren 85cd78d
Revert " move to pytest"
rbren dc87794
fix lint
rbren 51d044b
Merge branch 'main' into rb/dummy-test
rbren 6c5cd74
delint
rbren acd236f
Update .github/workflows/dummy-agent-test.yml
rbren 45f13fa
Merge branch 'main' into rb/dummy-test
rbren File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from opendevin.agent import Agent | ||
from .agent import DummyAgent | ||
|
||
# Register DummyAgent with the Agent class under the name 'DummyAgent'
# (presumably so it can be selected by that name at runtime -- confirm against Agent.register).
Agent.register('DummyAgent', DummyAgent) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
import time | ||
from typing import List, TypedDict | ||
|
||
from opendevin.agent import Agent | ||
from opendevin.llm.llm import LLM | ||
from opendevin.state import State | ||
from opendevin.action import ( | ||
Action, | ||
CmdRunAction, | ||
FileWriteAction, | ||
FileReadAction, | ||
AgentFinishAction, | ||
AgentThinkAction, | ||
AddTaskAction, | ||
ModifyTaskAction, | ||
AgentRecallAction, | ||
BrowseURLAction, | ||
) | ||
from opendevin.observation import ( | ||
Observation, | ||
NullObservation, | ||
CmdOutputObservation, | ||
FileWriteObservation, | ||
FileReadObservation, | ||
AgentRecallObservation, | ||
) | ||
|
||
""" | ||
FIXME: There are a few problems this surfaced | ||
* FileWrites seem to add an unintended newline at the end of the file | ||
* command_id is sometimes a number, sometimes a string | ||
* Why isn't the output of the background command split between two steps? | ||
* Browser not working | ||
""" | ||
|
||
# One scripted step of the dummy agent: the action to emit, paired with the
# observations that are expected to show up in the history afterwards.
class ActionObs(TypedDict):
    action: Action
    observations: List[Observation]


# Shell command used by the background-execution step; it prints one line,
# pauses for a tenth of a second, then prints a second line.
BACKGROUND_CMD = 'echo "This is in the background" && sleep .1 && echo "This too"'
|
||
|
||
class DummyAgent(Agent):
    '''
    The DummyAgent is used for e2e testing. It just sends the same set of actions deterministically,
    without making any LLM calls.

    Each call to `step` first verifies that the observations recorded for the
    previous scripted action match the expected ones, then returns the next
    scripted action. A mismatch raises AssertionError.
    '''

    def __init__(self, llm: LLM):
        '''
        Build the fixed script of actions and their expected observations.

        Parameters:
        - llm: passed straight through to the Agent base class; this class
          never calls it itself.
        '''
        super().__init__(llm)
        # The script is indexed by `state.iteration` in `step`, so the order
        # of the entries is the order in which the actions are emitted.
        self.steps: List[ActionObs] = [{
            'action': AddTaskAction(parent='0', goal='check the current directory'),
            'observations': [NullObservation('')],
        }, {
            'action': AddTaskAction(parent='0.0', goal='run ls'),
            'observations': [NullObservation('')],
        }, {
            'action': ModifyTaskAction(id='0.0', state='in_progress'),
            'observations': [NullObservation('')],
        }, {
            'action': AgentThinkAction(thought='Time to get started!'),
            'observations': [NullObservation('')],
        }, {
            'action': CmdRunAction(command='echo "foo"'),
            'observations': [CmdOutputObservation('foo', command_id=-1, command='echo "foo"')],
        }, {
            'action': FileWriteAction(content='echo "Hello, World!"', path='hello.sh'),
            'observations': [FileWriteObservation('', path='hello.sh')],
        }, {
            'action': FileReadAction(path='hello.sh'),
            # The trailing newline is expected here even though the write
            # above did not include one.
            'observations': [FileReadObservation('echo "Hello, World!"\n', path='hello.sh')],
        }, {
            'action': CmdRunAction(command='bash hello.sh'),
            'observations': [CmdOutputObservation('Hello, World!', command_id=-1, command='bash hello.sh')],
        }, {
            'action': CmdRunAction(command=BACKGROUND_CMD, background=True),
            'observations': [
                CmdOutputObservation('Background command started. To stop it, send a `kill` action with id 42', command_id='42', command=BACKGROUND_CMD),  # type: ignore[arg-type]
                CmdOutputObservation('This is in the background\nThis too\n', command_id='42', command=BACKGROUND_CMD),  # type: ignore[arg-type]
            ]
        }, {
            'action': AgentRecallAction(query='who am I?'),
            'observations': [
                AgentRecallObservation('', memories=['I am a computer.']),
                # CmdOutputObservation('This too\n', command_id='42', command=BACKGROUND_CMD),
            ],
        }, {
            'action': BrowseURLAction(url='https://google.com'),
            'observations': [
                # BrowserOutputObservation('<html></html>', url='https://google.com', screenshot=""),
            ],
        }, {
            'action': AgentFinishAction(),
            'observations': [],
        }]

    @staticmethod
    def _comparable(observation: Observation) -> dict:
        '''
        Return a dict form of `observation` that is suitable for comparison.

        When the observation carries a `command_id` other than -1, the id is
        removed and the content is blanked, so neither takes part in the
        comparison (presumably because real ids and background output are not
        deterministic -- confirm). The work is done on copies so that whatever
        `to_dict()` returns is never mutated.
        '''
        as_dict = dict(observation.to_dict())
        extras = dict(as_dict['extras'])
        if 'command_id' in extras and extras['command_id'] != -1:
            del extras['command_id']
            as_dict['content'] = ''
        as_dict['extras'] = extras
        return as_dict

    def step(self, state: State) -> Action:
        '''
        Check the outcome of the previous scripted action, then return the next one.

        Parameters:
        - state: supplies `iteration` (index into the script) and `history`
          (entries whose second element is the recorded observation).

        Returns:
        - The scripted action for `state.iteration`.

        Raises:
        - AssertionError: if the history is shorter than the expected
          observations, or if any recorded observation differs from the
          expected one. Raised explicitly rather than via `assert` so the
          check still runs under `python -O`.
        '''
        time.sleep(0.1)
        if state.iteration > 0:
            expected_observations = self.steps[state.iteration - 1]['observations']
            # The expected observations are compared against the tail of the history.
            hist_start = len(state.history) - len(expected_observations)
            if hist_start < 0:
                # Without this guard a negative index would wrap around and
                # compare against unrelated history entries.
                raise AssertionError(
                    f'Expected {len(expected_observations)} observations, '
                    f'but history only has {len(state.history)} entries'
                )
            for hist_index, expected in enumerate(expected_observations, start=hist_start):
                hist_obs = self._comparable(state.history[hist_index][1])
                expected_obs = self._comparable(expected)
                if hist_obs != expected_obs:
                    print('\nactual', hist_obs)
                    print('\nexpect', expected_obs)
                    raise AssertionError(f'Expected observation {expected_obs}, got {hist_obs}')
        return self.steps[state.iteration]['action']

    def search_memory(self, query: str) -> List[str]:
        '''
        Return a fixed single-item memory list, regardless of `query`.

        The returned value matches the memories expected after the scripted
        AgentRecallAction step.
        '''
        return ['I am a computer.']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import asyncio | ||
|
||
from agenthub.dummy_agent import DummyAgent | ||
|
||
from opendevin.controller import AgentController | ||
from opendevin.llm.llm import LLM | ||
|
||
|
||
def test_actions_with_dummy_agent(): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am sorry, but maybe it's a bad idea to add this as a pytest test. If we want to keep it this way, we need to add skip markers like the following:
@pytest.mark.skipif(os.environ.get('AGENT') != 'DummyAgent', reason='Designed to test DummyAgent only')
@pytest.mark.skipif(os.environ.get('AGENT') == 'DummyAgent', reason='DummyAgent is special and cannot solve any real task')
|
||
llm = LLM('not-a-real-model') | ||
agent = DummyAgent(llm=llm) | ||
controller = AgentController(agent=agent) | ||
|
||
asyncio.run(controller.start('do a flip')) | ||
# assertions are inside the DummyAgent |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you are changing `strip()` to this, you'd probably want to do the same in the other sandboxes too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TBH I want to redo all the log parsing in a separate PR--I don't think we should be stripping whitespace from log output. This is here because the tests don't run consistently otherwise☹️