Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add integration test with dummy agent #1316

Merged
merged 14 commits into from Apr 30, 2024
5 changes: 4 additions & 1 deletion agenthub/__init__.py
Expand Up @@ -12,9 +12,12 @@
from . import planner_agent # noqa: E402
from . import SWE_agent # noqa: E402
from . import delegator_agent # noqa: E402
from . import dummy_agent # noqa: E402

__all__ = ['monologue_agent', 'codeact_agent',
'planner_agent', 'SWE_agent', 'delegator_agent']
'planner_agent', 'SWE_agent',
'delegator_agent',
'dummy_agent']

for agent in all_microagents.values():
name = agent['name']
Expand Down
4 changes: 4 additions & 0 deletions agenthub/dummy_agent/__init__.py
@@ -0,0 +1,4 @@
from opendevin.agent import Agent
from .agent import DummyAgent

# Register the DummyAgent class with the Agent base class under the name 'DummyAgent'.
# NOTE(review): presumably this is what lets the agent be selected by name — confirm against Agent.register.
Agent.register('DummyAgent', DummyAgent)
118 changes: 118 additions & 0 deletions agenthub/dummy_agent/agent.py
@@ -0,0 +1,118 @@
import time
from typing import List, TypedDict

from opendevin.agent import Agent
from opendevin.llm.llm import LLM
from opendevin.state import State
from opendevin.action import (
Action,
CmdRunAction,
FileWriteAction,
FileReadAction,
AgentFinishAction,
AgentThinkAction,
AddTaskAction,
ModifyTaskAction,
AgentRecallAction,
BrowseURLAction,
)
from opendevin.observation import (
Observation,
NullObservation,
CmdOutputObservation,
FileWriteObservation,
FileReadObservation,
AgentRecallObservation,
)

"""
FIXME: There are a few problems this surfaced
* FileWrites seem to add an unintended newline at the end of the file
* command_id is sometimes a number, sometimes a string
* Why isn't the output of the background command split between two steps?
* Browser not working
"""

class ActionObs(TypedDict):
    """One scripted step: an action paired with the observations expected after it."""

    action: Action
    observations: List[Observation]


# Shell command issued by the scripted background-command step.
BACKGROUND_CMD = 'echo "This is in the background" && sleep .1 && echo "This too"'


class DummyAgent(Agent):
    """
    The DummyAgent is used for e2e testing. It just sends the same set of actions deterministically,
    without making any LLM calls.

    Each entry of ``self.steps`` pairs an action with the observations that action is
    expected to produce. On every call to ``step`` the observations recorded at the tail
    of ``state.history`` are compared with the ones expected for the previous action, and
    an ``AssertionError`` is raised on a mismatch.
    """

    def __init__(self, llm: LLM):
        """Build the fixed script of actions and their expected observations.

        Args:
            llm: Passed through to the ``Agent`` base class; this class never calls it.
        """
        super().__init__(llm)
        self.steps: List[ActionObs] = [
            {
                'action': AddTaskAction(parent='0', goal='check the current directory'),
                'observations': [NullObservation('')],
            },
            {
                'action': AddTaskAction(parent='0.0', goal='run ls'),
                'observations': [NullObservation('')],
            },
            {
                'action': ModifyTaskAction(id='0.0', state='in_progress'),
                'observations': [NullObservation('')],
            },
            {
                'action': AgentThinkAction(thought='Time to get started!'),
                'observations': [NullObservation('')],
            },
            {
                'action': CmdRunAction(command='echo "foo"'),
                'observations': [CmdOutputObservation('foo', command_id=-1, command='echo "foo"')],
            },
            {
                'action': FileWriteAction(content='echo "Hello, World!"', path='hello.sh'),
                'observations': [FileWriteObservation('', path='hello.sh')],
            },
            {
                'action': FileReadAction(path='hello.sh'),
                'observations': [FileReadObservation('echo "Hello, World!"\n', path='hello.sh')],
            },
            {
                'action': CmdRunAction(command='bash hello.sh'),
                'observations': [CmdOutputObservation('Hello, World!', command_id=-1, command='bash hello.sh')],
            },
            {
                'action': CmdRunAction(command=BACKGROUND_CMD, background=True),
                'observations': [
                    CmdOutputObservation('Background command started. To stop it, send a `kill` action with id 42', command_id='42', command=BACKGROUND_CMD),  # type: ignore[arg-type]
                    CmdOutputObservation('This is in the background\nThis too\n', command_id='42', command=BACKGROUND_CMD),  # type: ignore[arg-type]
                ],
            },
            {
                'action': AgentRecallAction(query='who am I?'),
                'observations': [
                    AgentRecallObservation('', memories=['I am a computer.']),
                    # CmdOutputObservation('This too\n', command_id='42', command=BACKGROUND_CMD),
                ],
            },
            {
                'action': BrowseURLAction(url='https://google.com'),
                'observations': [
                    # BrowserOutputObservation('<html></html>', url='https://google.com', screenshot=""),
                ],
            },
            {
                'action': AgentFinishAction(),
                'observations': [],
            },
        ]

    @staticmethod
    def _normalize(obs: dict) -> dict:
        """Return an observation dict with run-specific fields removed.

        When ``extras`` carries a ``command_id`` other than ``-1``, the id is dropped
        and ``content`` is blanked, so such observations are compared only on their
        remaining fields. Otherwise the dict is returned unchanged.

        NOTE(review): presumably real command ids and background output vary between
        runs, which is why they are excluded from the comparison — confirm.
        """
        extras = obs['extras']
        if 'command_id' not in extras or extras['command_id'] == -1:
            return obs
        # Build a copy instead of deleting keys in place, so the dict handed in by the
        # caller is never modified.
        trimmed_extras = {key: value for key, value in extras.items() if key != 'command_id'}
        return {**obs, 'extras': trimmed_extras, 'content': ''}

    def _check_observations(self, state: State, prev_step: ActionObs) -> None:
        """Compare the tail of ``state.history`` with the observations expected for ``prev_step``.

        Raises:
            AssertionError: If the history holds fewer entries than expected, or any
                observation differs from its expected counterpart.
        """
        expected_observations = prev_step.get('observations', [])
        hist_start = len(state.history) - len(expected_observations)
        # Explicit raises are used instead of ``assert`` so the checks still run
        # under ``python -O``.
        if hist_start < 0:
            # Without this guard a negative index would silently wrap around and
            # compare unrelated history entries.
            raise AssertionError(
                f'Expected {len(expected_observations)} observations, '
                f'but history only has {len(state.history)} entries'
            )
        for offset, expected in enumerate(expected_observations):
            # Element 1 of each history entry is the observation.
            hist_obs = self._normalize(state.history[hist_start + offset][1].to_dict())
            expected_obs = self._normalize(expected.to_dict())
            if hist_obs != expected_obs:
                raise AssertionError(f'Expected observation {expected_obs}, got {hist_obs}')

    def step(self, state: State) -> Action:
        """Verify the previous step's observations, then return the next scripted action.

        Args:
            state: Current state; ``state.iteration`` selects the scripted step and the
                tail of ``state.history`` is checked against the previous step.

        Returns:
            The action scripted for ``state.iteration``.

        Raises:
            AssertionError: If the recorded observations do not match the expected ones.
        """
        time.sleep(0.1)
        if state.iteration > 0:
            self._check_observations(state, self.steps[state.iteration - 1])
        return self.steps[state.iteration]['action']

    def search_memory(self, query: str) -> List[str]:
        """Return the same single canned memory regardless of ``query``."""
        return ['I am a computer.']
2 changes: 1 addition & 1 deletion opendevin/action/agent.py
Expand Up @@ -21,7 +21,7 @@ class AgentRecallAction(ExecutableAction):

async def run(self, controller: 'AgentController') -> AgentRecallObservation:
return AgentRecallObservation(
content='Recalling memories...',
content='',
memories=controller.agent.search_memory(self.query),
)

Expand Down
5 changes: 4 additions & 1 deletion opendevin/sandbox/docker/exec_box.py
Expand Up @@ -122,7 +122,10 @@ def run_command(container, command):
self.container.exec_run(
f'kill -9 {pid}', workdir=SANDBOX_WORKSPACE_DIR)
return -1, f'Command: "{cmd}" timed out'
return exit_code, logs.decode('utf-8').strip()
logs_out = logs.decode('utf-8')
if logs_out.endswith('\n'):
logs_out = logs_out[:-1]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if you are changing strip() to this, you'd probably wanna do this in other sandboxes too?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TBH I want to redo all the log parsing in a separate PR--I don't think we should be stripping whitespace from log output. This is here because the tests don't run consistently otherwise ☹️

return exit_code, logs_out

def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
# mkdir -p sandbox_dest if it doesn't exist
Expand Down
15 changes: 15 additions & 0 deletions tests/integration/test_actions.py
@@ -0,0 +1,15 @@
import asyncio

from agenthub.dummy_agent import DummyAgent

from opendevin.controller import AgentController
from opendevin.llm.llm import LLM


def test_actions_with_dummy_agent():
Copy link
Collaborator

@li-boxuan li-boxuan Apr 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am sorry but maybe it's a bad idea to add this as a pytest test.

If we want to keep it this way, we need to:

  1. Add an annotation here:
@pytest.mark.skipif(os.environ.get('AGENT') != 'DummyAgent', reason='Designed to test DummyAgent only')
  2. Add another annotation in test_agent.py so that DummyAgent is skipped there:
@pytest.mark.skipif(os.environ.get('AGENT') == 'DummyAgent', reason='DummyAgent is special and cannot solve any real task')
  3. Add DummyAgent to run-integration-tests.yml, so that a dedicated job runner will run the DummyAgent test. I didn't check, but I suppose that right now all test runners run this test, which is unnecessary.

llm = LLM('not-a-real-model')
agent = DummyAgent(llm=llm)
controller = AgentController(agent=agent)

asyncio.run(controller.start('do a flip'))
# assertions are inside the DummyAgent