Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[question]Two characters cannot approach each other after they switch positions. #46

Open
shawokou123 opened this issue Apr 9, 2024 · 0 comments

Comments

@shawokou123
Copy link

I replaced the second character with Gouki and defined its color as white: Gouki_WHITE = [255, 255, 255]. As long as the two characters do not switch positions, the game proceeds normally, but once they switch positions, they can no longer approach each other. Player 1 is KEN_RED = [248, 0, 0]; player 2 is Gouki_WHITE = [255, 255, 255].
Please point out my programming error.

robot.py:

import os
import random
import re
import time
from collections import defaultdict
from typing import Dict, List, Literal, Optional

import numpy as np
from gymnasium import spaces
from loguru import logger
from phospho.lab import get_provider_and_model, get_sync_client
from rich import print

from .config import (
INDEX_TO_MOVE,
META_INSTRUCTIONS,
META_INSTRUCTIONS_WITH_LOWER,
MOVES,
NB_FRAME_WAIT,
X_SIZE,
Y_SIZE,
)
from .observer import detect_position_from_color

# Robot: a game agent that keeps a short memory of observations, asks a
# language model for moves (get_moves_from_llm / call_llm), turns them into
# a queue of button presses (plan) and plays one press per frame (act).
class Robot:
observations: List[Optional[dict]] = None # memory: observations, newest last; replaced by a fresh list in __init__ and capped at 10 entries by observe()
next_steps: List[int] # action plan: queue of actions, consumed from the front by act()
actions: dict # actions of the agents during a step of the game
# actions of the agents during the previous steps of the game:
# per agent key, the most recent non-zero actions (observe() keeps at most 10)
previous_actions: Dict[str, List[int]]
reward: float # reward of the agent, as last passed to observe()

action_space: spaces.Space
character: Optional[str] = None  # character name
side: int  # side of the stage where playing: 0 = left, 1 = right
current_direction: Literal["Left", "Right"]  # current direction facing; initialised from side, then updated from detected positions
sleepy: Optional[bool] = False  # if the robot is sleepy (act() then always returns 0)
only_punch: Optional[bool] = False  # if the robot only punch (act() queues Hadouken inputs)

model: str  # model of the robot, passed to get_provider_and_model() in call_llm()
super_bar_own: int  # declared here but not assigned anywhere in the visible code
player_nb: int  # player number; 1 and 2 select the colour of the move printout

def __init__(
    self,
    action_space: spaces.Space,
    character: str,
    side: int,
    character_color: list,
    ennemy_color: list,
    sleepy: bool = False,
    only_punch: bool = False,
    model: str = "mistral:mistral-large-latest",
    player_nb: int = 0,  # 0 means not specified
):
    """
    Store the robot configuration and reset its memory and action plan.

    Args:
        action_space: Action space of the environment (stored as is).
        character: Name of the character played by this robot.
        side: Starting side of the stage: 0 = left, 1 = right.
        character_color: Color used to locate this robot's character.
        ennemy_color: Color used to locate the opponent's character.
        sleepy: If True, ``act`` always returns the "no move" action.
        only_punch: If True, ``act`` queues Hadouken inputs.
        model: Model identifier given to the LLM provider lookup.
        player_nb: Player number; 0 means not specified.
    """
    # Static configuration.
    self.player_nb = player_nb
    self.model = model
    self.only_punch = only_punch
    self.sleepy = sleepy
    self.side = side
    self.character = character
    self.action_space = action_space
    self.character_color = character_color  # Character color list
    self.ennemy_color = ennemy_color  # ennemy color list

    # A robot starting on the left faces right, and vice versa.
    # NOTE(review): any other value of `side` leaves `current_direction`
    # unset -- confirm callers only ever pass 0 or 1.
    if side in (0, 1):
        self.current_direction = "Right" if side == 0 else "Left"

    # Mutable per-game state, created fresh for each instance.
    self.actions = {}
    self.previous_actions = defaultdict(list)
    self.next_steps = []
    self.observations = []

def act(self) -> int:
    """
    Return the action to execute on the current game frame.

    The queue ``self.next_steps`` is consumed from the front, one action
    per call. An action is an integer from 0 to 18, where 0 is no action;
    see the MOVES dictionary for the mapping of actions to moves.

    Returns:
        0 when nothing is queued or the robot is sleepy, otherwise the
        first queued action (which is removed from the queue).
    """
    # Idle when there is no plan, or when the robot is configured to sleep.
    if not self.next_steps:
        return 0  # No move
    if self.sleepy:
        return 0

    if self.only_punch:
        # Queue a Hadouken; its inputs depend on the facing direction.
        # NOTE(review): four inputs are appended on every call while only
        # one is consumed below, and this branch is never reached with an
        # empty queue -- confirm that this is intended.
        hadouken_inputs = {
            "Right": ("Down", "Right+Down", "Right", "High Punch"),
            "Left": ("Down", "Down+Left", "Left", "High Punch"),
        }
        inputs = hadouken_inputs.get(self.current_direction)
        if inputs is not None:
            self.next_steps.extend([MOVES[name] for name in inputs])

    return self.next_steps.pop(0)

def plan(self) -> None:
    """
    Fill the action plan when it is empty.

    In SF3, moves are based on combos, which are lists of actions that must
    be executed in a sequence. The move names returned by
    ``get_moves_from_llm`` are translated into button presses for the
    direction the robot is currently facing, and every combo is followed by
    NB_FRAME_WAIT "no action" frames.

    Moves of Ken
    https://www.eventhubs.com/guides/2009/may/11/ken-street-fighter-3-third-strike-character-guide/

    Moves of Ryu
    https://www.eventhubs.com/guides/2008/may/09/ryu-street-fighter-3-third-strike-character-guide/
    """
    # A plan is still being executed: nothing to do.
    if self.next_steps:
        return

    # Ask the LLM for the next combos, then expand them into buttons.
    queued_buttons = []
    for combo_name in self.get_moves_from_llm():
        facing = self.current_direction.lower()
        combo_buttons = META_INSTRUCTIONS_WITH_LOWER[combo_name][facing]
        # Pad each combo with idle frames so the inputs are registered
        # as separate moves.
        queued_buttons.extend(combo_buttons + [0] * NB_FRAME_WAIT)

    self.next_steps.extend(queued_buttons)

def observe(self, observation: dict, actions: dict, reward: float):
    """
    Record a new observation of the environment.

    The observation is annotated in place with "character_position" and
    "ennemy_position" (both None unless both characters were detected),
    appended to the memory (latest observations at the end, at most 10
    kept), and the facing direction is refreshed from it.

    Args:
        observation: Observation dictionary of the current step.
        actions: Mapping from agent key to the action taken this step.
        reward: Reward of this robot for the current step.
    """
    # Locate both characters from their colors.
    own_position = detect_position_from_color(
        observation, self.character_color
    )
    opponent_position = detect_position_from_color(
        observation, self.ennemy_color
    )

    # Positions are only trusted as a pair: if either one is missing,
    # both are stored as None.
    both_detected = own_position is not None and opponent_position is not None
    observation["character_position"] = own_position if both_detected else None
    observation["ennemy_position"] = opponent_position if both_detected else None

    # Sliding window over the 10 most recent observations.
    self.observations.append(observation)
    if len(self.observations) > 10:
        self.observations.pop(0)

    self.reward = reward

    # Remember the last 10 non-zero actions of every agent.
    for agent_key, action in actions.items():
        if action != 0:
            history = self.previous_actions[agent_key]
            history.append(action)
            if len(history) > 10:
                history.pop(0)

    # Face the opponent according to the freshly detected positions.
    self.update_current_direction(observation)

def update_current_direction(self, observation):
    """
    Refresh the facing direction from the positions in ``observation``.

    The robot faces "Right" when its x coordinate is strictly smaller than
    the opponent's, and "Left" otherwise. When either position is missing
    or None, the current direction is left unchanged.
    """
    own_position = observation.get("character_position")
    opponent_position = observation.get("ennemy_position")

    # Without both positions there is nothing to compare.
    if own_position is None or opponent_position is None:
        return

    faces_right = own_position[0] < opponent_position[0]
    self.current_direction = "Right" if faces_right else "Left"

def context_prompt(self) -> str:
    """
    Build the game-context part of the LLM prompt.

    The text is derived from the latest observation and contains, in order:
    where the opponent stands, which powerful moves the super bar allows,
    the last action of each agent (only once some action was recorded),
    the current score, and a fixed piece of advice.

    Returns:
        The context as a multi-line string ending with a newline.
    """
    side = self.side
    latest = self.observations[-1]
    own_position = latest["character_position"]
    opponent_position = latest["ennemy_position"]
    super_bar_own = latest["P" + str(side + 1)]["super_bar"][0]

    # Where is the opponent relative to us?
    if own_position is None or opponent_position is None:
        position_prompt = "Unable to determine opponent's position."
    else:
        own_x, _own_y = own_position
        opp_x, _opp_y = opponent_position
        distance_x = opp_x - own_x
        if distance_x < 0:
            position_prompt = "Your opponent is on the left."
        elif distance_x > 0:
            position_prompt = "Your opponent is on the right."
        else:
            position_prompt = "You are very close to the opponent."

    # Which special moves does the super bar allow? The "very powerful"
    # message takes precedence over the "powerful" one.
    if super_bar_own >= 120 or super_bar_own == 0:
        power_prompt = "You can now only use very powerful moves. The names of the very powerful moves are: Super attack 3, Super attack 4"
    elif super_bar_own >= 30:
        power_prompt = "You can now use a powerful move. The names of the powerful moves are: Megafireball, Super attack 2."
    else:
        power_prompt = ""

    # Last recorded action of each agent; 0 stands for "nothing recorded".
    last_action_prompt = ""
    if self.previous_actions:
        own_history = self.previous_actions["agent_" + str(side)]
        opp_history = self.previous_actions["agent_" + str(abs(1 - side))]
        last_own = own_history[-1] if len(own_history) != 0 else 0
        last_opp = opp_history[-1] if len(opp_history) != 0 else 0
        own_move_name = INDEX_TO_MOVE.get(last_own, "No action")
        opp_move_name = INDEX_TO_MOVE.get(last_opp, "No action")
        last_action_prompt = f"Your last action was {own_move_name}. The opponent's last action was {opp_move_name}."

    reward = self.reward
    if reward > 0:
        score_prompt = "You are winning. Keep attacking the opponent."
    elif reward < 0:
        score_prompt = "You are losing. Continue to attack the opponent but don't get hit."
    else:
        score_prompt = ""

    # One prompt per line, with an empty line after the position prompt.
    prompt_lines = [
        position_prompt,
        "",
        power_prompt,
        last_action_prompt,
        f"Your current score is {reward}. {score_prompt}",
        "To increase your score, move toward the opponent and attack the opponent. To prevent your score from decreasing, don't get hit by the opponent.",
    ]
    return "\n".join(prompt_lines) + "\n"

def get_moves_from_llm(
    self,
) -> List[str]:
    """
    Get a list of moves from the language model.

    The LLM reply is expected to be a bullet point list ("- <move name>"
    on each line). Every bullet is stripped and lower-cased, and kept only
    when it is a key of META_INSTRUCTIONS_WITH_LOWER. The LLM is called
    again until a reply contains at least one valid move.

    When the DISABLE_LLM environment variable is "True", the LLM is not
    called and a single random move name is returned instead.

    Returns:
        A non-empty list of move names, each one a key of
        META_INSTRUCTIONS_WITH_LOWER (which is how ``plan`` looks them up).
    """
    # If we are in the test environment, we don't want to call the LLM
    if os.getenv("DISABLE_LLM", "False") == "True":
        logger.debug("DISABLE_LLM is True, returning a random move")
        # Return a move *name*: plan() uses every returned item as a key of
        # META_INSTRUCTIONS_WITH_LOWER, so an integer taken from
        # MOVES.values() cannot be looked up there.
        return [random.choice(list(META_INSTRUCTIONS_WITH_LOWER))]

    valid_moves = []
    # NOTE(review): there is no retry limit -- a model that never answers
    # with a known move keeps this loop (and the API calls) running.
    while not valid_moves:
        llm_response = self.call_llm()

        # The response is a bullet point list of moves. Use regex
        moves = re.findall(r"- ([\w ]+)", llm_response)

        # Filter the moves that are not in the list of moves
        invalid_moves = []
        valid_moves = []
        for move in moves:
            cleaned_move_name = move.strip().lower()
            if cleaned_move_name in META_INSTRUCTIONS_WITH_LOWER:
                # Color the printout per player; other player numbers
                # are not printed.
                if self.player_nb == 1:
                    print(
                        f"[red] Player {self.player_nb} move: {cleaned_move_name}"
                    )
                elif self.player_nb == 2:
                    print(
                        f"[green] Player {self.player_nb} move: {cleaned_move_name}"
                    )
                valid_moves.append(cleaned_move_name)
            else:
                logger.debug(f"Invalid completion: {move}")
                logger.debug(f"Cleaned move name: {cleaned_move_name}")
                invalid_moves.append(move)

        if len(invalid_moves) > 1:
            logger.warning(f"Many invalid moves: {invalid_moves}")

    logger.debug(f"Next moves: {valid_moves}")
    return valid_moves

def call_llm(
    self,
    temperature: float = 0.7,
    max_tokens: int = 50,
    top_p: float = 1.0,
) -> str:
    """
    Make an API call to the language model.

    Edit this method to change the behavior of the robot!

    A system prompt is built from the character name, the current game
    context (``context_prompt``) and the names in META_INSTRUCTIONS, then
    sent through the chat-completion client resolved from ``self.model``.

    Args:
        temperature: Sampling temperature forwarded to the completion call.
        max_tokens: Maximum number of tokens forwarded to the completion call.
        top_p: ``top_p`` value forwarded to the completion call.

    Returns:
        The stripped text content of the first completion choice.
    """
    provider_name, model_name = get_provider_and_model(self.model)
    client = get_sync_client(provider_name)

    # Generate the prompts
    move_list = "- " + "\n - ".join(META_INSTRUCTIONS)
    # The examples below use "- " bullets on purpose: this is the format
    # requested from the model and the one get_moves_from_llm() parses.
    system_prompt = f"""You are the best and most aggressive Street Fighter III 3rd strike player in the world.

Your character is {self.character}. Your goal is to defeat the opponent. While constantly approaching and striking your opponent, you should also pay attention to your defense. If the opponent attacks you with moves like Fireball, you should retreat to defend, then seize the opportunity to close in and launch a deadly attack on your opponent.
{self.context_prompt()}
The moves you can use are:
{move_list}

Reply with a bullet point list of moves. The format should be: - <name of the move> separated by a new line.
Example if the opponent is close:
- Move closer
- Medium Punch

Example if the opponent is far:
- Fireball
- Move closer"""

    start_time = time.time()
    completion = client.chat.completions.create(
        model=model_name,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": "Your next moves are:"},
        ],
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
    )
    logger.debug(f"LLM call to {self.model}: {system_prompt}")
    logger.debug(f"LLM call to {self.model}: {time.time() - start_time}s")
    llm_response = completion.choices[0].message.content.strip()
    return llm_response
    
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant