diff --git a/README.md b/README.md
index 7f4040693..5f63cd526 100644
--- a/README.md
+++ b/README.md
@@ -121,19 +121,21 @@ Press key ```r``` for loading a new scenario, and ```b``` or ```q``` for switchi
 [comment]: <> (```)
 
 ### Basic Usage
-To build the RL environment in python script, you can simply code in the OpenAI gym format as:
+To build the RL environment in a Python script, you can simply follow the Farama Gymnasium format:
 ```python
 import metadrive  # Import this package to register the environment!
-import gym
+import gymnasium as gym
+
+env = gym.make("MetaDrive-validation-v0", config={"use_render": True})
+
+# Alternatively, you can instantiate the environment class directly
+# env = metadrive.MetaDriveEnv(config={"use_render": True, "num_scenarios": 100})
 
-env = gym.make("MetaDrive-v0", config=dict(use_render=True))
-# env = metadrive.MetaDriveEnv(config=dict(environment_num=100))  # Or build environment from class
 env.reset()
 for i in range(1000):
-    obs, reward, done, info = env.step(env.action_space.sample())  # Use random policy
-    env.render()
-    if done:
+    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())  # Use random policy
+    if terminated or truncated:
        env.reset()
 env.close()
 ```
diff --git a/documentation/source/get_start.rst b/documentation/source/get_start.rst
index 1765b21c9..149744f84 100644
--- a/documentation/source/get_start.rst
+++ b/documentation/source/get_start.rst
@@ -59,14 +59,13 @@ The following scripts is a minimal example for instantiating a MetaDrive environ
 .. code-block:: python
 
     import metadrive  # Import this package to register the environment!
-    import gym
+    import gymnasium as gym
 
-    env = gym.make("MetaDrive-v0", config=dict(use_render=True))
+    env = gym.make("MetaDrive-validation-v0", config={"use_render": True})
     env.reset()
     for i in range(1000):
-        obs, reward, done, info = env.step(env.action_space.sample())
-        env.render()
-        if done:
+        obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
+        if terminated or truncated:
             env.reset()
     env.close()
diff --git a/documentation/source/observation.rst b/documentation/source/observation.rst
index 612c4e187..634cedd87 100644
--- a/documentation/source/observation.rst
+++ b/documentation/source/observation.rst
@@ -64,12 +64,12 @@ The following is a minimal script to use Top-down observation.
 
     from metadrive import TopDownMetaDrive
 
-    env = TopDownMetaDrive()
-    o = env.reset()
-    for i in range(1, 100000):
-        o, r, d, info = env.step([0, 1])
-        env.render(mode="top_down")
-        if d:
+    env = TopDownMetaDrive({"render_mode": "top_down"})
+    o, i = env.reset()
+    for s in range(1, 100000):
+        o, r, tm, tc, info = env.step([0, 1])
+        env.render()
+        if tm or tc:
             env.reset()
     env.close()
diff --git a/documentation/source/reward_cost_and_termination_function.rst b/documentation/source/reward_cost_and_termination_function.rst
index 752733f5e..38c17b717 100644
--- a/documentation/source/reward_cost_and_termination_function.rst
+++ b/documentation/source/reward_cost_and_termination_function.rst
@@ -3,8 +3,8 @@ Reward, Cost, Termination and Step Information
 ###############################################
 
-Following the standard OpenAI Gym API, after each step of the environment :code:`env.step(...)`, the environment will return
-a tuple containing four items: :code:`(obs, reward, done, info)`. In this page, we discuss the design of reward function :code:`reward`, cost function :code:`info["cost"]`,
-termination criterion :code:`done` in various settings and the details of step information :code:`info`.
+Following the standard Farama Gymnasium API, after each step of the environment :code:`env.step(...)`, the environment will return
+a tuple containing five items: :code:`(obs, reward, terminated, truncated, info)`. In this page, we discuss the design of reward function :code:`reward`, cost function :code:`info["cost"]`,
+termination criterion :code:`terminated` in various settings, truncation information :code:`truncated`, and the details of step information :code:`info`.
 
 Reward Function
 #################
@@ -81,6 +81,11 @@ The environmental episode has a **minimal length** of :code:`horizon` steps and
 If you wish to disable the respawning mechanism in MARL, set the config :code:`allow_respawn = False`.
 In this case, the environmental episode will terminate if no active vehicles are in the scene.
 
+
+Truncation Information
+#######################
+Currently, :code:`truncated` is always set to :code:`False` for each vehicle.
+
 
 Step Information
 #######################
diff --git a/metadrive/envs/base_env.py b/metadrive/envs/base_env.py
index 8197d4345..f9b613c94 100644
--- a/metadrive/envs/base_env.py
+++ b/metadrive/envs/base_env.py
@@ -3,7 +3,8 @@
 from collections import defaultdict
 from typing import Union, Dict, AnyStr, Optional, Tuple, Callable
 
-import gym
+import gymnasium as gym
+
 import numpy as np
 from panda3d.core import PNMImage
@@ -50,7 +51,8 @@
     action_check=False,
 
     # ===== Rendering =====
-    use_render=False,  # pop a window to render or not
+    use_render=False,  # if True, pop up a window to render
+    render_mode=None,  # "human" requires use_render=True; "rgb" returns a numpy array; None does neither
     debug=False,
     disable_model_compression=False,  # disable compression if you wish to launch the window quicker.
     cull_scene=True,  # only for debug use
@@ -201,15 +203,15 @@ class BaseEnv(gym.Env):
     # Force to use this seed if necessary. Note that the recipient of the forced seed should be explicitly implemented.
-    _DEBUG_RANDOM_SEED = None
+    _DEBUG_RANDOM_SEED: Union[int, None] = None
     DEFAULT_AGENT = DEFAULT_AGENT
 
     @classmethod
-    def default_config(cls) -> "Config":
+    def default_config(cls) -> Config:
         return Config(BASE_DEFAULT_CONFIG)
 
-    # ===== Intialization =====
-    def __init__(self, config: dict = None):
+    # ===== Initialization =====
+    def __init__(self, config: Union[dict, None] = None):
         if config is None:
             config = {}
         merged_config = self._merge_extra_config(config)
@@ -236,11 +238,12 @@ def __init__(self, config: dict = None):
 
         # self.engine: Optional[BaseEngine] = None
 
         # In MARL envs with respawn mechanism, varying episode lengths might happen.
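+        # With the Gymnasium API, step() now returns (obs, reward, terminated, truncated, info);
+        # the per-episode bookkeeping below is surfaced through the `info` entry of that tuple.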
-        self.dones = None
         self.episode_rewards = defaultdict(float)
         self.episode_lengths = defaultdict(int)
 
-    def _merge_extra_config(self, config: Union[dict, "Config"]) -> "Config":
+    def _merge_extra_config(self, config: Union[dict, Config]) -> Config:
         """Check, update, sync and overwrite some config."""
         return config
@@ -294,8 +295,7 @@ def _after_lazy_init(self):
     def step(self, actions: Union[np.ndarray, Dict[AnyStr, np.ndarray], int]):
         actions = self._preprocess_actions(actions)
         engine_info = self._step_simulator(actions)
-        o, r, d, i = self._get_step_return(actions, engine_info=engine_info)
-        return o, r, d, i
+        return self._get_step_return(actions, engine_info=engine_info)
 
     def _preprocess_actions(self, actions: Union[np.ndarray, Dict[AnyStr, np.ndarray], int]) \
             -> Union[np.ndarray, Dict[AnyStr, np.ndarray], int]:
@@ -343,17 +343,18 @@ def done_function(self, vehicle_id: str) -> Tuple[bool, Dict]:
         raise NotImplementedError()
 
     def render(self,
-               mode='human',
                text: Optional[Union[dict, str]] = None,
                return_bytes=False,
                *args,
                **kwargs) -> Optional[np.ndarray]:
         """
         This is a pseudo-render function, only used to update onscreen message when using panda3d backend
-        :param mode: 'rgb'/'human'
-        :param text:text to show
+        :param text: text to show
         :return: when mode is 'rgb', image array is returned
         """
+
+        mode = self.config["render_mode"]
+
         if mode in ["top_down", "topdown", "bev", "birdview"]:
             ret = self._render_topdown(text=text, *args, **kwargs)
             return ret
@@ -382,15 +383,14 @@ def render(self,
         #     logging.warning("You do not set 'image_observation' or 'image_observation' to True, so no image will be returned!")
         return None
 
-    def reset(self, force_seed: Union[None, int] = None):
+    def reset(self, seed: Union[None, int] = None):
         """
-        Reset the env, scene can be restored and replayed by giving episode_data
-        Reset the environment or load an episode from episode data to recover is
-        :param force_seed: The seed to set the env.
-        :return: None
+        Reset the environment, or load an episode from episode data to recover the scene.
+        :param seed: The seed to set the env.
+        :return: A tuple (observation, reset info), following the Gymnasium reset API.
         """
         self.lazy_init()  # it only works the first time when reset() is called to avoid the error when render
-        self._reset_global_seed(force_seed)
+        self._reset_global_seed(seed)
         if self.engine is None:
             raise ValueError(
                 "Current MetaDrive instance is broken. 
Please make sure there is only one active MetaDrive "
@@ -411,12 +412,30 @@ def reset(self, force_seed: Union[None, int] = None):
         return self._get_reset_return()
 
     def _get_reset_return(self):
-        ret = {}
-        self.engine.after_step()
+        # TODO: figure out how to get the information of the before step
+        scene_manager_before_step_infos = {}
+        scene_manager_after_step_infos = self.engine.after_step()
+
+        obses = {}
+        done_infos = {}
+        cost_infos = {}
+        reward_infos = {}
+        engine_info = merge_dicts(
+            scene_manager_after_step_infos, scene_manager_before_step_infos, allow_new_keys=True, without_copy=True
+        )
         for v_id, v in self.vehicles.items():
             self.observations[v_id].reset(self, v)
-            ret[v_id] = self.observations[v_id].observe(v)
-        return ret if self.is_multi_agent else self._wrap_as_single_agent(ret)
+            obses[v_id] = self.observations[v_id].observe(v)
+            _, reward_infos[v_id] = self.reward_function(v_id)
+            _, done_infos[v_id] = self.done_function(v_id)
+            _, cost_infos[v_id] = self.cost_function(v_id)
+
+        step_infos = concat_step_infos([engine_info, done_infos, reward_infos, cost_infos])
+
+        if self.is_multi_agent:
+            return (obses, step_infos)
+        else:
+            return (self._wrap_as_single_agent(obses), self._wrap_as_single_agent(step_infos))
 
     def _get_step_return(self, actions, engine_info):
         # update obs, dones, rewards, costs, calculate done at first !
@@ -444,7 +463,9 @@ def _get_step_return(self, actions, engine_info):
         for k in self.dones:
             self.dones[k] = True
 
-        dones = {k: self.dones[k] for k in self.vehicles.keys()}
+        terminateds = {k: self.dones[k] for k in self.vehicles.keys()}
+        truncateds = {k: False for k in self.vehicles.keys()}
+
         for v_id, r in rewards.items():
             self.episode_rewards[v_id] += r
             step_infos[v_id]["episode_reward"] = self.episode_rewards[v_id]
@@ -453,9 +474,9 @@ def _get_step_return(self, actions, engine_info):
 
         if not self.is_multi_agent:
             return self._wrap_as_single_agent(obses), self._wrap_as_single_agent(rewards), \
-                self._wrap_as_single_agent(dones), self._wrap_as_single_agent(step_infos)
+                self._wrap_as_single_agent(terminateds), self._wrap_as_single_agent(truncateds), self._wrap_as_single_agent(step_infos)
         else:
-            return obses, rewards, dones, step_infos
+            return obses, rewards, terminateds, truncateds, step_infos
 
     def close(self):
         if self.engine is not None:
@@ -632,12 +653,13 @@ def _act(observation):
         self.config["record_episode"] = True
         done_info = {}
         for index in scenario_index:
-            obs = self.reset(force_seed=index)
+            obs, _ = self.reset(seed=index)  # reset() now returns an (obs, info) tuple
             done = False
             count = 0
             info = None
             while not done:
-                obs, reward, done, info = self.step(_act(obs))
+                obs, reward, terminated, truncated, info = self.step(_act(obs))
+                done = terminated or truncated
                 count += 1
                 if max_episode_length is not None and count > max_episode_length:
                     done = True
diff --git a/metadrive/envs/gym_wrapper.py b/metadrive/envs/gym_wrapper.py
new file mode 100644
index 000000000..89c34ad10
--- /dev/null
+++ b/metadrive/envs/gym_wrapper.py
@@ -0,0 +1,78 @@
+try:
+    from typing import Any, Dict
+    import gymnasium
+    import gym
+    import gym.spaces
+
+    class GymEnvWrapper(gym.Env):
+        def __init__(self, config: Dict[str, Any]):
+            """
+            Note that config must contain two items:
+            "inner_class": the class of a MetaDrive environment (not instantiated)
+            "inner_config": the config that will be passed to the MetaDrive environment
+            """
+            inner_class = config["inner_class"]
+            inner_config = config["inner_config"]
+            assert isinstance(inner_class, type)
+            assert isinstance(inner_config, dict)
+            self._inner = inner_class(config=inner_config)
+
+        def step(self, actions):
+            o, r, tm, tc, i = self._inner.step(actions)
+            if isinstance(tm, dict) and isinstance(tc, dict):
+                # Merge per-agent terminated/truncated flags into a single done dict
+                d = {j: tm[j] or tc[j] for j in set(list(tm.keys()) + list(tc.keys()))}
+            else:
+                d = tm or tc
+            return o, r, d, i
+
+        def reset(self, *, seed=None, options=None):
+            # pass non-None parameters to the reset (which may not support options or seed)
+            params = {"seed": seed, "options": options}
+            not_none_params = {k: v for k, v in params.items() if v is not None}
+            obs, _ = self._inner.reset(**not_none_params)
+            return obs
+
+        def render(self, *args, **kwargs):
+            # remove mode from kwargs: the render mode is read from the inner env's config
+            kwargs.pop("mode", None)
+            return self._inner.render(*args, **kwargs)
+
+        def close(self):
+            self._inner.close()
+
+        def seed(self, seed=None):
+            """
+            We cannot seed a Gymnasium environment while running, so do nothing
+            """
+            pass
+
+        @property
+        def observation_space(self):
+            obs_space = self._inner.observation_space
+            assert isinstance(obs_space, gymnasium.spaces.Box)
+            return gym.spaces.Box(low=obs_space.low, high=obs_space.high, shape=obs_space.shape)
+
+        @property
+        def action_space(self):
+            action_space = self._inner.action_space
+            assert isinstance(action_space, gymnasium.spaces.Box)
+            return gym.spaces.Box(low=action_space.low, high=action_space.high, shape=action_space.shape)
+
+        def __getattr__(self, __name: str) -> Any:
+            return getattr(self._inner, __name)  # delegate attribute lookup to the wrapped env
+
+    if __name__ == '__main__':
+        from metadrive.envs.scenario_env import ScenarioEnv
+
+        env = GymEnvWrapper(config={"inner_class": ScenarioEnv, "inner_config": {"manual_control": True}})
+        o, i = env.reset()
+        assert isinstance(env.observation_space, gymnasium.Space)
+        assert isinstance(env.action_space, gymnasium.Space)
+        for s in range(600):
+            o, r, d, i = env.step([0, -1])
+            env.vehicle.set_velocity([0, 0])
+            if d:
+                assert s == env.config["horizon"] and i["max_step"] and d
+                break
+except ImportError:
+    pass
diff --git a/metadrive/envs/gymnasium_wrapper.py b/metadrive/envs/gymnasium_wrapper.py
deleted file mode 100644
index 9df5b3f5d..000000000
--- a/metadrive/envs/gymnasium_wrapper.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import gymnasium
-from gymnasium.spaces import Box
-
-from metadrive.constants import TerminationState
-from metadrive.envs.base_env import BaseEnv
-
-
-class GymnasiumEnvWrapper:
-    def __init__(self, *args, **kwargs):
-        super(GymnasiumEnvWrapper, self).__init__(*args, **kwargs)
-        self._skip_env_checking = True
-
-    def step(self, actions):
-        o, r, d, i = super(GymnasiumEnvWrapper, self).step(actions)
-        truncated = True if i[TerminationState.MAX_STEP] else False
-        return o, r, d, truncated, i
-
-    @property
-    def observation_space(self):
-        obs_space = super(GymnasiumEnvWrapper, self).observation_space
-        return Box(low=obs_space.low, high=obs_space.high, shape=obs_space.shape)
-
-    @property
-    def action_space(self):
-        space = super(GymnasiumEnvWrapper, self).action_space
-        return Box(low=space.low, high=space.high, shape=space.shape)
-
-    def reset(self, *, seed=None, options=None):
-        return super(GymnasiumEnvWrapper, self).reset(), {}
-
-    @classmethod
-    def build(cls, base_class):
-        assert issubclass(base_class, BaseEnv), "The base class should be the subclass of BaseEnv!"
- return type("{}({})".format(cls.__name__, base_class.__name__), (cls, base_class), {}) - - -if __name__ == '__main__': - from metadrive.envs.scenario_env import ScenarioEnv - - env = GymnasiumEnvWrapper.build(ScenarioEnv)({"manual_control": True}) - o, i = env.reset() - assert isinstance(env.observation_space, gymnasium.Space) - assert isinstance(env.action_space, gymnasium.Space) - for s in range(600): - o, r, d, t, i = env.step([0, -1]) - env.vehicle.set_velocity([0, 0]) - if d: - assert s == env.config["horizon"] and i["max_step"] and t - break diff --git a/metadrive/envs/marl_envs/marl_bidirection.py b/metadrive/envs/marl_envs/marl_bidirection.py index 168c2e017..e1eb2879c 100644 --- a/metadrive/envs/marl_envs/marl_bidirection.py +++ b/metadrive/envs/marl_envs/marl_bidirection.py @@ -146,7 +146,7 @@ def setup_engine(self): def _draw(): env = MultiAgentBidirectionEnv() - o = env.reset() + o, _ = env.reset() from metadrive.utils.draw_top_down_map import draw_top_down_map import matplotlib.pyplot as plt @@ -175,17 +175,17 @@ def _expert(): "num_agents": 4, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): - o, r, d, info = env.step(env.action_space.sample()) + o, r, tm, tc, info = env.step(env.action_space.sample()) for r_ in r.values(): total_r += r_ ep_s += 1 - d.update({"total_r": total_r, "episode length": ep_s}) + tm.update({"total_r": total_r, "episode length": ep_s}) # env.render(text=d) - if d["__all__"]: + if tm["__all__"]: print( "Finish! Current step {}. Group Reward: {}. Average reward: {}".format( i, total_r, total_r / env.agent_manager.next_agent_count @@ -218,12 +218,12 @@ def _vis_debug_respawn(): "num_agents": 20, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): action = {k: [.0, 1.0] for k in env.vehicles.keys()} - o, r, d, info = env.step(action) + o, r, tm, tc, info = env.step(action) for r_ in r.values(): total_r += r_ ep_s += 1 @@ -236,7 +236,7 @@ def _vis_debug_respawn(): "cam_z": env.main_camera.top_down_camera_height } env.render(text=render_text) - if d["__all__"]: + if tm["__all__"]: print( "Finish! Current step {}. Group Reward: {}. Average reward: {}".format( i, total_r, total_r / env.agent_manager.next_agent_count @@ -268,11 +268,11 @@ def _vis(): "num_agents": 20, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): - o, r, d, info = env.step({k: [1.0, .0] for k in env.vehicles.keys()}) + o, r, tm, tc, info = env.step({k: [1.0, .0] for k in env.vehicles.keys()}) for r_ in r.values(): total_r += r_ ep_s += 1 @@ -290,7 +290,7 @@ def _vis(): render_text["dist_to_right"] = env.current_track_vehicle.dist_to_right_side render_text["dist_to_left"] = env.current_track_vehicle.dist_to_left_side env.render(text=render_text) - if d["__all__"]: + if tm["__all__"]: print( "Finish! Current step {}. Group Reward: {}. 
Average reward: {}".format( i, total_r, total_r / env.agent_manager.next_agent_count @@ -307,15 +307,15 @@ def _vis(): def _profile(): import time env = MultiAgentBidirectionEnv({"num_agents": 8}) - obs = env.reset() + obs, _ = env.reset() start = time.time() for s in range(10000): - o, r, d, i = env.step(env.action_space.sample()) + o, r, tm, tc, i = env.step(env.action_space.sample()) # mask_ratio = env.engine.detector_mask.get_mask_ratio() # print("Mask ratio: ", mask_ratio) - if all(d.values()): + if all(tm.values()): env.reset() if (s + 1) % 100 == 0: print( @@ -349,24 +349,24 @@ def _long_run(): } ) try: - obs = env.reset() + obs, _ = env.reset() assert env.observation_space.contains(obs) for step in range(10000): act = env.action_space.sample() - o, r, d, i = env.step(act) + o, r, tm, tc, i = env.step(act) if step == 0: - assert not any(d.values()) + assert not any(tm.values()) - if any(d.values()): + if any(tm.values()): print("Current Done: {}\nReward: {}".format(d, r)) - for kkk, ddd in d.items(): + for kkk, ddd in tm.items(): if ddd and kkk != "__all__": print("Info {}: {}\n".format(kkk, i[kkk])) print("\n") for kkk, rrr in r.items(): if rrr == -_out_of_road_penalty: - assert d[kkk] + assert tm[kkk] if (step + 1) % 200 == 0: print( @@ -376,7 +376,7 @@ def _long_run(): for k, oo in o.items()}, r, d, i ) ) - if d["__all__"]: + if tm["__all__"]: print('Current step: ', step) break finally: diff --git a/metadrive/envs/marl_envs/marl_bottleneck.py b/metadrive/envs/marl_envs/marl_bottleneck.py index 68765ba2b..60dccc147 100644 --- a/metadrive/envs/marl_envs/marl_bottleneck.py +++ b/metadrive/envs/marl_envs/marl_bottleneck.py @@ -141,7 +141,7 @@ def setup_engine(self): def _draw(): env = MultiAgentBottleneckEnv() - o = env.reset() + o, _ = env.reset() from metadrive.utils.draw_top_down_map import draw_top_down_map import matplotlib.pyplot as plt @@ -170,17 +170,17 @@ def _expert(): "num_agents": 4, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): - o, r, d, info = env.step(env.action_space.sample()) + o, r, tm, tc, info = env.step(env.action_space.sample()) for r_ in r.values(): total_r += r_ ep_s += 1 - d.update({"total_r": total_r, "episode length": ep_s}) + tm.update({"total_r": total_r, "episode length": ep_s}) # env.render(text=d) - if d["__all__"]: + if tm["__all__"]: print( "Finish! Current step {}. Group Reward: {}. Average reward: {}".format( i, total_r, total_r / env.agent_manager.next_agent_count @@ -213,12 +213,12 @@ def _vis_debug_respawn(): "num_agents": 20, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): action = {k: [.0, 1.0] for k in env.vehicles.keys()} - o, r, d, info = env.step(action) + o, r, tm, tc, info = env.step(action) for r_ in r.values(): total_r += r_ ep_s += 1 @@ -231,7 +231,7 @@ def _vis_debug_respawn(): "cam_z": env.main_camera.top_down_camera_height } env.render(text=render_text) - if d["__all__"]: + if tm["__all__"]: print( "Finish! Current step {}. Group Reward: {}. 
Average reward: {}".format( i, total_r, total_r / env.agent_manager.next_agent_count @@ -263,11 +263,11 @@ def _vis(): "num_agents": 20, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): - o, r, d, info = env.step({k: [1.0, .0] for k in env.vehicles.keys()}) + o, r, tm, tc, info = env.step({k: [1.0, .0] for k in env.vehicles.keys()}) for r_ in r.values(): total_r += r_ ep_s += 1 @@ -285,7 +285,7 @@ def _vis(): render_text["dist_to_right"] = env.current_track_vehicle.dist_to_right_side render_text["dist_to_left"] = env.current_track_vehicle.dist_to_left_side env.render(text=render_text) - if d["__all__"]: + if tm["__all__"]: print( "Finish! Current step {}. Group Reward: {}. Average reward: {}".format( i, total_r, total_r / env.agent_manager.next_agent_count @@ -302,15 +302,15 @@ def _vis(): def _profile(): import time env = MultiAgentBottleneckEnv({"num_agents": 8}) - obs = env.reset() + obs, _ = env.reset() start = time.time() for s in range(10000): - o, r, d, i = env.step(env.action_space.sample()) + o, r, tm, tc, i = env.step(env.action_space.sample()) # mask_ratio = env.engine.detector_mask.get_mask_ratio() # print("Mask ratio: ", mask_ratio) - if all(d.values()): + if all(tm.values()): env.reset() if (s + 1) % 100 == 0: print( @@ -344,34 +344,34 @@ def _long_run(): } ) try: - obs = env.reset() + obs, _ = env.reset() assert env.observation_space.contains(obs) for step in range(10000): act = env.action_space.sample() - o, r, d, i = env.step(act) + o, r, tm, tc, i = env.step(act) if step == 0: - assert not any(d.values()) + assert not any(tm.values()) - if any(d.values()): - print("Current Done: {}\nReward: {}".format(d, r)) - for kkk, ddd in d.items(): + if any(tm.values()): + print("Current Done: {}\nReward: {}".format(tm, r)) + for kkk, ddd in tm.items(): if ddd and kkk != "__all__": print("Info {}: {}\n".format(kkk, i[kkk])) print("\n") for kkk, rrr in r.items(): if rrr == -_out_of_road_penalty: - assert d[kkk] + assert tm[kkk] if (step + 1) % 200 == 0: print( "{}/{} Agents: {} {}\nO: {}\nR: {}\nD: {}\nI: {}\n\n".format( step + 1, 10000, len(env.vehicles), list(env.vehicles.keys()), {k: (oo.shape, oo.mean(), oo.min(), oo.max()) - for k, oo in o.items()}, r, d, i + for k, oo in o.items()}, r, tm, i ) ) - if d["__all__"]: + if tm["__all__"]: print('Current step: ', step) break finally: diff --git a/metadrive/envs/marl_envs/marl_inout_roundabout.py b/metadrive/envs/marl_envs/marl_inout_roundabout.py index af4ac67b6..9050c8bc7 100644 --- a/metadrive/envs/marl_envs/marl_inout_roundabout.py +++ b/metadrive/envs/marl_envs/marl_inout_roundabout.py @@ -156,7 +156,7 @@ def setup_engine(self): def _draw(): env = MultiAgentRoundaboutEnv() - o = env.reset() + o, _ = env.reset() from metadrive.utils.draw_top_down_map import draw_top_down_map import matplotlib.pyplot as plt @@ -185,17 +185,17 @@ def _expert(): "num_agents": 4, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): - o, r, d, info = env.step(env.action_space.sample()) + o, r, tm, tc, info = env.step(env.action_space.sample()) for r_ in r.values(): total_r += r_ ep_s += 1 - d.update({"total_r": total_r, "episode length": ep_s}) + tm.update({"total_r": total_r, "episode length": ep_s}) # env.render(text=d) - if d["__all__"]: + if tm["__all__"]: print( "Finish! Current step {}. Group Reward: {}. 
Average reward: {}".format( i, total_r, total_r / env.agent_manager.next_agent_count @@ -228,12 +228,12 @@ def _vis_debug_respawn(): "num_agents": 40, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): action = {k: [0.0, .0] for k in env.vehicles.keys()} - o, r, d, info = env.step(action) + o, r, tm, tc, info = env.step(action) for r_ in r.values(): total_r += r_ ep_s += 1 @@ -246,7 +246,7 @@ def _vis_debug_respawn(): "cam_z": env.main_camera.top_down_camera_height } env.render(text=render_text) - if d["__all__"]: + if tm["__all__"]: print( "Finish! Current step {}. Group Reward: {}. Average reward: {}".format( i, total_r, total_r / env.agent_manager.next_agent_count @@ -278,15 +278,15 @@ def _vis(): "num_agents": -1, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): - o, r, d, info = env.step({k: [0, .0] for k in env.vehicles.keys()}) + o, r, tm, tc, info = env.step({k: [0, .0] for k in env.vehicles.keys()}) for r_ in r.values(): total_r += r_ ep_s += 1 - # d.update({"total_r": total_r, "episode length": ep_s}) + # tm.update({"total_r": total_r, "episode length": ep_s}) render_text = { "total_r": total_r, "episode length": ep_s, @@ -295,7 +295,7 @@ def _vis(): "cam_z": env.main_camera.top_down_camera_height } env.render(text=render_text) - if d["__all__"]: + if tm["__all__"]: print( "Finish! Current step {}. Group Reward: {}. Average reward: {}".format( i, total_r, total_r / env.agent_manager.next_agent_count @@ -312,15 +312,15 @@ def _vis(): def _profile(): import time env = MultiAgentRoundaboutEnv({"num_agents": 40}) - obs = env.reset() + obs, _ = env.reset() start = time.time() for s in range(10000): - o, r, d, i = env.step(env.action_space.sample()) + o, r, tm, tc, i = env.step(env.action_space.sample()) # mask_ratio = env.engine.detector_mask.get_mask_ratio() # print("Mask ratio: ", mask_ratio) - if all(d.values()): + if all(tm.values()): env.reset() if (s + 1) % 100 == 0: print( @@ -354,34 +354,34 @@ def _long_run(): } ) try: - obs = env.reset() + obs, _ = env.reset() assert env.observation_space.contains(obs) for step in range(10000): act = env.action_space.sample() - o, r, d, i = env.step(act) + o, r, tm, tc, i = env.step(act) if step == 0: - assert not any(d.values()) + assert not any(tm.values()) - if any(d.values()): - print("Current Done: {}\nReward: {}".format(d, r)) - for kkk, ddd in d.items(): + if any(tm.values()): + print("Current Done: {}\nReward: {}".format(tm, r)) + for kkk, ddd in tm.items(): if ddd and kkk != "__all__": print("Info {}: {}\n".format(kkk, i[kkk])) print("\n") for kkk, rrr in r.items(): if rrr == -_out_of_road_penalty: - assert d[kkk] + assert tm[kkk] if (step + 1) % 200 == 0: print( "{}/{} Agents: {} {}\nO: {}\nR: {}\nD: {}\nI: {}\n\n".format( step + 1, 10000, len(env.vehicles), list(env.vehicles.keys()), {k: (oo.shape, oo.mean(), oo.min(), oo.max()) - for k, oo in o.items()}, r, d, i + for k, oo in o.items()}, r, tm, i ) ) - if d["__all__"]: + if tm["__all__"]: print('Current step: ', step) break finally: diff --git a/metadrive/envs/marl_envs/marl_intersection.py b/metadrive/envs/marl_envs/marl_intersection.py index 38f2d5211..2a5062834 100644 --- a/metadrive/envs/marl_envs/marl_intersection.py +++ b/metadrive/envs/marl_envs/marl_intersection.py @@ -108,7 +108,7 @@ def setup_engine(self): def _draw(): env = MultiAgentIntersectionEnv() - o = env.reset() + o, _ = env.reset() from metadrive.utils.draw_top_down_map import draw_top_down_map import matplotlib.pyplot as plt @@ 
-137,17 +137,17 @@ def _expert(): "num_agents": 4, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): - o, r, d, info = env.step(env.action_space.sample()) + o, r, tm, tc, info = env.step(env.action_space.sample()) for r_ in r.values(): total_r += r_ ep_s += 1 - d.update({"total_r": total_r, "episode length": ep_s}) + tm.update({"total_r": total_r, "episode length": ep_s}) # env.render(text=d) - if d["__all__"]: + if tm["__all__"]: print( "Finish! Current step {}. Group Reward: {}. Average reward: {}".format( i, total_r, total_r / env.agent_manager.next_agent_count @@ -180,12 +180,12 @@ def _vis_debug_respawn(): "num_agents": 40, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): action = {k: [0.0, .0] for k in env.vehicles.keys()} - o, r, d, info = env.step(action) + o, r, tm, tc, info = env.step(action) for r_ in r.values(): total_r += r_ ep_s += 1 @@ -198,7 +198,7 @@ def _vis_debug_respawn(): "cam_z": env.main_camera.top_down_camera_height } env.render(text=render_text) - if d["__all__"]: + if tm["__all__"]: print( "Finish! Current step {}. Group Reward: {}. Average reward: {}".format( i, total_r, total_r / env.agent_manager.next_agent_count @@ -233,14 +233,14 @@ def _vis(): "delay_done": 2, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): actions = {k: [0.0, 1.0] for k in env.vehicles.keys()} if len(env.vehicles) == 1: actions = {k: [-0, 1.0] for k in env.vehicles.keys()} - o, r, d, info = env.step(actions) + o, r, tm, tc, info = env.step(actions) for r_ in r.values(): total_r += r_ ep_s += 1 @@ -255,7 +255,7 @@ def _vis(): # } # env.render(text=render_text) # env.render(mode="top_down") - if d["__all__"]: + if tm["__all__"]: print( "Finish! Current step {}. Group Reward: {}. 
Average reward: {}".format( i, total_r, total_r / env.agent_manager.next_agent_count @@ -273,15 +273,15 @@ def _vis(): def _profile(): import time env = MultiAgentIntersectionEnv({"num_agents": 16}) - obs = env.reset() + obs, _ = env.reset() start = time.time() for s in range(10000): - o, r, d, i = env.step(env.action_space.sample()) + o, r, tm, tc, i = env.step(env.action_space.sample()) # mask_ratio = env.engine.detector_mask.get_mask_ratio() # print("Mask ratio: ", mask_ratio) - if all(d.values()): + if all(tm.values()): env.reset() if (s + 1) % 100 == 0: print( @@ -315,34 +315,34 @@ def _long_run(): } ) try: - obs = env.reset() + obs, _ = env.reset() assert env.observation_space.contains(obs) for step in range(10000): act = env.action_space.sample() - o, r, d, i = env.step(act) + o, r, tm, tc, i = env.step(act) if step == 0: - assert not any(d.values()) + assert not any(tm.values()) - if any(d.values()): - print("Current Done: {}\nReward: {}".format(d, r)) - for kkk, ddd in d.items(): + if any(tm.values()): + print("Current Done: {}\nReward: {}".format(tm, r)) + for kkk, ddd in tm.items(): if ddd and kkk != "__all__": print("Info {}: {}\n".format(kkk, i[kkk])) print("\n") for kkk, rrr in r.items(): if rrr == -_out_of_road_penalty: - assert d[kkk] + assert tm[kkk] if (step + 1) % 200 == 0: print( "{}/{} Agents: {} {}\nO: {}\nR: {}\nD: {}\nI: {}\n\n".format( step + 1, 10000, len(env.vehicles), list(env.vehicles.keys()), {k: (oo.shape, oo.mean(), oo.min(), oo.max()) - for k, oo in o.items()}, r, d, i + for k, oo in o.items()}, r, tm, i ) ) - if d["__all__"]: + if tm["__all__"]: print('Current step: ', step) break finally: diff --git a/metadrive/envs/marl_envs/marl_parking_lot.py b/metadrive/envs/marl_envs/marl_parking_lot.py index e78cb4856..ac1dcba49 100644 --- a/metadrive/envs/marl_envs/marl_parking_lot.py +++ b/metadrive/envs/marl_envs/marl_parking_lot.py @@ -212,7 +212,7 @@ def _get_out_spawn_roads(parking_space_num): ret.append(Road(ParkingLot.node(1, i, 5), ParkingLot.node(1, i, 6))) return ret - def _merge_extra_config(self, config) -> "Config": + def _merge_extra_config(self, config) -> Config: ret_config = super(MultiAgentParkingLotEnv, self)._merge_extra_config(config) # add extra assert parking_space_num = ret_config["parking_space_num"] @@ -286,7 +286,7 @@ def setup_engine(self): def _draw(): env = MultiAgentParkingLotEnv() - o = env.reset() + o, _ = env.reset() from metadrive.utils.draw_top_down_map import draw_top_down_map import matplotlib.pyplot as plt @@ -315,17 +315,17 @@ def _expert(): "num_agents": 3, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): - o, r, d, info = env.step(env.action_space.sample()) + o, r, tm, tc, info = env.step(env.action_space.sample()) for r_ in r.values(): total_r += r_ ep_s += 1 - d.update({"total_r": total_r, "episode length": ep_s}) + tm.update({"total_r": total_r, "episode length": ep_s}) # env.render(text=d) - if d["__all__"]: + if tm["__all__"]: print( "Finish! Current step {}. Group Reward: {}. 
Average reward: {}".format( i, total_r, total_r / env.agent_manager.next_agent_count @@ -358,12 +358,12 @@ def _vis_debug_respawn(): "num_agents": 11, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): action = {k: [0.0, .0] for k in env.vehicles.keys()} - o, r, d, info = env.step(action) + o, r, tm, tc, info = env.step(action) for r_ in r.values(): total_r += r_ ep_s += 1 @@ -376,7 +376,7 @@ def _vis_debug_respawn(): "cam_z": env.main_camera.top_down_camera_height } env.render(text=render_text) - if d["__all__"]: + if tm["__all__"]: print( "Finish! Current step {}. Group Reward: {}. Average reward: {}".format( i, total_r, total_r / env.agent_manager.next_agent_count @@ -414,14 +414,14 @@ def _vis(): # "parking_space_num": 4 } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): actions = {k: [1.0, .0] for k in env.vehicles.keys()} if len(env.vehicles) == 1: actions = {k: [-1.0, .0] for k in env.vehicles.keys()} - o, r, d, info = env.step(actions) + o, r, tm, tc, info = env.step(actions) for r_ in r.values(): total_r += r_ ep_s += 1 @@ -463,7 +463,7 @@ def _vis(): } ) ) - if d["__all__"]: + if tm["__all__"]: print( "Finish! Current step {}. Group Reward: {}. Average reward: {}".format( i, total_r, total_r / env.agent_manager.next_agent_count @@ -481,15 +481,15 @@ def _vis(): def _profile(): import time env = MultiAgentParkingLotEnv({"num_agents": 10}) - obs = env.reset() + obs, _ = env.reset() start = time.time() for s in range(10000): - o, r, d, i = env.step(env.action_space.sample()) + o, r, tm, tc, i = env.step(env.action_space.sample()) # mask_ratio = env.engine.detector_mask.get_mask_ratio() # print("Mask ratio: ", mask_ratio) - if all(d.values()): + if all(tm.values()): env.reset() if (s + 1) % 100 == 0: print( @@ -523,17 +523,17 @@ def _long_run(): } ) try: - obs = env.reset() + obs, _ = env.reset() assert env.observation_space.contains(obs) for step in range(10000): act = env.action_space.sample() - o, r, d, i = env.step(act) + o, r, tm, tc, i = env.step(act) if step == 0: assert not any(d.values()) - if any(d.values()): + if any(tm.values()): print("Current Done: {}\nReward: {}".format(d, r)) - for kkk, ddd in d.items(): + for kkk, ddd in tm.items(): if ddd and kkk != "__all__": print("Info {}: {}\n".format(kkk, i[kkk])) print("\n") @@ -550,7 +550,7 @@ def _long_run(): for k, oo in o.items()}, r, d, i ) ) - if d["__all__"]: + if tm["__all__"]: print('Current step: ', step) break finally: diff --git a/metadrive/envs/marl_envs/marl_tollgate.py b/metadrive/envs/marl_envs/marl_tollgate.py index 420fcc9b4..51033ab93 100644 --- a/metadrive/envs/marl_envs/marl_tollgate.py +++ b/metadrive/envs/marl_envs/marl_tollgate.py @@ -1,4 +1,5 @@ -import gym +import gymnasium as gym + import numpy as np from metadrive.component.map.pg_map import PGMap @@ -272,9 +273,9 @@ def get_single_observation(self, vehicle_config): return o def step(self, actions): - o, r, d, i = super(MultiAgentTollgateEnv, self).step(actions) + o, r, tm, tc, i = super(MultiAgentTollgateEnv, self).step(actions) self.stay_time_manager.record(self.agent_manager.active_agents, self.episode_step) - return o, r, d, i + return o, r, tm, tc, i def setup_engine(self): super(MultiAgentTollgateEnv, self).setup_engine() @@ -283,7 +284,7 @@ def setup_engine(self): def _draw(): env = MultiAgentTollgateEnv() - o = env.reset() + o, _ = env.reset() from metadrive.utils.draw_top_down_map import draw_top_down_map import matplotlib.pyplot as plt @@ -312,17 +313,17 @@ def 
_expert(): "num_agents": 4, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): - o, r, d, info = env.step(env.action_space.sample()) + o, r, tm, tc, info = env.step(env.action_space.sample()) for r_ in r.values(): total_r += r_ ep_s += 1 - d.update({"total_r": total_r, "episode length": ep_s}) + tm.update({"total_r": total_r, "episode length": ep_s}) # env.render(text=d) - if d["__all__"]: + if tm["__all__"]: print( "Finish! Current step {}. Group Reward: {}. Average reward: {}".format( i, total_r, total_r / env.agent_manager.next_agent_count @@ -355,12 +356,12 @@ def _vis_debug_respawn(): "num_agents": 20, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): action = {k: [.0, 1.0] for k in env.vehicles.keys()} - o, r, d, info = env.step(action) + o, r, tm, tc, info = env.step(action) for r_ in r.values(): total_r += r_ ep_s += 1 @@ -373,7 +374,7 @@ def _vis_debug_respawn(): "cam_z": env.main_camera.top_down_camera_height } env.render(text=render_text) - if d["__all__"]: + if tm["__all__"]: print( "Finish! Current step {}. Group Reward: {}. Average reward: {}".format( i, total_r, total_r / env.agent_manager.next_agent_count @@ -410,11 +411,11 @@ def _vis(): "num_agents": 18, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 ep_s = 0 for i in range(1, 100000): - o, r, d, info = env.step({k: [0, 1] for k in env.vehicles.keys()}) + o, r, tm, tc, info = env.step({k: [0, 1] for k in env.vehicles.keys()}) for r_ in r.values(): total_r += r_ ep_s += 1 @@ -435,7 +436,7 @@ def _vis(): render_text["lane"] = env.current_track_vehicle.lane_index render_text["block"] = env.current_track_vehicle.navigation.current_road.block_ID() env.render(text=render_text) - if d["__all__"]: + if tm["__all__"]: print(info) print( "Finish! Current step {}. Group Reward: {}. 
Average reward: {}".format( @@ -453,15 +454,15 @@ def _vis(): def _profile(): import time env = MultiAgentTollgateEnv({"num_agents": 8}) - obs = env.reset() + obs, _ = env.reset() start = time.time() for s in range(10000): - o, r, d, i = env.step(env.action_space.sample()) + o, r, tm, tc, i = env.step(env.action_space.sample()) # mask_ratio = env.engine.detector_mask.get_mask_ratio() # print("Mask ratio: ", mask_ratio) - if all(d.values()): + if all(tm.values()): env.reset() if (s + 1) % 100 == 0: print( @@ -470,7 +471,7 @@ def _profile(): time.time() - start, (s + 1) / (time.time() - start) ) ) - print(f"(MetaDriveEnv) Total Time Elapse: {time.time() - start}") + print(f"(MetaDriveEnv) Total Time Elapsed: {time.time() - start}") def _long_run(): @@ -495,34 +496,34 @@ def _long_run(): } ) try: - obs = env.reset() + obs, _ = env.reset() assert env.observation_space.contains(obs) for step in range(10000): act = env.action_space.sample() - o, r, d, i = env.step(act) + o, r, tm, tc, i = env.step(act) if step == 0: - assert not any(d.values()) + assert not any(tm.values()) - if any(d.values()): - print("Current Done: {}\nReward: {}".format(d, r)) - for kkk, ddd in d.items(): + if any(tm.values()): + print("Current Done: {}\nReward: {}".format(tm, r)) + for kkk, ddd in tm.items(): if ddd and kkk != "__all__": print("Info {}: {}\n".format(kkk, i[kkk])) print("\n") for kkk, rrr in r.items(): if rrr == -_out_of_road_penalty: - assert d[kkk] + assert tm[kkk] if (step + 1) % 200 == 0: print( "{}/{} Agents: {} {}\nO: {}\nR: {}\nD: {}\nI: {}\n\n".format( step + 1, 10000, len(env.vehicles), list(env.vehicles.keys()), {k: (oo.shape, oo.mean(), oo.min(), oo.max()) - for k, oo in o.items()}, r, d, i + for k, oo in o.items()}, r, tm, i ) ) - if d["__all__"]: + if tm["__all__"]: print('Current step: ', step) break finally: diff --git a/metadrive/envs/marl_envs/multi_agent_metadrive.py b/metadrive/envs/marl_envs/multi_agent_metadrive.py index 0116d721c..f31865cc7 100644 --- a/metadrive/envs/marl_envs/multi_agent_metadrive.py +++ b/metadrive/envs/marl_envs/multi_agent_metadrive.py @@ -1,6 +1,7 @@ import copy from metadrive.component.vehicle_module.vehicle_panel import VehiclePanel import logging +from typing import Dict, Any from metadrive.component.pgblock.first_block import FirstPGBlock from metadrive.component.road_network import Road @@ -17,26 +18,21 @@ random_agent_model=False, # If True, the spawn position will be deterministic for each episode, usually used for deterministic test force_seed_spawn_manager=False, - # Whether to terminate a vehicle if it crash with others. Since in MA env the crash is extremely dense, so # frequently done might not be a good idea. crash_done=True, out_of_road_done=True, delay_done=25, # Put the dead vehicles in place for 5 seconds before removing them. - # Whether the vehicle can rejoin the episode allow_respawn=True, spawn_roads=[Road(FirstPGBlock.NODE_2, FirstPGBlock.NODE_3)], - # The maximum length of the episode. If allow respawn, then this is the maximum step that respawn can happen. After # that, the episode won't terminate until all existing vehicles reach their horizon or done. The vehicle specified # horizon is also this value. 
horizon=1000, max_step_per_agent=1000, # Per agent maximum episode steps - # Use to determine what neighborhood means neighbours_distance=10, - # ===== Vehicle Setting ===== vehicle_config=dict( lidar=dict(num_lasers=72, distance=40, num_others=0), @@ -44,7 +40,6 @@ spawn_lane_index=(FirstPGBlock.NODE_1, FirstPGBlock.NODE_2, 0), _specified_spawn_lane=False, # automatically filled _specified_destination=False, # automatically filled - # We remove dynamics randomization in Multi-agent environments to make the results aligned with previous # results published in papers. See # https://github.com/metadriverse/metadrive/issues/161#issuecomment-1080114029 @@ -52,7 +47,6 @@ vehicle_model="static_default", ), target_vehicle_configs=dict(), - # ===== New Reward Setting ===== out_of_road_penalty=10, crash_vehicle_penalty=10, @@ -60,12 +54,11 @@ crash_vehicle_cost=1, crash_object_cost=1, out_of_road_cost=0, # Do not count out of road into cost! - # ===== Environmental Setting ===== - traffic_density=0., + traffic_density=0.0, camera_height=4, map_file_path="", - interface_panel=[VehiclePanel] + interface_panel=[VehiclePanel], ) @@ -85,7 +78,9 @@ def default_config() -> Config: def _merge_extra_config(self, config) -> "Config": ret_config = self.default_config().update( - config, allow_add_new_key=False, stop_recursive_update=["target_vehicle_configs"] + config, + allow_add_new_key=False, + stop_recursive_update=["target_vehicle_configs"], ) # if not ret_config["crash_done"] and ret_config["crash_vehicle_penalty"] > 2: # logging.warning( @@ -103,21 +98,28 @@ def _merge_extra_config(self, config) -> "Config": def _post_process_config(self, config): from metadrive.manager.spawn_manager import SpawnManager + config = super(MultiAgentMetaDrive, self)._post_process_config(config) ret_config = config # merge basic vehicle config into target vehicle config target_vehicle_configs = dict() - num_agents = ret_config["num_agents"] if ret_config["num_agents"] != -1 else SpawnManager.max_capacity( - config["spawn_roads"], config["map_config"]["exit_length"], config["map_config"]["lane_num"] + num_agents = ( + ret_config["num_agents"] if ret_config["num_agents"] != -1 else SpawnManager.max_capacity( + config["spawn_roads"], + config["map_config"]["exit_length"], + config["map_config"]["lane_num"], + ) ) for id in range(num_agents): agent_id = "agent{}".format(id) config = copy.deepcopy(ret_config["vehicle_config"]) if agent_id in ret_config["target_vehicle_configs"]: - config["_specified_spawn_lane" - ] = True if "spawn_lane_index" in ret_config["target_vehicle_configs"][agent_id] else False - config["_specified_destination" - ] = True if "destination" in ret_config["target_vehicle_configs"][agent_id] else False + config["_specified_spawn_lane"] = ( + True if "spawn_lane_index" in ret_config["target_vehicle_configs"][agent_id] else False + ) + config["_specified_destination"] = ( + True if "destination" in ret_config["target_vehicle_configs"][agent_id] else False + ) config.update(ret_config["target_vehicle_configs"][agent_id]) target_vehicle_configs[agent_id] = config ret_config["target_vehicle_configs"] = target_vehicle_configs @@ -126,23 +128,27 @@ def _post_process_config(self, config): def done_function(self, vehicle_id): done, done_info = super(MultiAgentMetaDrive, self).done_function(vehicle_id) if done_info[TerminationState.CRASH] and (not self.config["crash_done"]): - assert done_info[TerminationState.CRASH_VEHICLE] or \ - done_info[TerminationState.SUCCESS] or 
done_info[TerminationState.OUT_OF_ROAD] + assert ( + done_info[TerminationState.CRASH_VEHICLE] or done_info[TerminationState.SUCCESS] + or done_info[TerminationState.OUT_OF_ROAD] + ) if not (done_info[TerminationState.SUCCESS] or done_info[TerminationState.OUT_OF_ROAD]): # Does not revert done if high-priority termination happens! done = False if done_info[TerminationState.OUT_OF_ROAD] and (not self.config["out_of_road_done"]): - assert done_info[TerminationState.CRASH_VEHICLE] or \ - done_info[TerminationState.SUCCESS] or done_info[TerminationState.OUT_OF_ROAD] + assert ( + done_info[TerminationState.CRASH_VEHICLE] or done_info[TerminationState.SUCCESS] + or done_info[TerminationState.OUT_OF_ROAD] + ) if not done_info[TerminationState.SUCCESS]: done = False return done, done_info def step(self, actions): - o, r, d, i = super(MultiAgentMetaDrive, self).step(actions) - o, r, d, i = self._after_vehicle_done(o, r, d, i) + o, r, tm, tc, i = super(MultiAgentMetaDrive, self).step(actions) + o, r, tm, tc, i = self._after_vehicle_done(o, r, tm, tc, i) # Update respawn manager if self.episode_step >= self.config["horizon"]: @@ -153,41 +159,47 @@ def step(self, actions): o[new_id] = new_obs r[new_id] = 0.0 i[new_id] = new_info_dict[new_id] - d[new_id] = False + tm[new_id] = False + tc[new_id] = False # Update __all__ d_all = False - if self.config["horizon"] is not None: # No agent alive or a too long episode happens - if (self.episode_step >= self.config["horizon"] and all(d.values())) or \ - (self.episode_step >= 5 * self.config["horizon"]): + if (self.config["horizon"] is not None): # No agent alive or a too long episode happens + if (self.episode_step >= self.config["horizon"] and (all(tc.values()) or all(tm.values())) + or (self.episode_step >= 5 * self.config["horizon"])): d_all = True if len(self.vehicles) == 0: # No agent alive d_all = True - d["__all__"] = d_all - if d["__all__"]: - for k in d.keys(): - d[k] = True + tm["__all__"] = d_all + if tm["__all__"]: + for k in tm.keys(): + tm[k] = True - return o, r, d, i + return o, r, tm, tc, i - def _after_vehicle_done(self, obs=None, reward=None, dones: dict = None, info=None): + def _after_vehicle_done( + self, obs: Dict[str, Any], reward: Dict[str, float], terminated: Dict[str, bool], truncated: Dict[str, bool], + info: Dict[str, Any] + ): for v_id, v_info in info.items(): if v_info.get("episode_length", 0) >= self.config["horizon"]: - if dones[v_id] is not None: + if terminated[v_id] is not None: info[v_id][TerminationState.MAX_STEP] = True - dones[v_id] = True + terminated[v_id] = True self.dones[v_id] = True - for dead_vehicle_id, done in dones.items(): - if done: + for dead_vehicle_id, termed in terminated.items(): + if termed: self.agent_manager.finish( - dead_vehicle_id, ignore_delay_done=info[dead_vehicle_id].get(TerminationState.SUCCESS, False) + dead_vehicle_id, + ignore_delay_done=info[dead_vehicle_id].get(TerminationState.SUCCESS, False), ) self._update_camera_after_finish() - return obs, reward, dones, info + return obs, reward, terminated, truncated, info def _update_camera_after_finish(self): - if self.main_camera is not None and self.current_track_vehicle.id not in self.engine.agent_manager._active_objects \ - and self.engine.task_manager.hasTaskNamed(self.main_camera.CHASE_TASK_NAME): + if (self.main_camera is not None + and self.current_track_vehicle.id not in self.engine.agent_manager._active_objects + and self.engine.task_manager.hasTaskNamed(self.main_camera.CHASE_TASK_NAME)): self.switch_to_third_person_view() def 
_get_observations(self): @@ -246,21 +258,23 @@ def _test(): "num_agents": 12, "allow_respawn": False, "use_render": True, + "render_mode": "top_down", "debug": False, "manual_control": True, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 for i in range(1, 100000): - # o, r, d, info = env.step(env.action_space.sample()) - o, r, d, info = env.step({v_id: [0, 1] for v_id in env.vehicles.keys()}) + # o, r, tm, tc, info = env.step(env.action_space.sample()) + o, r, tm, tc, info = env.step({v_id: [0, 1] for v_id in env.vehicles.keys()}) for r_ in r.values(): total_r += r_ - # o, r, d, info = env.step([0,1]) - d.update({"total_r": total_r}) + # o, r, tm, tc, info = env.step([0,1]) + # TODO: why does this make sense? total_r is not a vehicle id. + # d.update({"total_r": total_r}) # env.render(text=d) - env.render(mode="top_down") + env.render() if len(env.vehicles) == 0: total_r = 0 print("Reset") @@ -273,6 +287,7 @@ def _vis(): env = MultiAgentMetaDrive( { "use_render": True, + "render_mode": "top_down", "num_agents": 5, "start_seed": 8000, "num_scenarios": 1, @@ -289,22 +304,22 @@ def _vis(): }, "agent2": { "vehicle_model": "xl" - } + }, }, # "allow_respawn": False, # "manual_control": True, } ) - o = env.reset() + o, _ = env.reset() total_r = 0 for i in range(1, 100000): - # o, r, d, info = env.step(env.action_space.sample()) - o, r, d, info = env.step({v_id: [0.0, 0.0] for v_id in env.vehicles.keys()}) + # o, r, tm, tc, info = env.step(env.action_space.sample()) + o, r, tm, tc, info = env.step({v_id: [0.0, 0.0] for v_id in env.vehicles.keys()}) for r_ in r.values(): total_r += r_ - # o, r, d, info = env.step([0,1]) - # d.update({"total_r": total_r}) - env.render(mode="top_down") + # o, r, tm, tc, info = env.step([0,1]) + # tm.update({"total_r": total_r}) + env.render() # env.reset() if len(env.vehicles) == 0: total_r = 0 @@ -313,24 +328,36 @@ def _vis(): env.close() -def pygame_replay(name, env_class, save=False, other_traj=None, film_size=(1000, 1000), extra_config={}): +def pygame_replay( + name, + env_class, + save=False, + other_traj=None, + film_size=(1000, 1000), + extra_config={}, +): import copy import json import pygame + extra_config["use_render"] = True + extra_config["render_mode"] = "top_down" env = env_class(extra_config) ckpt = "metasvodist_{}_best.json".format(name) if other_traj is None else other_traj with open(ckpt, "r") as f: traj = json.load(f) - o = env.reset(copy.deepcopy(traj)) + o, _ = env.reset(copy.deepcopy(traj)) env.main_camera.set_follow_lane(True) frame_count = 0 while True: - o, r, d, i = env.step(env.action_space.sample()) + o, r, tm, tc, i = env.step(env.action_space.sample()) env.engine.force_fps.toggle() - env.render(mode="top_down", num_stack=50, film_size=film_size, history_smooth=0) + env.render(num_stack=50, film_size=film_size, history_smooth=0) if save: - pygame.image.save(env._top_down_renderer._runtime_canvas, "{}_{}.png".format(name, frame_count)) + pygame.image.save( + env._top_down_renderer._runtime_canvas, + "{}_{}.png".format(name, frame_count), + ) frame_count += 1 if len(env.engine.replay_manager.restore_episode_info) == 0: env.close() @@ -340,24 +367,28 @@ def panda_replay(name, env_class, save=False, other_traj=None, extra_config={}): import copy import json import pygame + extra_config.update({"use_render": True}) env = env_class(extra_config) ckpt = "metasvodist_{}_best.json".format(name) if other_traj is None else other_traj with open(ckpt, "r") as f: traj = json.load(f) - o = env.reset(copy.deepcopy(traj)) + o, _ = 
env.reset(copy.deepcopy(traj))
     env.main_camera.set_follow_lane(True)
     frame_count = 0
     while True:
-        o, r, d, i = env.step(env.action_space.sample())
+        o, r, tm, tc, i = env.step(env.action_space.sample())
         env.engine.force_fps.toggle()
         if save:
-            pygame.image.save(env._top_down_renderer._runtime_canvas, "{}_{}.png".format(name, frame_count))
+            pygame.image.save(
+                env._top_down_renderer._runtime_canvas,
+                "{}_{}.png".format(name, frame_count),
+            )
         frame_count += 1
         if len(env.engine.replay_manager.restore_episode_info) == 0:
             env.close()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     # _test()
     _vis()
diff --git a/metadrive/envs/marl_envs/tinyinter.py b/metadrive/envs/marl_envs/tinyinter.py
index 23ed325f7..d3251763d 100644
--- a/metadrive/envs/marl_envs/tinyinter.py
+++ b/metadrive/envs/marl_envs/tinyinter.py
@@ -1,6 +1,6 @@
 import copy
 
-import gym
+import gymnasium as gym
 import numpy as np
 
 from metadrive.envs.marl_envs.marl_intersection import MultiAgentIntersectionEnv
@@ -367,13 +367,13 @@ def _get_reset_return(self):
         return self.agent_manager.filter_RL_agents(org)
 
     def step(self, actions):
-        o, r, d, i = super(MultiAgentTinyInter, self).step(actions)
+        o, r, tm, tc, i = super(MultiAgentTinyInter, self).step(actions)
 
         # if self.num_RL_agents == self.num_agents:
-        #     return o, r, d, i
+        #     return o, r, tm, tc, i
 
-        original_done_dict = copy.deepcopy(d)
-        d = self.agent_manager.filter_RL_agents(d, original_done_dict=original_done_dict)
+        original_done_dict = copy.deepcopy(tm)
+        d = self.agent_manager.filter_RL_agents(tm, original_done_dict=original_done_dict)
         if "__all__" in d:
             d.pop("__all__")
         # assert len(d) == self.agent_manager.num_RL_agents, d
@@ -382,6 +382,7 @@ def step(self, actions):
             self.agent_manager.filter_RL_agents(o, original_done_dict=original_done_dict),
             self.agent_manager.filter_RL_agents(r, original_done_dict=original_done_dict),
             d,
+            self.agent_manager.filter_RL_agents(tc, original_done_dict=original_done_dict),
             self.agent_manager.filter_RL_agents(i, original_done_dict=original_done_dict),
         )
 
@@ -435,7 +436,7 @@ def get_single_observation(self, vehicle_config: "Config"):
             # "debug_static_world": True,
         }
     )
-    o = env.reset()
+    o, _ = env.reset()
     # env.engine.force_fps.toggle()
     print("vehicle num", len(env.engine.traffic_manager.vehicles))
     print("RL agent num", len(o))
@@ -444,11 +445,11 @@ def get_single_observation(self, vehicle_config: "Config"):
     ep_reward_sum = 0.0
     ep_success_reward_sum = 0.0
     for i in range(1, 100000):
-        o, r, d, info = env.step({k: [0.0, 1.0] for k in env.action_space.sample().keys()})
+        o, r, tm, tc, info = env.step({k: [0.0, 1.0] for k in env.action_space.sample().keys()})
         # env.render("top_down", camera_position=(42.5, 0), film_size=(500, 500))
         vehicles = env.vehicles
-        for k, v in d.items():
+        for k, v in tm.items():
             if v and k in info:
                 ep_success += int(info[k]["arrive_dest"])
                 ep_reward_sum += int(info[k]["episode_reward"])
@@ -456,14 +457,14 @@ def get_single_observation(self, vehicle_config: "Config"):
                 if info[k]["arrive_dest"]:
                     ep_success_reward_sum += int(info[k]["episode_reward"])
 
-        # if not d["__all__"]:
+        # if not tm["__all__"]:
         #     assert sum(
         #         [env.engine.get_policy(v.name).__class__.__name__ == "EnvInputPolicy" for k, v in vehicles.items()]
         #     ) == env.config["num_RL_agents"]
 
-        if any(d.values()):
-            print("Somebody dead.", d, info)
+        if any(tm.values()):
+            print("Somebody dead.", tm, info)
             # print("Step {}. 
Policies: {}".format(i, {k: v['policy'] for k, v in info.items()})) - if d["__all__"]: + if tm["__all__"]: # assert i >= 1000 print("Reset. ", i, info) # break diff --git a/metadrive/envs/metadrive_env.py b/metadrive/envs/metadrive_env.py index 10d977cb7..74381f6cb 100644 --- a/metadrive/envs/metadrive_env.py +++ b/metadrive/envs/metadrive_env.py @@ -90,14 +90,14 @@ class MetaDriveEnv(BaseEnv): @classmethod - def default_config(cls) -> "Config": + def default_config(cls) -> Config: config = super(MetaDriveEnv, cls).default_config() config.update(METADRIVE_DEFAULT_CONFIG) config.register_type("map", str, int) config["map_config"].register_type("config", None) return config - def __init__(self, config: dict = None): + def __init__(self, config: Union[dict, None] = None): self.default_config_copy = Config(self.default_config(), unchangeable=True) super(MetaDriveEnv, self).__init__(config) @@ -106,7 +106,7 @@ def __init__(self, config: dict = None): self.env_num = self.config["num_scenarios"] self.in_stop = False - def _merge_extra_config(self, config: Union[dict, "Config"]) -> "Config": + def _merge_extra_config(self, config: Union[dict, Config]) -> Config: config = self.default_config().update(config, allow_add_new_key=False) if config["vehicle_config"]["lidar"]["distance"] > 50: config["max_distance"] = config["vehicle_config"]["lidar"]["distance"] @@ -348,14 +348,14 @@ def _reset_global_seed(self, force_seed=None): def _act(env, action): assert env.action_space.contains(action) - obs, reward, done, info = env.step(action) + obs, reward, terminated, truncated, info = env.step(action) assert env.observation_space.contains(obs) assert np.isscalar(reward) assert isinstance(info, dict) env = MetaDriveEnv() try: - obs = env.reset() + obs, _ = env.reset() assert env.observation_space.contains(obs) _act(env, env.action_space.sample()) for x in [-1, 0, 1]: diff --git a/metadrive/envs/mixed_traffic_env.py b/metadrive/envs/mixed_traffic_env.py index 39c5313e3..17b6db6e9 100644 --- a/metadrive/envs/mixed_traffic_env.py +++ b/metadrive/envs/mixed_traffic_env.py @@ -30,15 +30,15 @@ def setup_engine(self): } ) try: - obs = env.reset() + obs, _ = env.reset() obs_space = env.observation_space assert obs_space.contains(obs) for _ in range(100000): assert env.observation_space.contains(obs) - o, r, d, i = env.step(env.action_space.sample()) + o, r, tm, tc, i = env.step(env.action_space.sample()) assert obs_space.contains(o) - if d: - o = env.reset() + if tm or tc: + o, _ = env.reset() assert obs_space.contains(o) finally: env.close() diff --git a/metadrive/envs/real_data_envs/mix_waymo_pg_env.py b/metadrive/envs/real_data_envs/mix_waymo_pg_env.py index 971dc45e6..42657bdfa 100644 --- a/metadrive/envs/real_data_envs/mix_waymo_pg_env.py +++ b/metadrive/envs/real_data_envs/mix_waymo_pg_env.py @@ -139,11 +139,11 @@ def _init_pg_episode(self): ) self.config["target_vehicle_configs"]["default_agent"]["destination"] = None - def reset(self, force_seed: Union[None, int] = None): + def reset(self, seed: Union[None, int] = None): self.change_suite() # ===== same as BaseEnv ===== self.lazy_init() # it only works the first time when reset() is called to avoid the error when render - self._reset_global_seed(force_seed) + self._reset_global_seed(seed) if self.engine is None: raise ValueError( "Current MetaDrive instance is broken. 
Please make sure there is only one active MetaDrive " @@ -231,7 +231,7 @@ def __init__(self, config=None): ) env.reset() while True: - o, r, d, i = env.step(env.action_space.sample()) + o, r, tm, tc, i = env.step(env.action_space.sample()) env.render(text={"ts": env.episode_step}) - if d: + if tm or tc: env.reset() diff --git a/metadrive/envs/real_data_envs/nuplan_env.py b/metadrive/envs/real_data_envs/nuplan_env.py index 181b2234d..f7f9fa63e 100644 --- a/metadrive/envs/real_data_envs/nuplan_env.py +++ b/metadrive/envs/real_data_envs/nuplan_env.py @@ -349,7 +349,7 @@ def _is_out_of_road(self, vehicle): env.reset() # env.reset(seed) for i in range(env.engine.data_manager.current_scenario_length * 10): - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) env.render(text={"seed": env.current_seed}) if info["replay_done"]: break diff --git a/metadrive/envs/real_data_envs/nuscenes_env.py b/metadrive/envs/real_data_envs/nuscenes_env.py index de9408522..6964b546e 100644 --- a/metadrive/envs/real_data_envs/nuscenes_env.py +++ b/metadrive/envs/real_data_envs/nuscenes_env.py @@ -76,12 +76,12 @@ while True: # for i in range(10): start_reset = time.time() - env.reset(force_seed=0) + env.reset(seed=0) reset_used_time += time.time() - start_reset reset_num += 1 for t in range(10000): - o, r, d, info = env.step([1, 0.88]) + o, r, tm, tc, info = env.step([1, 0.88]) assert env.observation_space.contains(o) s += 1 if env.config["use_render"]: @@ -97,7 +97,7 @@ }, # mode="topdown" ) - if d: + if tm or tc: print( "Time elapse: {:.4f}. Average FPS: {:.4f}, AVG_Reset_time: {:.4f}".format( time.time() - start, s / (time.time() - start - reset_used_time), reset_used_time / reset_num diff --git a/metadrive/envs/real_data_envs/waymo_env.py b/metadrive/envs/real_data_envs/waymo_env.py index 7d5faeeaa..6214267b2 100644 --- a/metadrive/envs/real_data_envs/waymo_env.py +++ b/metadrive/envs/real_data_envs/waymo_env.py @@ -59,10 +59,10 @@ def _merge_extra_config(self, config): success = [] while True: for i in range(3): - env.reset(force_seed=i) + env.reset(seed=i) while True: step_start = time.time() - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert env.observation_space.contains(o) # c_lane = env.vehicle.lane # long, lat, = c_lane.local_coordinates(env.vehicle.position) @@ -75,7 +75,7 @@ def _merge_extra_config(self, config): # mode="topdown" ) - if d: + if tm or tc: if info["arrive_dest"]: print("seed:{}, success".format(env.engine.global_random_seed)) break diff --git a/metadrive/envs/remote_env.py b/metadrive/envs/remote_env.py index f7bec587d..cd71c81cc 100644 --- a/metadrive/envs/remote_env.py +++ b/metadrive/envs/remote_env.py @@ -2,7 +2,7 @@ This file provide a RemoteMetaDrive environment which can be easily ran in single process!
""" -import gym +import gymnasium as gym from metadrive.envs.metadrive_env import MetaDriveEnv diff --git a/metadrive/envs/safe_metadrive_env.py b/metadrive/envs/safe_metadrive_env.py index 5be4b7d35..2ca4878ef 100644 --- a/metadrive/envs/safe_metadrive_env.py +++ b/metadrive/envs/safe_metadrive_env.py @@ -90,10 +90,10 @@ def setup_engine(self): } ) - o = env.reset() + o, _ = env.reset() total_cost = 0 for i in range(1, 100000): - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) total_cost += info["cost"] env.render( text={ diff --git a/metadrive/envs/scenario_env.py b/metadrive/envs/scenario_env.py index dbc555afb..d3ff4fd00 100644 --- a/metadrive/envs/scenario_env.py +++ b/metadrive/envs/scenario_env.py @@ -484,11 +484,11 @@ def stop(self): } ) success = [] - env.reset(force_seed=0) + env.reset(seed=0) while True: - env.reset(force_seed=env.current_seed + 1) + env.reset(seed=env.current_seed + 1) for t in range(10000): - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert env.observation_space.contains(o) c_lane = env.vehicle.lane long, lat, = c_lane.local_coordinates(env.vehicle.position) @@ -502,6 +502,6 @@ def stop(self): # mode="topdown" ) - if d and info["arrive_dest"]: + if (tm or tc) and info["arrive_dest"]: print("seed:{}, success".format(env.engine.global_random_seed)) break diff --git a/metadrive/envs/top_down_env.py b/metadrive/envs/top_down_env.py index 7a68567e5..c0c24c5fb 100644 --- a/metadrive/envs/top_down_env.py +++ b/metadrive/envs/top_down_env.py @@ -104,7 +104,7 @@ def get_single_observation(self, _=None): o, *_ = env.step([-0.00, 0.2]) assert env.observation_space.contains(o) for _ in range(10000): - o, r, d, i = env.step([0.0, 1]) + o, r, tm, tc, i = env.step([0.0, 1]) print("Velocity: ", i["velocity"]) fig, axes = plt.subplots(1, o.shape[-1], figsize=(15, 3)) diff --git a/metadrive/envs/varying_dynamics_env.py b/metadrive/envs/varying_dynamics_env.py index 6074057cf..8561929e7 100644 --- a/metadrive/envs/varying_dynamics_env.py +++ b/metadrive/envs/varying_dynamics_env.py @@ -67,9 +67,9 @@ def _get_agent_manager(self): "num_scenarios": 10, # Allow 10 random envs. 
}) for ep in range(3): - obs = env.reset() + obs, _ = env.reset() print("Current Dynamics Parameters:", env.vehicle.get_dynamics_parameters()) for step in range(1000): - o, r, d, i = env.step(env.action_space.sample()) - if d: + o, r, tm, tc, i = env.step(env.action_space.sample()) + if tm or tc: break diff --git a/metadrive/examples/Basic_MetaDrive_Usages.ipynb b/metadrive/examples/Basic_MetaDrive_Usages.ipynb index 4656d29b8..d74f47615 100644 --- a/metadrive/examples/Basic_MetaDrive_Usages.ipynb +++ b/metadrive/examples/Basic_MetaDrive_Usages.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "J1w-zrxAUr4l", @@ -23,6 +24,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "2efvTXdHVptN", @@ -84,6 +86,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "QhteKnQ5kqEg", @@ -123,7 +126,8 @@ "# Set the envrionment config\n", "config = {\"start_seed\": 1000, \n", " \"environment_num\":1,\n", - " \"traffic_density\":0.1\n", + " \"traffic_density\":0.1,\n", + " \"render_mode\": \"top_down\", \n", " }\n", "\n", "env = MetaDriveEnv(config)\n", @@ -131,14 +135,14 @@ "print(\"Starting the environment ...\\n\")\n", "\n", "ep_reward = 0.0\n", - "obs = env.reset()\n", + "obs, info = env.reset()\n", "frames = []\n", "for i in range(1000):\n", - " obs, reward, done, info = env.step(expert(env.vehicle))\n", + " obs, reward, terminated, truncated, info = env.step(expert(env.vehicle))\n", " ep_reward += reward\n", - " frame = env.render(mode=\"top_down\", film_size=(800, 800), track_target_vehicle=True, screen_size=(500, 500))\n", + " frame = env.render(film_size=(800, 800), track_target_vehicle=True, screen_size=(500, 500))\n", " frames.append(frame)\n", - " if done:\n", + " if terminated or truncated:\n", " print(\"Arriving Destination: {}\".format(info[\"arrive_dest\"]))\n", " print(\"\\nEpisode reward: \", ep_reward)\n", " break\n", @@ -176,11 +180,12 @@ "from metadrive import SafeMetaDriveEnv\n", "from metadrive.examples import expert\n", "\n", - "# Set the envrionment config\n", + "# Set the environment config\n", "config = {\"start_seed\": 0, \n", " \"environment_num\":1,\n", " \"accident_prob\":1.0,\n", - " \"traffic_density\":0.25\n", + " \"traffic_density\":0.25,\n", + " \"render_mode\": \"top_down\"\n", " }\n", "\n", "env = SafeMetaDriveEnv(config)\n", @@ -189,15 +194,15 @@ "\n", "ep_reward = 0.0\n", "ep_cost = 0.0\n", - "obs = env.reset()\n", + "obs, info = env.reset()\n", "frames = []\n", "for i in range(1000):\n", - " obs, reward, done, info = env.step(expert(env.vehicle))\n", + " obs, reward, terminated, truncated, info = env.step(expert(env.vehicle))\n", " ep_reward += reward\n", " ep_cost += info[\"cost\"]\n", - " frame = env.render(mode=\"top_down\", film_size=(1500, 1500), track_target_vehicle=True, screen_size=(500, 500))\n", + " frame = env.render(film_size=(1500, 1500), track_target_vehicle=True, screen_size=(500, 500))\n", " frames.append(frame)\n", - " if done:\n", + " if terminated or truncated:\n", " print(\"Arriving Destination: {}\".format(info[\"arrive_dest\"]))\n", " print(\"\\nEpisode reward: \", ep_reward)\n", " print(\"\\nEpisode cost: \", ep_cost)\n", @@ -234,27 +239,27 @@ "from metadrive import MultiAgentRoundaboutEnv, MultiAgentBottleneckEnv, MultiAgentIntersectionEnv, MultiAgentParkingLotEnv, MultiAgentTollgateEnv\n", "from metadrive.examples import expert\n", "\n", - "envs = [MultiAgentRoundaboutEnv(), \n", - " MultiAgentBottleneckEnv(), \n", - " 
MultiAgentIntersectionEnv(), \n", - " MultiAgentParkingLotEnv(), \n", - " MultiAgentTollgateEnv()]\n", + "env_classes = [MultiAgentRoundaboutEnv, \n", + " MultiAgentBottleneckEnv, \n", + " MultiAgentIntersectionEnv, \n", + " MultiAgentParkingLotEnv, \n", + " MultiAgentTollgateEnv]\n", "\n", "frames = []\n", - "for env in envs:\n", + "for env_class in env_classes:\n", + " env = env_class({\"render_mode\": \"top_down\"})\n", " print(\"Starting the environment {}\\n\".format(env))\n", " env.reset()\n", - " d={}\n", - " d[\"__all__\"] = False\n", + " tm={\"__all__\":False}\n", " for i in range(100):\n", - " if d[\"__all__\"]:\n", + " if tm[\"__all__\"]:\n", " frames.append(frame)\n", " continue\n", " action = env.action_space.sample()\n", " for a in action.values(): \n", " a[-1] = 1.0\n", - " o,r,d,i = env.step(action)\n", - " frame = env.render(mode=\"top_down\", film_size=(500, 500), track_target_vehicle=False, screen_size=(500, 500))\n", + " o,r,tm,tc,i = env.step(action)\n", + " frame = env.render(film_size=(500, 500), track_target_vehicle=False, screen_size=(500, 500))\n", " frames.append(frame)\n", " env.close()\n", "\n", @@ -273,6 +278,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "ClZzNVQUkqEh", diff --git a/metadrive/examples/custom_inramp_env.py b/metadrive/examples/custom_inramp_env.py index 0951043a5..49087335c 100644 --- a/metadrive/examples/custom_inramp_env.py +++ b/metadrive/examples/custom_inramp_env.py @@ -22,13 +22,13 @@ env = MetaDriveEnv(config) try: - o = env.reset() + o, _ = env.reset() env.vehicle.expert_takeover = True assert isinstance(o, np.ndarray) print("The observation is an numpy array with shape: ", o.shape) for i in range(1, 1000000000): - o, r, d, info = env.step([0, 0]) - if d and info["arrive_dest"]: + o, r, tm, tc, info = env.step([0, 0]) + if (tm or tc) and info["arrive_dest"]: env.reset() env.current_track_vehicle.expert_takeover = True except: diff --git a/metadrive/examples/draw_maps.py b/metadrive/examples/draw_maps.py index 144fba8cf..1d3c0b73f 100644 --- a/metadrive/examples/draw_maps.py +++ b/metadrive/examples/draw_maps.py @@ -17,7 +17,7 @@ env = ScenarioEnv(dict(start_scenario_index=0, num_scenarios=3)) for j in range(3): count += 1 - env.reset(force_seed=j) + env.reset(seed=j) m = draw_top_down_map(env.current_map) # m = env.get_map() ax = axs[i][j] diff --git a/metadrive/examples/drive_in_multi_agent_env.py b/metadrive/examples/drive_in_multi_agent_env.py index 1593f6cd4..b357e1e29 100644 --- a/metadrive/examples/drive_in_multi_agent_env.py +++ b/metadrive/examples/drive_in_multi_agent_env.py @@ -37,7 +37,7 @@ parser.add_argument("--top_down", action="store_true") args = parser.parse_args() env_cls_name = args.env - extra_args = dict(mode="top_down", film_size=(800, 800)) if args.top_down else {} + extra_args = dict(film_size=(800, 800)) if args.top_down else {} assert env_cls_name in envs.keys(), "No environment named {}, argument accepted: \n" \ "(1) roundabout\n" \ "(2) intersection\n" \ @@ -49,6 +49,7 @@ env = envs[env_cls_name]( { "use_render": True if not args.top_down else False, + "render_mode": "top_down" if args.top_down else None, "manual_control": False, "crash_done": False, "agent_policy": ManualControllableIDMPolicy @@ -61,7 +62,7 @@ print(HELP_MESSAGE) env.switch_to_third_person_view() # Default is in Top-down view, we switch to Third-person view. 
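# NOTE: a minimal sketch of how the per-agent dicts returned by these MARL envs are consumed after this
# migration; the rollout below is illustrative only and not part of the scripts in this diff:
#     o, r, tm, tc, info = env.step(actions)   # each value is a dict keyed by agent id
#     for agent_id, done in tm.items():
#         if agent_id != "__all__" and done:
#             print(agent_id, "terminated; episode reward:", info[agent_id]["episode_reward"])
#     if tm["__all__"] or tc["__all__"]:
#         env.reset()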
for i in range(1, 10000000000): - o, r, d, info = env.step({agent_id: [0, 0] for agent_id in env.vehicles.keys()}) + o, r, tm, tc, info = env.step({agent_id: [0, 0] for agent_id in env.vehicles.keys()}) env.render( **extra_args, text={ @@ -70,7 +71,7 @@ "Auto-Drive (Switch mode: T)": "on" if env.current_track_vehicle.expert_takeover else "off", } if not args.top_down else {} ) - if d["__all__"]: + if tm["__all__"]: env.reset() if env.current_track_vehicle: env.current_track_vehicle.expert_takeover = True diff --git a/metadrive/examples/drive_in_safe_metadrive_env.py b/metadrive/examples/drive_in_safe_metadrive_env.py index 6f830298d..5c1ce15a5 100644 --- a/metadrive/examples/drive_in_safe_metadrive_env.py +++ b/metadrive/examples/drive_in_safe_metadrive_env.py @@ -18,7 +18,7 @@ env.vehicle.expert_takeover = True for i in range(1, 1000000000): previous_takeover = env.current_track_vehicle.expert_takeover - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) env.render( text={ "Auto-Drive (Switch mode: T)": "on" if env.current_track_vehicle.expert_takeover else "off", @@ -27,7 +27,7 @@ ) if not previous_takeover and env.current_track_vehicle.expert_takeover: logging.warning("Auto-Drive mode may fail to solve some scenarios due to distribution mismatch") - if d and info["arrive_dest"]: + if (tm or tc) and info["arrive_dest"]: env.reset() env.current_track_vehicle.expert_takeover = True except Exception as e: diff --git a/metadrive/examples/drive_in_single_agent_env.py b/metadrive/examples/drive_in_single_agent_env.py index 53c684386..e5e12f4a3 100644 --- a/metadrive/examples/drive_in_single_agent_env.py +++ b/metadrive/examples/drive_in_single_agent_env.py @@ -35,7 +35,7 @@ config.update(dict(image_observation=True)) env = MetaDriveEnv(config) try: - o = env.reset() + o, _ = env.reset() print(HELP_MESSAGE) env.vehicle.expert_takeover = True if args.observation == "rgb_camera": @@ -45,13 +45,13 @@ assert isinstance(o, np.ndarray) print("The observation is an numpy array with shape: ", o.shape) for i in range(1, 1000000000): - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) env.render( text={ "Auto-Drive (Switch mode: T)": "on" if env.current_track_vehicle.expert_takeover else "off", } ) - if d and info["arrive_dest"]: + if (tm or tc) and info["arrive_dest"]: env.reset() env.current_track_vehicle.expert_takeover = True except Exception as e: diff --git a/metadrive/examples/drive_in_waymo_env.py b/metadrive/examples/drive_in_waymo_env.py index e9b4484e6..21a47dd9b 100644 --- a/metadrive/examples/drive_in_waymo_env.py +++ b/metadrive/examples/drive_in_waymo_env.py @@ -10,12 +10,12 @@ class DemoWaymoEnv(WaymoEnv): - def reset(self, force_seed=None): + def reset(self, seed=None): if self.engine is not None: seeds = [i for i in range(self.config["num_scenarios"])] seeds.remove(self.current_seed) - force_seed = random.choice(seeds) - super(DemoWaymoEnv, self).reset(force_seed=force_seed) + seed = random.choice(seeds) + return super(DemoWaymoEnv, self).reset(seed=seed) if __name__ == "__main__": @@ -23,7 +23,7 @@ def reset(self, force_seed=None): parser.add_argument("--reactive_traffic", action="store_true") parser.add_argument("--top_down", action="store_true") args = parser.parse_args() - extra_args = dict(mode="top_down", film_size=(800, 800)) if args.top_down else {} + extra_args = dict(film_size=(800, 800)) if args.top_down else {} asset_path = AssetLoader.asset_path print(HELP_MESSAGE) try: @@ -32,16 +32,17 @@ def reset(self, force_seed=None): 
"manual_control": True, "reactive_traffic": True if args.reactive_traffic else False, "use_render": True if not args.top_down else False, + "render_mode": "top_down" if args.top_down else None, "data_directory": AssetLoader.file_path(asset_path, "waymo", return_raw_style=False), "num_scenarios": 3 } ) - o = env.reset() + o, _ = env.reset() for i in range(1, 100000): - o, r, d, info = env.step([1.0, 0.]) + o, r, tm, tc, info = env.step([1.0, 0.]) env.render(text={"Switch perspective": "Q or B", "Reset Episode": "R"}, **extra_args) - if d: + if tm or tc: env.reset() except Exception as e: raise e diff --git a/metadrive/examples/generate_video_for_bev_and_interface.py b/metadrive/examples/generate_video_for_bev_and_interface.py index d936878c2..3f1c80303 100644 --- a/metadrive/examples/generate_video_for_bev_and_interface.py +++ b/metadrive/examples/generate_video_for_bev_and_interface.py @@ -66,7 +66,7 @@ def finish(self): step_count = 0 frame_count = 0 - o = env.reset(force_seed=start_seed) + o, _ = env.reset(seed=start_seed) env.vehicle.expert_takeover = True env.engine.force_fps.disable() @@ -82,7 +82,7 @@ def finish(self): video_interface = VideoRecorder(video_name=video_name_interface, height=1200, width=1600, code="avc1") while True: - o, r, d, info = env.step(env.action_space.sample()) + o, r, tm, tc, info = env.step(env.action_space.sample()) img_interface = env.render("rgb_array") img_bev = env.render( @@ -113,7 +113,7 @@ def finish(self): frame_count += 1 step_count += 1 - if d or step_count > 1000: + if tm or tc or step_count > 1000: ep_count += 1 step_count = 0 @@ -139,7 +139,7 @@ def finish(self): video_bev = VideoRecorder(video_name=video_name_bev, height=3000, width=3000, code="avc1") video_interface = VideoRecorder(video_name=video_name_interface, height=1200, width=1600, code="avc1") - o = env.reset(force_seed=ep_count + start_seed) + o, _ = env.reset(seed=ep_count + start_seed) env.vehicle.expert_takeover = True env.engine.force_fps.disable() diff --git a/metadrive/examples/profile_metadrive.py b/metadrive/examples/profile_metadrive.py index bdd4723bb..0f00c26ce 100644 --- a/metadrive/examples/profile_metadrive.py +++ b/metadrive/examples/profile_metadrive.py @@ -18,15 +18,15 @@ start_seed=1010, traffic_density=0.05, )) - obs = env.reset() + obs, _ = env.reset() start = time.time() reset_used_time = 0 action = [0.0, 1.] total_steps = args.num_steps vehicle_num = [len(env.engine.traffic_manager.vehicles)] for s in range(total_steps): - o, r, d, i = env.step(action) - if d: + o, r, tm, tc, i = env.step(action) + if tm or tc: start_reset = time.time() env.reset() vehicle_num.append(len(env.engine.traffic_manager.vehicles)) diff --git a/metadrive/examples/profile_metadrive_marl.py b/metadrive/examples/profile_metadrive_marl.py index 86a35b65c..084cc11a0 100644 --- a/metadrive/examples/profile_metadrive_marl.py +++ b/metadrive/examples/profile_metadrive_marl.py @@ -14,15 +14,15 @@ print("Start to profile the efficiency of MetaDrive Multi-agent Roundabout environment!") setup_logger(debug=False) env = MultiAgentRoundaboutEnv(dict(start_seed=1010)) - obs = env.reset() + obs, _ = env.reset() start = time.time() reset_used_time = 0 action = [0.0, 1.] 
total_steps = args.num_steps vehicle_num = [len(env.vehicles)] for s in range(total_steps): - o, r, d, i = env.step({k: action for k in env.vehicles}) - if d["__all__"]: + o, r, tm, tc, i = env.step({k: action for k in env.vehicles}) + if tm["__all__"]: start_reset = time.time() env.reset() vehicle_num.append(len(env.vehicles)) diff --git a/metadrive/examples/top_down_metadrive.py b/metadrive/examples/top_down_metadrive.py index e0d706b5f..29a95f847 100644 --- a/metadrive/examples/top_down_metadrive.py +++ b/metadrive/examples/top_down_metadrive.py @@ -65,17 +65,18 @@ def close_event(): # with keyboard in the main window. # manual_control=True, map="SSSS", + render_mode="top_down", traffic_density=0.1, num_scenarios=100, start_seed=random.randint(0, 1000), ) ) try: - o = env.reset() + o, _ = env.reset() for i in range(1, 100000): - o, r, d, info = env.step(expert(env.vehicle)) - env.render(mode="top_down", film_size=(800, 800)) - if d: + o, r, tm, tc, info = env.step(expert(env.vehicle)) + env.render(film_size=(800, 800)) + if tm or tc: env.reset() if i % 50 == 0: draw_multi_channels_top_down_observation(o, show_time=5) # show time 4s diff --git a/metadrive/manager/agent_manager.py b/metadrive/manager/agent_manager.py index 5291f0873..6b64feafd 100644 --- a/metadrive/manager/agent_manager.py +++ b/metadrive/manager/agent_manager.py @@ -2,7 +2,7 @@ from metadrive.policy.idm_policy import TrajectoryIDMPOlicy from typing import Dict -from gym.spaces import Box, Dict, MultiDiscrete, Discrete +from gymnasium.spaces import Box, Dict, MultiDiscrete, Discrete from metadrive.constants import DEFAULT_AGENT from metadrive.manager.base_manager import BaseManager diff --git a/metadrive/obs/image_obs.py b/metadrive/obs/image_obs.py index a30a25214..e0da520ed 100644 --- a/metadrive/obs/image_obs.py +++ b/metadrive/obs/image_obs.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np from metadrive.component.vehicle.base_vehicle import BaseVehicle diff --git a/metadrive/obs/real_env_observation.py b/metadrive/obs/real_env_observation.py index 11e4b46fe..90e5d5d6f 100644 --- a/metadrive/obs/real_env_observation.py +++ b/metadrive/obs/real_env_observation.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np from metadrive.obs.state_obs import LidarStateObservation diff --git a/metadrive/obs/state_obs.py b/metadrive/obs/state_obs.py index b2cc1f28d..67902f042 100644 --- a/metadrive/obs/state_obs.py +++ b/metadrive/obs/state_obs.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym import numpy as np from metadrive.component.vehicle_navigation_module.node_network_navigation import NodeNetworkNavigation diff --git a/metadrive/obs/top_down_obs.py b/metadrive/obs/top_down_obs.py index c876f1876..3528df3bc 100644 --- a/metadrive/obs/top_down_obs.py +++ b/metadrive/obs/top_down_obs.py @@ -6,7 +6,7 @@ import sys from typing import Tuple -import gym +import gymnasium as gym import numpy as np from metadrive.component.vehicle.base_vehicle import BaseVehicle diff --git a/metadrive/obs/top_down_obs_multi_channel.py b/metadrive/obs/top_down_obs_multi_channel.py index 67dd39c7b..2bbd209e9 100644 --- a/metadrive/obs/top_down_obs_multi_channel.py +++ b/metadrive/obs/top_down_obs_multi_channel.py @@ -1,6 +1,6 @@ from collections import deque -import gym +import gymnasium as gym import math import numpy as np diff --git a/metadrive/policy/base_policy.py b/metadrive/policy/base_policy.py index 15ba46bf4..52be07104 100644 --- a/metadrive/policy/base_policy.py +++ 
b/metadrive/policy/base_policy.py @@ -2,7 +2,7 @@ import logging import uuid from metadrive.constants import CamMask -import gym +import gymnasium as gym import numpy as np from panda3d.core import NodePath diff --git a/metadrive/policy/env_input_policy.py b/metadrive/policy/env_input_policy.py index 98c90c3cf..acc8a17a7 100644 --- a/metadrive/policy/env_input_policy.py +++ b/metadrive/policy/env_input_policy.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym from metadrive.engine.engine_utils import get_global_config import numpy as np diff --git a/metadrive/policy/lange_change_policy.py b/metadrive/policy/lange_change_policy.py index c534f385d..8e0c6143d 100644 --- a/metadrive/policy/lange_change_policy.py +++ b/metadrive/policy/lange_change_policy.py @@ -1,4 +1,4 @@ -import gym +import gymnasium as gym from metadrive.component.vehicle_module.PID_controller import PIDController from metadrive.engine.engine_utils import get_global_config diff --git a/metadrive/register.py b/metadrive/register.py index a75853099..2915e6ca7 100644 --- a/metadrive/register.py +++ b/metadrive/register.py @@ -1,6 +1,5 @@ -from gym.envs.registration import register, registry +import gymnasium as gym from packaging import version -import gym from metadrive.envs import MetaDriveEnv from metadrive.envs import MultiAgentTollgateEnv, MultiAgentBottleneckEnv, MultiAgentRoundaboutEnv, \ MultiAgentIntersectionEnv, MultiAgentParkingLotEnv, MultiAgentMetaDrive @@ -70,29 +69,26 @@ } envs = [] -existing_space = registry.env_specs if version.parse(gym.__version__) < version.parse("0.24.0") else registry for env_name, env_config in metadrive_environment_dict.items(): - if env_name not in existing_space: + if env_name not in gym.registry.keys(): envs.append(env_name) - register(id=env_name, entry_point=MetaDriveEnv, kwargs=dict(config=env_config)) + gym.register(id=env_name, entry_point=MetaDriveEnv, kwargs=dict(config=env_config)) for env_name, env_config in safe_metadrive_environment_dict.items(): - if env_name not in existing_space: + if env_name not in gym.registry.keys(): envs.append(env_name) - register(id=env_name, entry_point=SafeMetaDriveEnv, kwargs=dict(config=env_config)) + gym.register(id=env_name, entry_point=SafeMetaDriveEnv, kwargs=dict(config=env_config)) for env_name, entry in marl_env.items(): - if env_name not in existing_space: + if env_name not in gym.registry.keys(): envs.append(env_name) - register(id=env_name, entry_point=entry, kwargs=dict(config={})) + gym.register(id=env_name, entry_point=entry, kwargs=dict(config={})) if len(envs) > 0: print("Successfully registered the following environments: {}.".format(envs)) if __name__ == '__main__': # Test purpose only - import gym - env = gym.make("MetaDrive-validation-v0") env.reset() env.close() diff --git a/metadrive/tests/benchmark_FPS/benchmark_nupan_env.py b/metadrive/tests/benchmark_FPS/benchmark_nupan_env.py index e774703a1..1d2359a13 100644 --- a/metadrive/tests/benchmark_FPS/benchmark_nupan_env.py +++ b/metadrive/tests/benchmark_FPS/benchmark_nupan_env.py @@ -38,12 +38,12 @@ def benchmark_fps(): total_time = 0 total_steps = 0 for seed in range(300, 400): - env.reset(force_seed=seed) + env.reset(seed=seed) start = time.time() for i in range(env.engine.data_manager.current_scenario_length * 10): - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) total_steps += 1 - if d: + if tm or tc: break total_time += time.time() - start if (seed + 300) % 20 == 0: @@ -76,7 +76,7 @@ def 
benchmark_reset_5_map_1000_times(load_city_map=True): load_time = time.time() - start_time # print("After first reset process Memory: {}".format(process_memory())) for seed in tqdm.tqdm(range(300, 1300)): - env.reset(force_seed=(seed % 5) + 300) + env.reset(seed=(seed % 5) + 300) # if seed % 500 == 0: # print("reset: {}, Time: {}, Process Memory: {}".format(seed, time.time() - start_time, process_memory())) # print( @@ -112,7 +112,7 @@ def benchmark_reset_1000(load_city_map=True): # print("After first reset process Memory: {}".format(process_memory())) for seed in tqdm.tqdm(range(300, 1300)): thisscenario = time.time() - env.reset(force_seed=seed) + env.reset(seed=seed) # print("Seed: {}, Time: {}".format(seed, time.time() - thisscenario)) # if seed % 500 == 0: # print("reset: {}, Time: {}, Process Memory: {}".format(seed, time.time() - start_time, process_memory())) diff --git a/metadrive/tests/benchmark_FPS/benchmark_waymo.py b/metadrive/tests/benchmark_FPS/benchmark_waymo.py index 2f682d211..62f1056a6 100644 --- a/metadrive/tests/benchmark_FPS/benchmark_waymo.py +++ b/metadrive/tests/benchmark_FPS/benchmark_waymo.py @@ -32,12 +32,12 @@ def benchmark_fps(): total_time = 0 total_steps = 0 for seed in range(300, 400): - env.reset(force_seed=seed % env.config["num_scenarios"]) + env.reset(seed=seed % env.config["num_scenarios"]) start = time.time() for i in range(100): - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) total_steps += 1 - if d: + if tm or tc: break total_time += time.time() - start if (seed + 300) % 20 == 0: diff --git a/metadrive/tests/local_tests/local_test_apply_action.py b/metadrive/tests/local_tests/local_test_apply_action.py index 82a387661..78c7d2103 100644 --- a/metadrive/tests/local_tests/local_test_apply_action.py +++ b/metadrive/tests/local_tests/local_test_apply_action.py @@ -4,13 +4,13 @@ def local_test_apply_action(): try: env = MetaDriveEnv({"map": "SSS", "use_render": True}) - o = env.reset() + o, _ = env.reset() for act in [-1, 1]: for _ in range(300): assert env.observation_space.contains(o) - o, r, d, i = env.step([act, 1]) - if d: - o = env.reset() + o, r, tm, tc, i = env.step([act, 1]) + if tm or tc: + o, _ = env.reset() break env.close() finally: diff --git a/metadrive/tests/local_tests/local_test_close_and_restart.py b/metadrive/tests/local_tests/local_test_close_and_restart.py index 01b9e6aa1..2c2901d42 100644 --- a/metadrive/tests/local_tests/local_test_close_and_restart.py +++ b/metadrive/tests/local_tests/local_test_close_and_restart.py @@ -5,11 +5,11 @@ def local_test_close_and_restart(): try: for m in ["X", "O", "C", "S", "R", "r", "T"]: env = MetaDriveEnv({"map": m, "use_render": True}) - o = env.reset() + o, _ = env.reset() for _ in range(300): assert env.observation_space.contains(o) - o, r, d, i = env.step([1, 1]) - if d: + o, r, tm, tc, i = env.step([1, 1]) + if tm or tc: break env.close() finally: diff --git a/metadrive/tests/scripts/benchmark_brake.py b/metadrive/tests/scripts/benchmark_brake.py index 71915a573..c7bee0e81 100644 --- a/metadrive/tests/scripts/benchmark_brake.py +++ b/metadrive/tests/scripts/benchmark_brake.py @@ -8,7 +8,7 @@ def get_result(env): - obs = env.reset() + obs, _ = env.reset() start = time.time() max_speed_km_h = 0.0 reported_max_speed = None @@ -64,7 +64,7 @@ def get_result(env): rotate_displacement = np.asarray(env.vehicle.position) - np.asarray(rotate_start_pos) reported_rotation = True - o, r, d, i = env.step(action) + o, r, tm, tc, i = env.step(action) if reported_max_speed and 
reported_start and reported_end and reported_rotation: break diff --git a/metadrive/tests/scripts/capture_obs.py b/metadrive/tests/scripts/capture_obs.py index 5c411df84..a977d56fb 100644 --- a/metadrive/tests/scripts/capture_obs.py +++ b/metadrive/tests/scripts/capture_obs.py @@ -45,7 +45,7 @@ } } ) - o = env.reset() + o, _ = env.reset() depth_camera = env.config["vehicle_config"]["depth_camera"] depth_camera = DepthCamera(*depth_camera, chassis_np=env.vehicle.chassis, engine=env.engine) @@ -60,7 +60,7 @@ env.vehicle.chassis.setPos(244, 0, 1.5) for i in range(1, 100000): - o, r, d, info = env.step([0, 1]) + o, r, tm, tc, info = env.step([0, 1]) env.render( # text={ # "vehicle_num": len(env.engine.traffic_manager.traffic_vehicles), @@ -68,6 +68,6 @@ # "dist_to_right:": env.vehicle.dist_to_right, # } ) - if d: + if tm or tc: env.reset() env.close() diff --git a/metadrive/tests/scripts/capture_rgb_and_send_to_other_process.py b/metadrive/tests/scripts/capture_rgb_and_send_to_other_process.py index e354a79ae..dc854fce0 100644 --- a/metadrive/tests/scripts/capture_rgb_and_send_to_other_process.py +++ b/metadrive/tests/scripts/capture_rgb_and_send_to_other_process.py @@ -33,7 +33,7 @@ def main_thread(): ) env = MetaDriveEnv(config) try: - o = env.reset() + o, _ = env.reset() # print(HELP_MESSAGE) env.vehicle.expert_takeover = False context = zmq.Context() diff --git a/metadrive/tests/scripts/generate_video_for_image_obs.py b/metadrive/tests/scripts/generate_video_for_image_obs.py index f200606c0..d48757a20 100644 --- a/metadrive/tests/scripts/generate_video_for_image_obs.py +++ b/metadrive/tests/scripts/generate_video_for_image_obs.py @@ -5,7 +5,7 @@ import time import numpy as np -from gym import logger, error +from gymnasium import logger, error from panda3d.core import PNMImage from metadrive.component.algorithm.BIG import BigGenerateMethod @@ -155,7 +155,7 @@ def gen_video(frames, file="tmp"): env.reset() frames = [] for num_frames in range(30): - o, r, d, info = env.step([0, 1]) + o, r, tm, tc, info = env.step([0, 1]) img = PNMImage() env.engine.win.getScreenshot(img) frame = np.zeros([1200, 900, 4], dtype=np.uint8) diff --git a/metadrive/tests/scripts/offscreen_rendering.py b/metadrive/tests/scripts/offscreen_rendering.py index 99fe927f2..5ea4327cf 100644 --- a/metadrive/tests/scripts/offscreen_rendering.py +++ b/metadrive/tests/scripts/offscreen_rendering.py @@ -29,7 +29,7 @@ env.reset() frames = [] for num_frames in range(100): - o, r, d, info = env.step([0, 1]) + o, r, tm, tc, info = env.step([0, 1]) frame = o['image'] frame = frame[..., 0] # Original return frame is [1200, 1920, 3, 1] (float), so remove last dim. 
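# NOTE: a short sketch of turning the [1200, 1920, 3, 1] float frame above into a standard
# H x W x 3 uint8 image; PIL and a [0, 1] value range are assumptions, not guaranteed by this script:
#     from PIL import Image
#     rgb = (o['image'][..., 0] * 255).astype(np.uint8)  # drop the trailing singleton dim
#     Image.fromarray(rgb).save("frame_{}.png".format(num_frames))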
# frame = 1 - frame diff --git a/metadrive/tests/scripts/profile_env.py b/metadrive/tests/scripts/profile_env.py index 8fbab2762..2fdd74652 100644 --- a/metadrive/tests/scripts/profile_env.py +++ b/metadrive/tests/scripts/profile_env.py @@ -11,8 +11,8 @@ } ) - o = env.reset() + o, _ = env.reset() for i in range(1, 10000): # print(i) - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) env.close() diff --git a/metadrive/tests/scripts/profile_reset.py b/metadrive/tests/scripts/profile_reset.py index 33c80fd22..723f0ba30 100644 --- a/metadrive/tests/scripts/profile_reset.py +++ b/metadrive/tests/scripts/profile_reset.py @@ -4,11 +4,11 @@ if __name__ == '__main__': env = MetaDriveEnv(dict(num_scenarios=1000, traffic_density=0.1, start_seed=5000)) - obs = env.reset() + obs, _ = env.reset() start = time.time() vc = [] for s in range(1000): - env.reset(force_seed=s + 5000) + env.reset(seed=s + 5000) print("We have {} vehicles in seed {} map!".format(len(env.engine.traffic_manager.vehicles), s)) vc.append(len(env.engine.traffic_manager.vehicles)) if (s + 1) % 1 == 0: diff --git a/metadrive/tests/scripts/profile_top_down_env.py b/metadrive/tests/scripts/profile_top_down_env.py index 503aa2c31..f17da0d0c 100644 --- a/metadrive/tests/scripts/profile_top_down_env.py +++ b/metadrive/tests/scripts/profile_top_down_env.py @@ -4,13 +4,13 @@ if __name__ == '__main__': env = TopDownSingleFrameMetaDriveEnv(dict(num_scenarios=10)) - o = env.reset() + o, _ = env.reset() start = time.time() action = [0.0, 0.1] # print(o.shape) for s in range(10000): - o, r, d, i = env.step(action) - if d: + o, r, tm, tc, i = env.step(action) + if tm or tc: env.reset() if (s + 1) % 100 == 0: print( diff --git a/metadrive/tests/scripts/profile_top_down_multi_channel_env.py b/metadrive/tests/scripts/profile_top_down_multi_channel_env.py index 8b25c71ba..4d1847cbe 100644 --- a/metadrive/tests/scripts/profile_top_down_multi_channel_env.py +++ b/metadrive/tests/scripts/profile_top_down_multi_channel_env.py @@ -4,13 +4,13 @@ if __name__ == '__main__': env = TopDownMetaDriveEnvV2(dict(num_scenarios=10, frame_stack=10, frame_skip=3)) - o = env.reset() + o, _ = env.reset() start = time.time() action = [0.0, 0.1] # print(o.shape) for s in range(10000): - o, r, d, i = env.step(action) - if d: + o, r, tm, tc, i = env.step(action) + if tm or tc: env.reset() if (s + 1) % 100 == 0: print( diff --git a/metadrive/tests/scripts/profile_top_down_v2.py b/metadrive/tests/scripts/profile_top_down_v2.py index 543287073..361f6b0ef 100644 --- a/metadrive/tests/scripts/profile_top_down_v2.py +++ b/metadrive/tests/scripts/profile_top_down_v2.py @@ -4,13 +4,13 @@ if __name__ == '__main__': env = TopDownMetaDriveEnvV2(dict(num_scenarios=10)) - o = env.reset() + o, _ = env.reset() start = time.time() action = [0.0, 0.1] # print(o.shape) for s in range(10000): - o, r, d, i = env.step(action) - if d: + o, r, tm, tc, i = env.step(action) + if tm or tc: env.reset() if (s + 1) % 100 == 0: print( diff --git a/metadrive/tests/test_component/test_bicycle_model.py b/metadrive/tests/test_component/test_bicycle_model.py index d596a99ad..aca4f9bea 100644 --- a/metadrive/tests/test_component/test_bicycle_model.py +++ b/metadrive/tests/test_component/test_bicycle_model.py @@ -29,7 +29,7 @@ def _test_bicycle_model(): } ) bicycle_model = BicycleModel() - o = env.reset() + o, _ = env.reset() vehicle = env.current_track_vehicle v_dir = vehicle.velocity_direction bicycle_model.reset(*vehicle.position, vehicle.speed, vehicle.heading_theta, 
np.arctan2(v_dir[1], v_dir[0])) @@ -53,7 +53,7 @@ def _test_bicycle_model(): model=bicycle_model ) ) - o, r, d, info = env.step(actions[s]) + o, r, tm, tc, info = env.step(actions[s]) index = s - horizon if index >= 0: state = predict_states[index] diff --git a/metadrive/tests/test_component/test_camera.py b/metadrive/tests/test_component/test_camera.py index 2843d7042..e77133803 100644 --- a/metadrive/tests/test_component/test_camera.py +++ b/metadrive/tests/test_component/test_camera.py @@ -19,12 +19,12 @@ def _test_main_camera_as_obs(render): show_fps=False, ) ) - obs = env.reset() + obs, _ = env.reset() action = [0.0, 1.] for _ in range(3): env.reset() for s in range(20): - o, r, d, i = env.step(action) + o, r, tm, tc, i = env.step(action) # engine = env.engine # if engine.episode_step <= 1: # engine.graphicsEngine.renderFrame() @@ -59,12 +59,12 @@ def _test_rgb_camera_as_obs(): show_fps=False, ) ) - obs = env.reset() + obs, _ = env.reset() action = [0.0, 1.] for _ in range(3): env.reset() for s in range(20): - o, r, d, i = env.step(action) + o, r, tm, tc, i = env.step(action) # engine = env.engine # if engine.episode_step <= 1: # engine.graphicsEngine.renderFrame() diff --git a/metadrive/tests/test_component/test_curriculum_reset.py b/metadrive/tests/test_component/test_curriculum_reset.py index de4a01e81..501466970 100644 --- a/metadrive/tests/test_component/test_curriculum_reset.py +++ b/metadrive/tests/test_component/test_curriculum_reset.py @@ -23,9 +23,9 @@ def _test_level(level=1, render=False): try: scenario_id = set() for i in tqdm(range(10), desc=str(level)): - env.reset(force_seed=i) + env.reset(seed=i) for i in range(10): - o, r, d, _ = env.step([0, 0]) + o, r, d, _, _ = env.step([0, 0]) if d: break @@ -61,9 +61,9 @@ def test_curriculum_up_1_level(render=False, level=5): try: scenario_id = [] for i in tqdm(range(10), desc=str(level)): - env.reset(force_seed=i) + env.reset(seed=i) for i in range(10): - o, r, d, _ = env.step([0, 0]) + o, r, d, _, _ = env.step([0, 0]) scenario_id.append(env.engine.data_manager.current_scenario_summary["id"]) assert len(set(scenario_id)) == 4 ids = [env.engine.data_manager.summary_dict[f]["id"] for f in env.engine.data_manager.summary_lookup] @@ -94,7 +94,7 @@ def test_curriculum_level_up(render=False): for i in tqdm(range(20), desc=str(2)): env.reset() for i in range(250): - o, r, d, _ = env.step([0, 0]) + o, r, d, _, _ = env.step([0, 0]) scenario_id.append(env.engine.data_manager.current_scenario_summary["id"]) assert len(set(scenario_id)) == 10 ids = [env.engine.data_manager.summary_dict[f]["id"] for f in env.engine.data_manager.summary_lookup] @@ -132,7 +132,7 @@ def _worker_env(render, worker_index, level_up=False): for i in range(20): env.reset() for i in range(10): - o, r, d, _ = env.step([0, 0]) + o, r, d, _, _ = env.step([0, 0]) scenario_id.append(env.engine.data_manager.current_scenario_summary["id"]) print(env.current_seed) all_scenario = [ @@ -187,7 +187,7 @@ def level_up_worker(render, worker_index): for i in tqdm(range(20), desc=str(2)): env.reset() for i in range(250): - o, r, d, _ = env.step([0, 0]) + o, r, d, _, _ = env.step([0, 0]) scenario_id.append(env.engine.data_manager.current_scenario_summary["id"]) assert len(set(scenario_id)) == 4 ids = [env.engine.data_manager.summary_dict[f]["id"] for f in env.engine.data_manager.summary_lookup[:8]] @@ -228,7 +228,7 @@ def test_start_seed_not_0(render=False, worker_index=0): for i in tqdm(range(20), desc=str(2)): env.reset() for i in range(250): - o, r, d, _ = env.step([0, 0]) + o, 
r, d, _, _ = env.step([0, 0]) scenario_id.append(env.engine.data_manager.current_scenario_summary["id"]) all_scenarios = sorted(list(env.engine.data_manager.summary_dict.keys()))[2:] summary_lookup = env.engine.data_manager.summary_lookup[2:] @@ -266,7 +266,7 @@ def test_start_seed_1_9(render=False, worker_index=0): for i in tqdm(range(20), desc=str(2)): env.reset() for i in range(250): - o, r, d, _ = env.step([0, 0]) + o, r, d, _, _ = env.step([0, 0]) scenario_id.append(env.engine.data_manager.current_scenario_summary["id"]) all_scenarios = sorted(list(env.engine.data_manager.summary_dict.keys()))[1:9] summary_lookup = env.engine.data_manager.summary_lookup[1:9] diff --git a/metadrive/tests/test_component/test_detector_mask.py b/metadrive/tests/test_component/test_detector_mask.py index 2e6a2b519..301eeaca7 100644 --- a/metadrive/tests/test_component/test_detector_mask.py +++ b/metadrive/tests/test_component/test_detector_mask.py @@ -262,7 +262,7 @@ def test_detector_mask_in_lidar(): ) ep_count = 0 for tt in range(3000): - o, r, d, i = env.step([0, 1]) + o, r, tm, tc, i = env.step([0, 1]) # print("We have: {} vehicles!".format(env.engine.traffic_manager.get_vehicle_num())) @@ -303,7 +303,7 @@ def test_detector_mask_in_lidar(): new_cloud_points = np.array(copy.deepcopy(c_p)) np.testing.assert_almost_equal(old_cloud_points, new_cloud_points) - if d: + if tm or tc: env.reset() ep_count += 1 if ep_count == 3: diff --git a/metadrive/tests/test_component/test_distance_detector.py b/metadrive/tests/test_component/test_distance_detector.py index 1792471cf..fd7250b58 100644 --- a/metadrive/tests/test_component/test_distance_detector.py +++ b/metadrive/tests/test_component/test_distance_detector.py @@ -42,13 +42,13 @@ def test_original_lidar(render=False): detect_traffic_vehicle = False detect_base_vehicle = False for i in range(1, 1000): - o, r, d, info = env.step([0, 1]) + o, r, tm, tc, info = env.step([0, 1]) if len(env.vehicle.lidar.get_surrounding_vehicles(env.observations[DEFAULT_AGENT].detected_objects)) > 2: detect_traffic_vehicle = True for hit in env.observations[DEFAULT_AGENT].detected_objects: if isinstance(hit, BaseVehicle): detect_base_vehicle = True - if d: + if tm or tc: break # if not (detect_traffic_vehicle and detect_base_vehicle): # print("Lidar detection failed") @@ -94,13 +94,13 @@ def test_lidar_with_mask(render=False): detect_traffic_vehicle = False detect_base_vehicle = False for i in range(1, 1000): - o, r, d, info = env.step([0, 1]) + o, r, tm, tc, info = env.step([0, 1]) if len(env.vehicle.lidar.get_surrounding_vehicles(env.observations[DEFAULT_AGENT].detected_objects)) > 2: detect_traffic_vehicle = True for hit in env.observations[DEFAULT_AGENT].detected_objects: if isinstance(hit, BaseVehicle): detect_base_vehicle = True - if d: + if tm or tc: break # if not (detect_traffic_vehicle and detect_base_vehicle): # print("Lidar detection failed") diff --git a/metadrive/tests/test_component/test_set_get_vehicle_attribute.py b/metadrive/tests/test_component/test_set_get_vehicle_attribute.py index dfb8a3be4..6bb8d4605 100644 --- a/metadrive/tests/test_component/test_set_get_vehicle_attribute.py +++ b/metadrive/tests/test_component/test_set_get_vehicle_attribute.py @@ -16,17 +16,17 @@ def test_set_get_vehicle_attribute(render=False): } ) try: - o = env.reset() + o, _ = env.reset() for _ in range(10): env.vehicle.set_velocity([5, 0], in_local_frame=False) - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert abs(env.vehicle.speed - 5) < 0.01 # may 
encounter friction assert np.isclose(env.vehicle.velocity, np.array([5, 0]), rtol=1e-2, atol=1e-2).all() assert abs(env.vehicle.speed - env.vehicle.speed_km_h / 3.6) < 1e-4 assert np.isclose(env.vehicle.velocity, env.vehicle.velocity_km_h / 3.6).all() for _ in range(10): - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) env.vehicle.set_velocity([0, 5], in_local_frame=False) assert abs(env.vehicle.speed - 5) < 0.1 assert np.isclose(env.vehicle.velocity, np.array([0, 5]), rtol=1e-5, atol=1e-5).all() @@ -34,7 +34,7 @@ def test_set_get_vehicle_attribute(render=False): assert np.isclose(env.vehicle.velocity, env.vehicle.velocity_km_h / 3.6).all() for _ in range(10): - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) env.vehicle.set_velocity([5, 3], value=10, in_local_frame=False) assert abs(env.vehicle.speed - 10) < 0.1 assert np.isclose( @@ -47,7 +47,7 @@ def test_set_get_vehicle_attribute(render=False): assert np.isclose(env.vehicle.velocity, env.vehicle.velocity_km_h / 3.6).all() for _ in range(10): - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) env.vehicle.set_velocity([0.3, 0.1], value=10, in_local_frame=False) assert abs(env.vehicle.speed - 10) < 0.1 assert np.isclose( @@ -79,28 +79,28 @@ def test_coordinates(render=False): } ) try: - o = env.reset() + o, _ = env.reset() assert abs(env.vehicle.heading_theta) == 0 assert np.isclose(env.vehicle.heading, [1.0, 0]).all() env.vehicle.set_velocity([5, 0], in_local_frame=True) for _ in range(10): env.vehicle.set_velocity([5, 0], in_local_frame=True) - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert abs(env.vehicle.velocity[0] - 5.) < 1e-2 and abs(env.vehicle.velocity[1]) < 0.001 - o = env.reset() + o, _ = env.reset() assert abs(env.vehicle.heading_theta) == 0 assert np.isclose(env.vehicle.heading, [1.0, 0]).all() env.vehicle.set_velocity([5, 0], in_local_frame=False) for _ in range(10): - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert env.vehicle.velocity[0] > 3. and abs(env.vehicle.velocity[1]) < 0.001 env.reset() env.vehicle.set_velocity([0, 5], in_local_frame=False) for _ in range(1): - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert env.vehicle.velocity[1] > 3. and abs(env.vehicle.velocity[0]) < 0.002 env.reset() @@ -108,13 +108,13 @@ def test_coordinates(render=False): assert np.isclose(env.vehicle.heading, [1.0, 0]).all() env.vehicle.set_velocity([-5, 0], in_local_frame=False) for _ in range(10): - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert env.vehicle.velocity[0] < -3. and abs(env.vehicle.velocity[1]) < 0.001 env.vehicle.set_velocity([0, -5], in_local_frame=False) for _ in range(1): - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert env.vehicle.velocity[1] < -3. and abs(env.vehicle.velocity[0]) < 0.002 # steering left @@ -124,7 +124,7 @@ def test_coordinates(render=False): assert np.isclose(env.vehicle.heading, [1.0, 0]).all() for _ in range(100): - o, r, d, info = env.step([0.8, 0.8]) + o, r, tm, tc, info = env.step([0.8, 0.8]) assert env.vehicle.velocity[1] > 1. 
and abs(env.vehicle.velocity[0]) > 1 assert env.vehicle.heading_theta > 0.3 # rad assert env.vehicle.position[0] > begining_pos[0] and env.vehicle.position[1] > begining_pos[1] @@ -136,7 +136,7 @@ def test_coordinates(render=False): assert np.isclose(env.vehicle.heading, [1.0, 0]).all() for _ in range(100): - o, r, d, info = env.step([-0.8, 0.8]) + o, r, tm, tc, info = env.step([-0.8, 0.8]) assert env.vehicle.velocity[1] < -1. and abs(env.vehicle.velocity[0]) > 1 assert env.vehicle.position[0] > begining_pos[0] and env.vehicle.position[1] < begining_pos[1] assert env.vehicle.heading_theta < -0.3 # rad @@ -144,21 +144,21 @@ def test_coordinates(render=False): env.reset() env.vehicle.set_heading_theta(np.deg2rad(90)) for _ in range(10): - o, r, d, info, = env.step([-0., 0.]) + o, r, tm, tc, info, = env.step([-0., 0.]) assert wrap_to_pi(abs(env.vehicle.heading_theta - np.deg2rad(90))) < 1 assert np.isclose(env.vehicle.heading, np.array([0, 1]), 1e-4, 1e-4).all() env.reset() env.vehicle.set_heading_theta(np.deg2rad(45)) for _ in range(10): - o, r, d, info, = env.step([-0., 0.]) + o, r, tm, tc, info, = env.step([-0., 0.]) assert wrap_to_pi(abs(env.vehicle.heading_theta - np.deg2rad(45))) < 1 assert np.isclose(env.vehicle.heading, np.array([np.sqrt(2) / 2, np.sqrt(2) / 2]), 1e-4, 1e-4).all() env.reset() env.vehicle.set_heading_theta(np.deg2rad(-90)) for _ in range(10): - o, r, d, info, = env.step([-0., 0.]) + o, r, tm, tc, info, = env.step([-0., 0.]) assert abs(env.vehicle.heading_theta + np.deg2rad(-90) + np.pi) < 0.01 assert np.isclose(env.vehicle.heading, np.array([0, -1]), 1e-4, 1e-4).all() finally: @@ -179,62 +179,62 @@ def test_set_angular_v_and_set_v_no_friction(render=False): } ) try: - o = env.reset() + o, _ = env.reset() for _ in range(100): # 10 s , np.pi/10 per second env.vehicle.set_angular_velocity(np.pi / 10) - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert abs(wrap_to_pi(env.vehicle.heading_theta) - np.pi) < 1e-2, env.vehicle.heading_theta # print(env.vehicle.heading_theta / np.pi * 180) - o = env.reset() + o, _ = env.reset() for _ in range(100): # 10 s , np.pi/10 per second env.vehicle.set_angular_velocity(18, in_rad=False) - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert abs(wrap_to_pi(env.vehicle.heading_theta) - np.pi) < 1e-2, env.vehicle.heading_theta # print(env.vehicle.heading_theta / np.pi * 180) - o = env.reset() + o, _ = env.reset() start = env.vehicle.position[0] for _ in range(100): # 10 s env.vehicle.set_velocity([1, 0], in_local_frame=True) - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert abs(env.vehicle.position[0] - start - 10) < 5e-2, env.vehicle.position - o = env.reset() + o, _ = env.reset() start = env.vehicle.position[0] for _ in range(100): # 10 s env.vehicle.set_velocity([1, 0], in_local_frame=False) - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert abs(env.vehicle.position[0] - start - 10) < 5e-2, env.vehicle.position - o = env.reset() + o, _ = env.reset() start = env.vehicle.position[1] for _ in range(10): # 10 s env.vehicle.set_velocity([0, 1], in_local_frame=False) - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert abs(env.vehicle.position[1] - start - 1) < 5e-2, env.vehicle.position - o = env.reset() + o, _ = env.reset() start = env.vehicle.position[0] env.vehicle.set_heading_theta(-np.pi / 2) for _ in range(100): # 10 s env.vehicle.set_velocity([0, 1], in_local_frame=True) - o, r, d, info = 
env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert abs(env.vehicle.position[0] - start - 10) < 5e-2, env.vehicle.position - o = env.reset() + o, _ = env.reset() start = env.vehicle.position[0] env.vehicle.set_heading_theta(-np.pi / 2) for _ in range(100): # 10 s env.vehicle.set_velocity([1, 0], in_local_frame=False) - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert abs(env.vehicle.position[0] - start - 10) < 5e-2, env.vehicle.position finally: env.close() @@ -254,29 +254,29 @@ def test_set_angular_v_and_set_v_no_friction_pedestrian(render=False): } ) try: - o = env.reset() + o, _ = env.reset() env.engine.terrain.dynamic_nodes[0].setFriction(0.) obj_1 = env.engine.spawn_object(Pedestrian, position=[10, 3], heading_theta=0, random_seed=1) for _ in range(10): # 10 s , np.pi/10 per second obj_1.set_angular_velocity(np.pi / 10) - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert abs(wrap_to_pi(obj_1.heading_theta) - np.pi / 10) < 1e-2, obj_1.heading_theta obj_1.destroy() - o = env.reset() + o, _ = env.reset() env.engine.terrain.dynamic_nodes[0].setFriction(0.) obj_1 = env.engine.spawn_object(Pedestrian, position=[10, 3], heading_theta=0, random_seed=1) for _ in range(10): # obj_1.set_position([30,0], 10) # 10 s , np.pi/10 per second obj_1.set_angular_velocity(18, in_rad=False) - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert abs(wrap_to_pi(obj_1.heading_theta) - np.pi / 10) < 1e-2, obj_1.heading_theta # print(obj_1.heading_theta / np.pi * 180) obj_1.destroy() - o = env.reset() + o, _ = env.reset() env.engine.terrain.dynamic_nodes[0].setFriction(0.) obj_1 = env.engine.spawn_object(Pedestrian, position=[10, 3], heading_theta=0, random_seed=1) start_p = obj_1.position[0] @@ -284,12 +284,12 @@ def test_set_angular_v_and_set_v_no_friction_pedestrian(render=False): # obj_1.set_position([30,0], 10) # 10 s , np.pi/10 per second obj_1.set_velocity([1, 0]) - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert abs(obj_1.position[0] - start_p) > 0.7 # print(obj_1.heading_theta / np.pi * 180) obj_1.destroy() - o = env.reset() + o, _ = env.reset() env.engine.terrain.dynamic_nodes[0].setFriction(0.)
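# NOTE: per the "# 10 s" comments in these tests, 100 env.step() calls advance roughly 10 s of
# simulation (~0.1 s per step), so 10 steps at 1 m/s should displace the pedestrian by about 1 m;
# the `> 0.7` assertions below leave margin for contact and solver losses.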
obj_1 = env.engine.spawn_object(Pedestrian, position=[10, 3], heading_theta=0, random_seed=1) start_p = obj_1.position[1] @@ -297,7 +297,7 @@ def test_set_angular_v_and_set_v_no_friction_pedestrian(render=False): # obj_1.set_position([30,0], 10) # 10 s , np.pi/10 per second obj_1.set_velocity([0, 1]) - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert abs(obj_1.position[1] - start_p) > 0.7 # print(obj_1.heading_theta / np.pi * 180) obj_1.destroy() diff --git a/metadrive/tests/test_component/test_store_map_memory_leak.py b/metadrive/tests/test_component/test_store_map_memory_leak.py index ed24d8028..fa132d70c 100644 --- a/metadrive/tests/test_component/test_store_map_memory_leak.py +++ b/metadrive/tests/test_component/test_store_map_memory_leak.py @@ -33,7 +33,7 @@ def test_pgdrive_env_memory_leak(): try: for j in tqdm.tqdm(range(out_loop_num)): for i in range(num): - obs = env.reset(force_seed=i) + obs, _ = env.reset(seed=i) if j == 0 and i == 0: start_memory = process_memory() end_memory = process_memory() diff --git a/metadrive/tests/test_component/test_vehicle_coordinates.py b/metadrive/tests/test_component/test_vehicle_coordinates.py index 02314fb10..3d1412931 100644 --- a/metadrive/tests/test_component/test_vehicle_coordinates.py +++ b/metadrive/tests/test_component/test_vehicle_coordinates.py @@ -26,7 +26,7 @@ def test_coordinates_shift(): pos = [(x, y) for x in [-10, 0, 10] for y in [-20, 0, 20]] * 10 p = pos.pop() for s in range(1, 100000): - o, r, d, info = env.step([1, 0.3]) + o, r, tm, tc, info = env.step([1, 0.3]) if s % 10 == 0: if len(pos) == 0: break diff --git a/metadrive/tests/test_env/_test_change_friction_density_envs.py b/metadrive/tests/test_env/_test_change_friction_density_envs.py index 1776d4f9c..4656a49b7 100644 --- a/metadrive/tests/test_env/_test_change_friction_density_envs.py +++ b/metadrive/tests/test_env/_test_change_friction_density_envs.py @@ -7,11 +7,11 @@ def _run(env): try: for _ in range(5): - obs = env.reset() + obs, _ = env.reset() for s in range(100): action = np.array([0.0, 1.0]) - o, r, d, i = env.step(action) - if d: + o, r, tm, tc, i = env.step(action) + if tm or tc: env.reset() finally: env.close() diff --git a/metadrive/tests/test_env/local_test_metadrive_rgb_depth.py b/metadrive/tests/test_env/local_test_metadrive_rgb_depth.py index 1b88fa212..1adfddd98 100644 --- a/metadrive/tests/test_env/local_test_metadrive_rgb_depth.py +++ b/metadrive/tests/test_env/local_test_metadrive_rgb_depth.py @@ -11,7 +11,7 @@ def _act(env, action): assert env.action_space.contains(action) - obs, reward, done, info = env.step(action) + obs, reward, terminated, truncated, info = env.step(action) assert env.observation_space.contains(obs) assert np.isscalar(reward) assert isinstance(info, dict) @@ -22,7 +22,7 @@ def _act(env, action): def test_metadrive_env_rgb(): env = MetaDriveEnv(dict(image_observation=True)) try: - obs = env.reset() + obs, _ = env.reset() assert env.observation_space.contains(obs) _act(env, env.action_space.sample()) for x in [-1, 0, 1]: diff --git a/metadrive/tests/test_env/test_ma_bidirection.py b/metadrive/tests/test_env/test_ma_bidirection.py index d0cf54ecd..89f1a5d07 100644 --- a/metadrive/tests/test_env/test_ma_bidirection.py +++ b/metadrive/tests/test_env/test_ma_bidirection.py @@ -18,18 +18,18 @@ def test_ma_bidirection_idm(render=False): ) index = ('1y0_1_', '2B0_0_', 0) try: - o = env.reset() + o, _ = env.reset() env.vehicle.set_velocity([1, 0.1], 10) # print(env.vehicle.speed) pass_test = False for s in 
range(1, 10000): - o, r, d, info = env.step(env.action_space.sample()) + o, r, tm, tc, info = env.step(env.action_space.sample()) _, lat = env.vehicle.lane.local_coordinates(env.vehicle.position) if abs(lat) > env.vehicle.lane.width / 2 + 0.1 and len(env.vehicle.navigation.current_ref_lanes) == 1: raise ValueError("IDM can not pass bidirection block") if env.vehicle.lane.index == index and abs(lat) < 0.1: pass_test = True - if d and info["arrive_dest"]: + if (tm or tc) and info["arrive_dest"]: break assert pass_test env.close() diff --git a/metadrive/tests/test_env/test_ma_bottleneck_env.py b/metadrive/tests/test_env/test_ma_bottleneck_env.py index 6babaf379..b18dbfb8c 100644 --- a/metadrive/tests/test_env/test_ma_bottleneck_env.py +++ b/metadrive/tests/test_env/test_ma_bottleneck_env.py @@ -3,7 +3,7 @@ MULTI_AGENT_METADRIVE_DEFAULT_CONFIG["force_seed_spawn_manager"] = True import numpy as np -from gym.spaces import Box, Dict +from gymnasium.spaces import Box, Dict from metadrive.envs.marl_envs.marl_bottleneck import MultiAgentBottleneckEnv from metadrive.utils import distance_greater, norm @@ -58,17 +58,18 @@ def _check_space(env): def _act(env, action): assert env.action_space.contains(action) - obs, reward, done, info = env.step(action) + obs, reward, terminated, truncated, info = env.step(action) _check_shape(env) - if not done["__all__"]: + if not terminated["__all__"]: assert len(env.vehicles) > 0 if not (set(obs.keys()) == set(reward.keys()) == set(env.observation_space.spaces.keys())): raise ValueError assert env.observation_space.contains(obs) assert isinstance(reward, dict) assert isinstance(info, dict) - assert isinstance(done, dict) - return obs, reward, done, info + assert isinstance(terminated, dict) + assert isinstance(truncated, dict) + return obs, reward, terminated, truncated, info def test_ma_bottleneck_env(): @@ -84,14 +85,15 @@ def test_ma_bottleneck_env(): "vehicle_config": {"lidar": {"num_others": 0}}})]: try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(100): act = {k: [1, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) if step == 0: - assert not any(d.values()) + assert not any(tm.values()) + assert not any(tc.values()) # # print("Current number of vehicles: ", len(env.vehicles)) finally: env.close() @@ -116,38 +118,39 @@ def test_ma_bottleneck_horizon(): ) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) last_keys = set(env.vehicles.keys()) for step in range(1, 1000): act = {k: [1, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) new_keys = set(env.vehicles.keys()) if step == 0: - assert not any(d.values()) - if any(d.values()): + assert not any(tm.values()) + assert not any(tc.values()) + if any(tm.values()): assert len(last_keys) <= 4 # num of agents assert len(new_keys) <= 4 # num of agents for k in new_keys.difference(last_keys): assert k in o - assert k in d + assert k in tm # print("Step {}, Done: {}".format(step, d)) for kkk, rrr in r.items(): if rrr == -777: - assert d[kkk] + assert tm[kkk] assert i[kkk]["cost"] == 778 assert i[kkk]["out_of_road"] for kkk, iii in i.items(): if "out_of_road" in iii and (iii["out_of_road"] or iii["cost"] == 778): - assert d[kkk] + assert tm[kkk] assert i[kkk]["cost"] == 778 assert 
i[kkk]["out_of_road"] # assert r[kkk] == -777 - if d["__all__"]: + if tm["__all__"]: break last_keys = new_keys finally: @@ -158,16 +161,17 @@ def test_ma_bottleneck_reset(): env = MultiAgentBottleneckEnv({"horizon": 50, "num_agents": 4}) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(1000): act = {k: [1, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) if step == 0: - assert not any(d.values()) - if d["__all__"]: - obs = env.reset() + assert not any(tm.values()) + assert not any(tc.values()) + if tm["__all__"]: + obs, _ = env.reset() assert env.observation_space.contains(obs) _check_spaces_after_reset(env, obs) @@ -185,7 +189,7 @@ def test_ma_bottleneck_reset(): _check_spaces_before_reset(env) success_count = 0 agent_count = 0 - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) @@ -194,7 +198,7 @@ def test_ma_bottleneck_reset(): # for _ in range(2): # act = {k: [1, 1] for k in env.vehicles.keys()} - # o, r, d, i = _act(env, act) + # o, r, tm, tc, i = _act(env, act) # Force vehicle to success! for v_id, v in env.vehicles.items(): @@ -215,7 +219,7 @@ def test_ma_bottleneck_reset(): assert env._is_arrive_destination(v) act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) for v in env.vehicles.values(): assert len(v.navigation.checkpoints) > 2 @@ -225,18 +229,18 @@ def test_ma_bottleneck_reset(): # # print("{} success!".format(kkk)) success_count += 1 - for kkk, ddd in d.items(): + for kkk, ddd in tm.items(): if ddd and kkk != "__all__": assert i[kkk]["arrive_dest"] agent_count += 1 for kkk, rrr in r.items(): - if d[kkk]: + if tm[kkk]: assert rrr == 777 - if d["__all__"]: + if tm["__all__"]: # print("Finish {} agents. 
Success {} agents.".format(agent_count, success_count)) - o = env.reset() + o, _ = env.reset() assert env.observation_space.contains(o) _check_spaces_after_reset(env, o) break @@ -268,12 +272,13 @@ def _no_close_spawn(vehicles): try: _check_spaces_before_reset(env) for num_r in range(10): - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env) for _ in range(10): - o, r, d, i = env.step({k: [0, 0] for k in env.vehicles.keys()}) + o, r, tm, tc, i = env.step({k: [0, 0] for k in env.vehicles.keys()}) # print(d) - assert not any(d.values()) + assert not any(tm.values()) + assert not any(tc.values()) _no_close_spawn(env.vehicles) # print('Finish {} resets.'.format(num_r)) finally: @@ -286,15 +291,15 @@ def test_ma_bottleneck_reward_done_alignment(): env = MultiAgentBottleneckEnv({"horizon": 200, "num_agents": 4, "out_of_road_penalty": 777, "crash_done": False}) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for action in [-1, 1]: for step in range(5000): act = {k: [action, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) - for kkk, ddd in d.items(): - if ddd and kkk != "__all__" and not d["__all__"] and not i[kkk]["max_step"]: + o, r, tm, tc, i = _act(env, act) + for kkk, ddd in tm.items(): + if ddd and kkk != "__all__" and not tm["__all__"] and not i[kkk]["max_step"]: if r[kkk] != -777: raise ValueError # assert r[kkk] == -777 @@ -302,10 +307,10 @@ def test_ma_bottleneck_reward_done_alignment(): # # print('{} done passed!'.format(kkk)) for kkk, rrr in r.items(): if rrr == -777: - assert d[kkk] + assert tm[kkk] assert i[kkk]["out_of_road"] # # print('{} reward passed!'.format(kkk)) - if d["__all__"]: + if tm["__all__"]: env.reset() break finally: @@ -330,20 +335,20 @@ def test_ma_bottleneck_reward_done_alignment(): env._DEBUG_RANDOM_SEED = 1 try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) for step in range(5): act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) env.vehicles["agent0"].set_position(env.vehicles["agent1"].position, height=1.2) for step in range(5000): act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) - if not any(d.values()): + if not any(tm.values()): continue - assert sum(d.values()) == 2 + assert sum(tm.values()) == 2 for kkk in ['agent0', 'agent1']: iii = i[kkk] @@ -351,7 +356,7 @@ def test_ma_bottleneck_reward_done_alignment(): assert iii["crash"] # assert r[kkk] == -1.7777 # for kkk, ddd in d.items(): - ddd = d[kkk] + ddd = tm[kkk] if ddd and kkk != "__all__": # assert r[kkk] == -1.7777 assert i[kkk]["crash_vehicle"] @@ -360,7 +365,7 @@ def test_ma_bottleneck_reward_done_alignment(): # for kkk, rrr in r.items(): rrr = r[kkk] if rrr == -1.7777: - assert d[kkk] + assert tm[kkk] assert i[kkk]["crash_vehicle"] assert i[kkk]["crash"] # # print('{} reward passed!'.format(kkk)) @@ -390,11 +395,11 @@ def test_ma_bottleneck_reward_done_alignment(): ) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) for step in range(1): act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) for v_id, v in env.vehicles.items(): if v_id != "agent0": @@ -402,15 +407,15 @@ def test_ma_bottleneck_reward_done_alignment(): for step in range(5000): act = {k: 
[0, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) for kkk, iii in i.items(): if iii["crash"]: assert iii["crash_vehicle"] if iii["crash_vehicle"]: assert iii["crash"] # assert r[kkk] == -1.7777 - for kkk, ddd in d.items(): - if ddd and kkk != "__all__" and not d["__all__"]: + for kkk, ddd in tm.items(): + if ddd and kkk != "__all__" and not tm["__all__"]: assert i[kkk]["out_of_road"] or i[kkk]["arrive_dest"] # # print('{} done passed!'.format(kkk)) for kkk, rrr in r.items(): @@ -419,9 +424,9 @@ def test_ma_bottleneck_reward_done_alignment(): assert i[kkk]["crash_vehicle"] assert i[kkk]["crash"] # # print('{} reward passed!'.format(kkk)) - if d["agent0"]: + if tm["agent0"]: break - if d["__all__"]: + if tm["__all__"]: break finally: env.close() @@ -438,24 +443,24 @@ def test_ma_bottleneck_reward_done_alignment(): ) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env) env.vehicles["agent0"].set_position(env.vehicles["agent0"].navigation.final_lane.end) assert env.observation_space.contains(obs) for step in range(5000): act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) - if d["__all__"]: + o, r, tm, tc, i = _act(env, act) + if tm["__all__"]: break kkk = "agent0" # assert r[kkk] == 999 assert i[kkk]["arrive_dest"] - assert d[kkk] + assert tm[kkk] kkk = "agent1" # assert r[kkk] != 999 assert not i[kkk]["arrive_dest"] - assert not d[kkk] + assert not tm[kkk] break finally: env.close() @@ -480,21 +485,21 @@ def _safe_places(self): env = TestEnv({"num_agents": 1}) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env) ep_reward = 0.0 for step in range(1000): act = {k: [0, 1] for k in env.vehicles.keys()} - o, r, d, i = env.step(act) + o, r, tm, tc, i = env.step(act) ep_reward += next(iter(r.values())) - if any(d.values()): + if any(tm.values()): # print("Finish respawn count: {}, reward {}".format(env._respawn_count, ep_reward)) env._respawn_count += 1 assert ep_reward > 10, ep_reward ep_reward = 0 if env._respawn_count >= len(env._safe_places): break - if d["__all__"]: + if tm["__all__"]: break finally: env.close() @@ -539,26 +544,26 @@ def test_ma_bottleneck_no_short_episode(): }) try: _check_spaces_before_reset(env) - o = env.reset() + o, _ = env.reset() _check_spaces_after_reset(env, o) actions = [[0, 1], [1, 1], [-1, 1]] start = time.time() - d_count = 0 - d = {"__all__": False} + tm_count = 0 + tm = {"__all__": False} for step in range(2000): # act = {k: actions[np.random.choice(len(actions))] for k in o.keys()} act = {k: actions[np.random.choice(len(actions))] for k in env.vehicles.keys()} o_keys = set(o.keys()).union({"__all__"}) - a_keys = set(env.action_space.spaces.keys()).union(set(d.keys())) + a_keys = set(env.action_space.spaces.keys()).union(set(tm.keys())) assert o_keys == a_keys - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) for kkk, iii in i.items(): - if d[kkk]: + if tm[kkk]: assert iii["episode_length"] >= 1 - d_count += 1 - if d["__all__"]: - o = env.reset() - d = {"__all__": False} + tm_count += 1 + if tm["__all__"]: + o, _ = env.reset() + tm = {"__all__": False} # if (step + 1) % 100 == 0: # print( # "Finish {}/2000 simulation steps. Time elapse: {:.4f}. 
Average FPS: {:.4f}".format( @@ -566,7 +571,7 @@ def test_ma_bottleneck_no_short_episode(): # time.time() - start, (step + 1) / (time.time() - start) # ) # ) - if d_count > 200: + if tm_count > 200: break finally: env.close() @@ -578,7 +583,7 @@ def test_ma_bottleneck_horizon_termination(): try: for _ in range(3): # This function is really easy to break, repeat multiple times! _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) should_respawn = set() @@ -591,19 +596,20 @@ def test_ma_bottleneck_horizon_termination(): else: if v_id in env.vehicles: env.vehicles[v_id].set_static(True) - obs, r, d, i = _act(env, act) + obs, r, tm, tc, i = _act(env, act) if step == 0 or step == 1: - assert not any(d.values()) + assert not any(tm.values()) if should_respawn: for kkk in should_respawn: assert kkk not in obs, "It seems the max_step agents is not respawn!" assert kkk not in r - assert kkk not in d + assert kkk not in tm + assert kkk not in tc assert kkk not in i should_respawn.clear() - for kkk, ddd in d.items(): + for kkk, ddd in tm.items(): if ddd and kkk == "__all__": # print("Current: ", step) continue @@ -614,8 +620,8 @@ def test_ma_bottleneck_horizon_termination(): assert not i[kkk]["crash_vehicle"] should_respawn.add(kkk) - if d["__all__"]: - obs = env.reset() + if tm["__all__"]: + obs, _ = env.reset() should_respawn.clear() break finally: @@ -638,7 +644,7 @@ def check_pos(vehicles): env = MultiAgentBottleneckEnv({"horizon": 50, "num_agents": 40, "crash_done": False}) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(50): @@ -680,14 +686,14 @@ def check_pos(vehicles): ) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(50): check_pos(list(env.vehicles.values())) - o, r, d, i = env.step({k: [0, 1] for k in env.vehicles.keys()}) + o, r, tm, tc, i = env.step({k: [0, 1] for k in env.vehicles.keys()}) env.reset() - if d["__all__"]: + if tm["__all__"]: break finally: env.close() @@ -704,12 +710,12 @@ def test_randomize_spawn_place(): } ) try: - obs = env.reset() + obs, _ = env.reset() for step in range(100): act = {k: [1, 1] for k in env.vehicles.keys()} last_pos = {kkk: v.position for kkk, v in env.vehicles.items()} - o, r, d, i = env.step(act) - obs = env.reset() + o, r, tm, tc, i = env.step(act) + obs, _ = env.reset() new_pos = {kkk: v.position for kkk, v in env.vehicles.items()} for kkk, new_p in new_pos.items(): assert not np.all(new_p == last_pos[kkk]), (new_p, last_pos[kkk], kkk) diff --git a/metadrive/tests/test_env/test_ma_intersection.py b/metadrive/tests/test_env/test_ma_intersection.py index 15fee8e5a..080c073a5 100644 --- a/metadrive/tests/test_env/test_ma_intersection.py +++ b/metadrive/tests/test_env/test_ma_intersection.py @@ -1,7 +1,7 @@ import time import numpy as np -from gym.spaces import Box, Dict +from gymnasium.spaces import Box, Dict from metadrive.envs.marl_envs.marl_intersection import MultiAgentIntersectionEnv from metadrive.utils import distance_greater, norm @@ -59,17 +59,18 @@ def _check_space(env): def _act(env, action): assert env.action_space.contains(action) - obs, reward, done, info = env.step(action) + obs, reward, terminated, truncated, info = env.step(action) _check_shape(env) - if not done["__all__"]: + if 
not terminated["__all__"]: assert len(env.vehicles) > 0 if not (set(obs.keys()) == set(reward.keys()) == set(env.observation_space.spaces.keys())): raise ValueError assert env.observation_space.contains(obs) assert isinstance(reward, dict) assert isinstance(info, dict) - assert isinstance(done, dict) - return obs, reward, done, info + assert isinstance(terminated, dict) + assert isinstance(truncated, dict) + return obs, reward, terminated, truncated, info def test_ma_intersection_env(): @@ -85,14 +86,15 @@ def test_ma_intersection_env(): "vehicle_config": {"lidar": {"num_others": 0}}})]: try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(100): act = {k: [1, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) if step == 0: - assert not any(d.values()) + assert not any(tm.values()) + assert not any(tc.values()) finally: env.close() @@ -116,38 +118,38 @@ def test_ma_intersection_horizon(): ) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) last_keys = set(env.vehicles.keys()) for step in range(1, 1000): act = {k: [1, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) new_keys = set(env.vehicles.keys()) if step == 0: - assert not any(d.values()) - if any(d.values()): + assert not any(tm.values()) + if any(tm.values()): assert len(last_keys) <= 4 # num of agents assert len(new_keys) <= 4 # num of agents for k in new_keys.difference(last_keys): assert k in o - assert k in d + assert k in tm # print("Step {}, Done: {}".format(step, d)) for kkk, rrr in r.items(): if rrr == -777: - assert d[kkk] + assert tm[kkk] assert i[kkk]["cost"] == 778 assert i[kkk]["out_of_road"] for kkk, iii in i.items(): if "out_of_road" in iii and (iii["out_of_road"] or iii["cost"] == 778): - assert d[kkk] + assert tm[kkk] assert i[kkk]["cost"] == 778 assert i[kkk]["out_of_road"] # #assert r[kkk] == -777 - if d["__all__"]: + if tm["__all__"]: break last_keys = new_keys finally: @@ -158,16 +160,16 @@ def test_ma_intersection_reset(): env = MultiAgentIntersectionEnv({"horizon": 50, "num_agents": 4}) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(1000): act = {k: [1, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) if step == 0: - assert not any(d.values()) - if d["__all__"]: - obs = env.reset() + assert not any(tm.values()) + if tm["__all__"]: + obs, _ = env.reset() assert env.observation_space.contains(obs) _check_spaces_after_reset(env, obs) @@ -185,7 +187,7 @@ def test_ma_intersection_reset(): _check_spaces_before_reset(env) success_count = 0 agent_count = 0 - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) @@ -194,7 +196,7 @@ def test_ma_intersection_reset(): # for _ in range(2): # act = {k: [1, 1] for k in env.vehicles.keys()} - # o, r, d, i = _act(env, act) + # o, r, tm, tc, i = _act(env, act) # Force vehicle to success! 
for v_id, v in env.vehicles.items(): @@ -216,7 +218,7 @@ def test_ma_intersection_reset(): assert env._is_arrive_destination(v) act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) for v in env.vehicles.values(): assert len(v.navigation.checkpoints) > 2 @@ -226,18 +228,18 @@ def test_ma_intersection_reset(): # # print("{} success!".format(kkk)) success_count += 1 - for kkk, ddd in d.items(): + for kkk, ddd in tm.items(): if ddd and kkk != "__all__": assert i[kkk]["arrive_dest"] agent_count += 1 for kkk, rrr in r.items(): - if d[kkk]: + if tm[kkk]: assert rrr == 777 - if d["__all__"]: + if tm["__all__"]: # print("Finish {} agents. Success {} agents.".format(agent_count, success_count)) - o = env.reset() + o, _ = env.reset() assert env.observation_space.contains(o) _check_spaces_after_reset(env, o) break @@ -269,11 +271,11 @@ def _no_close_spawn(vehicles): try: _check_spaces_before_reset(env) for num_r in range(10): - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env) for _ in range(10): - o, r, d, i = env.step({k: [0, 0] for k in env.vehicles.keys()}) - assert not any(d.values()) + o, r, tm, tc, i = env.step({k: [0, 0] for k in env.vehicles.keys()}) + assert not (any(tm.values()) or any(tc.values())) _no_close_spawn(env.vehicles) # print('Finish {} resets.'.format(num_r)) finally: @@ -286,24 +288,24 @@ def _test_ma_intersection_reward_done_alignment(): env = MultiAgentIntersectionEnv({"horizon": 200, "num_agents": 4, "out_of_road_penalty": 777, "crash_done": False}) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for action in [-1, 1]: for step in range(5000): act = {k: [action, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) - for kkk, ddd in d.items(): + o, r, tm, tc, i = _act(env, act) + for kkk, ddd in tm.items(): if ddd and kkk != "__all__": # #assert r[kkk] == -777 assert i[kkk]["out_of_road"] # # print('{} done passed!'.format(kkk)) for kkk, rrr in r.items(): if rrr == -777: - assert d[kkk] + assert tm[kkk] assert i[kkk]["out_of_road"] # # print('{} reward passed!'.format(kkk)) - if d["__all__"]: + if tm["__all__"]: env.reset() break finally: @@ -328,20 +330,20 @@ def _test_ma_intersection_reward_done_alignment(): env._DEBUG_RANDOM_SEED = 1 try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) for step in range(5): act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) env.vehicles["agent0"].set_position(env.vehicles["agent1"].position, height=1.2) for step in range(5000): act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) - if not any(d.values()): + if not any(tm.values()): continue - assert sum(d.values()) == 2 + assert sum(tm.values()) == 2 for kkk in ['agent0', 'agent1']: iii = i[kkk] @@ -349,7 +351,7 @@ def _test_ma_intersection_reward_done_alignment(): assert iii["crash"] # #assert r[kkk] == -1.7777 # for kkk, ddd in d.items(): - ddd = d[kkk] + ddd = tm[kkk] if ddd and kkk != "__all__": #assert r[kkk] == -1.7777 assert i[kkk]["crash_vehicle"] @@ -358,7 +360,7 @@ def _test_ma_intersection_reward_done_alignment(): # for kkk, rrr in r.items(): rrr = r[kkk] if rrr == -1.7777: - assert d[kkk] + assert tm[kkk] assert i[kkk]["crash_vehicle"] assert i[kkk]["crash"] # # print('{} reward passed!'.format(kkk)) 
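Every multi-agent test touched by this patch exercises the same migrated contract: `reset()` now returns an `(obs, info)` pair, `step()` returns a five-tuple, and in the MARL environments the `terminated`/`truncated` entries are per-agent dicts keyed like the observations, with `terminated` carrying an aggregate `"__all__"` flag. The following is a minimal sketch of that interaction loop; the horizon and agent count are illustrative values, not taken from any particular test.

```python
from metadrive.envs.marl_envs.marl_intersection import MultiAgentIntersectionEnv

# Illustrative config; the other MARL envs in this patch follow the same contract.
env = MultiAgentIntersectionEnv({"horizon": 100, "num_agents": 4})
try:
    obs, info = env.reset()  # Gymnasium-style reset returns an (obs, info) pair
    for _ in range(1000):
        # One [steering, throttle] action per active agent, keyed by agent id.
        act = {agent_id: [0.0, 1.0] for agent_id in env.vehicles.keys()}
        obs, rewards, terminated, truncated, infos = env.step(act)
        # terminated/truncated are per-agent dicts; "__all__" marks episode end.
        if terminated["__all__"]:
            obs, info = env.reset()
finally:
    env.close()
```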
@@ -388,11 +390,11 @@ def _test_ma_intersection_reward_done_alignment(): ) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) for step in range(1): act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) for v_id, v in env.vehicles.items(): if v_id != "agent0": @@ -400,14 +402,14 @@ def _test_ma_intersection_reward_done_alignment(): for step in range(5000): act = {k: [0, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) for kkk, iii in i.items(): if iii["crash"]: assert iii["crash_vehicle"] if iii["crash_vehicle"]: assert iii["crash"] # #assert r[kkk] == -1.7777 - for kkk, ddd in d.items(): + for kkk, ddd in tm.items(): if ddd and kkk != "__all__": assert i[kkk]["out_of_road"] # # print('{} done passed!'.format(kkk)) @@ -417,9 +419,9 @@ def _test_ma_intersection_reward_done_alignment(): assert i[kkk]["crash_vehicle"] assert i[kkk]["crash"] # # print('{} reward passed!'.format(kkk)) - if d["agent0"]: + if tm["agent0"]: break - if d["__all__"]: + if tm["__all__"]: break finally: env.close() @@ -436,24 +438,24 @@ def _test_ma_intersection_reward_done_alignment(): ) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env) env.vehicles["agent0"].set_position(env.vehicles["agent0"].navigation.final_lane.end) assert env.observation_space.contains(obs) for step in range(5000): act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) - if d["__all__"]: + o, r, tm, tc, i = _act(env, act) + if tm["__all__"]: break kkk = "agent0" #assert r[kkk] == 999 assert i[kkk]["arrive_dest"] - assert d[kkk] + assert tm[kkk] kkk = "agent1" #assert r[kkk] != 999 assert not i[kkk]["arrive_dest"] - assert not d[kkk] + assert not tm[kkk] break finally: env.close() @@ -479,21 +481,21 @@ def _safe_places(self): np.random.seed(10) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env) ep_reward = 0.0 for step in range(1000): act = {k: [0, 1] for k in env.vehicles.keys()} - o, r, d, i = env.step(act) + o, r, tm, tc, i = env.step(act) ep_reward += next(iter(r.values())) - if any(d.values()): + if any(tm.values()): # print("Finish respawn count: {}, reward {}".format(env._respawn_count, ep_reward)) env._respawn_count += 1 assert ep_reward > 10, ep_reward ep_reward = 0 if env._respawn_count >= len(env._safe_places): break - if d["__all__"]: + if tm["__all__"]: break finally: env.close() @@ -538,26 +540,26 @@ def test_ma_intersection_no_short_episode(): }) try: _check_spaces_before_reset(env) - o = env.reset() + o, _ = env.reset() _check_spaces_after_reset(env, o) actions = [[0, 1], [1, 1], [-1, 1]] start = time.time() - d_count = 0 - d = {"__all__": False} + tm_count = 0 + tm = {"__all__": False} for step in range(2000): # act = {k: actions[np.random.choice(len(actions))] for k in o.keys()} act = {k: actions[np.random.choice(len(actions))] for k in env.vehicles.keys()} o_keys = set(o.keys()).union({"__all__"}) - a_keys = set(env.action_space.spaces.keys()).union(set(d.keys())) + a_keys = set(env.action_space.spaces.keys()).union(set(tm.keys())) assert o_keys == a_keys - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) for kkk, iii in i.items(): - if d[kkk]: + if tm[kkk]: assert iii["episode_length"] >= 1 - d_count += 1 - if d["__all__"]: - o = env.reset() - d = {"__all__": False} + tm_count += 1 + if tm["__all__"]: + 
o, _ = env.reset() + tm = {"__all__": False} # if (step + 1) % 100 == 0: # # print( # "Finish {}/2000 simulation steps. Time elapse: {:.4f}. Average FPS: {:.4f}".format( @@ -565,7 +567,7 @@ def test_ma_intersection_no_short_episode(): # time.time() - start, (step + 1) / (time.time() - start) # ) # ) - if d_count > 200: + if tm_count > 200: break finally: env.close() @@ -577,7 +579,7 @@ def test_ma_intersection_horizon_termination(): try: for _ in range(3): # This function is really easy to break, repeat multiple times! _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) should_respawn = set() @@ -590,19 +592,20 @@ def test_ma_intersection_horizon_termination(): else: if v_id in env.vehicles: env.vehicles[v_id].set_static(True) - obs, r, d, i = _act(env, act) + obs, r, tm, tc, i = _act(env, act) if step == 0 or step == 1: - assert not any(d.values()) + assert not any(tm.values()) if should_respawn: for kkk in should_respawn: assert kkk not in obs, "It seems the max_step agents is not respawn!" assert kkk not in r - assert kkk not in d + assert kkk not in tm + assert kkk not in tc assert kkk not in i should_respawn.clear() - for kkk, ddd in d.items(): + for kkk, ddd in tm.items(): if ddd and kkk == "__all__": # print("Current: ", step) continue @@ -613,8 +616,8 @@ def test_ma_intersection_horizon_termination(): assert not i[kkk]["crash_vehicle"] should_respawn.add(kkk) - if d["__all__"]: - obs = env.reset() + if tm["__all__"]: + obs, _ = env.reset() should_respawn.clear() break finally: @@ -637,7 +640,7 @@ def check_pos(vehicles): env = MultiAgentIntersectionEnv({"horizon": 50, "num_agents": 40, "use_render": False}) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(50): @@ -671,14 +674,14 @@ def check_pos(vehicles): env = MultiAgentIntersectionEnv({"horizon": 300, "num_agents": 40, "delay_done": 0, "use_render": False}) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(50): check_pos(list(env.vehicles.values())) - o, r, d, i = env.step({k: [0, 1] for k in env.vehicles.keys()}) + o, r, tm, tc, i = env.step({k: [0, 1] for k in env.vehicles.keys()}) env.reset() - if d["__all__"]: + if tm["__all__"]: break finally: env.close() @@ -688,12 +691,12 @@ def test_randomize_spawn_place(): last_pos = {} env = MultiAgentIntersectionEnv({"num_agents": 4, "use_render": False, "force_seed_spawn_manager": False}) try: - obs = env.reset() + obs, _ = env.reset() for step in range(100): act = {k: [1, 1] for k in env.vehicles.keys()} last_pos = {kkk: v.position for kkk, v in env.vehicles.items()} - o, r, d, i = env.step(act) - obs = env.reset() + o, r, tm, tc, i = env.step(act) + obs, _ = env.reset() new_pos = {kkk: v.position for kkk, v in env.vehicles.items()} for kkk, new_p in new_pos.items(): assert not np.all(new_p == last_pos[kkk]), (new_p, last_pos[kkk], kkk) diff --git a/metadrive/tests/test_env/test_ma_parking_lot.py b/metadrive/tests/test_env/test_ma_parking_lot.py index d2e149a97..2717e8778 100644 --- a/metadrive/tests/test_env/test_ma_parking_lot.py +++ b/metadrive/tests/test_env/test_ma_parking_lot.py @@ -2,7 +2,7 @@ from metadrive.envs.marl_envs.multi_agent_metadrive import MULTI_AGENT_METADRIVE_DEFAULT_CONFIG 
MULTI_AGENT_METADRIVE_DEFAULT_CONFIG["force_seed_spawn_manager"] = True import numpy as np -from gym.spaces import Box, Dict +from gymnasium.spaces import Box, Dict from metadrive.envs.marl_envs.marl_parking_lot import MultiAgentParkingLotEnv from metadrive.utils import distance_greater, norm @@ -57,17 +57,19 @@ def _check_space(env): def _act(env, action): assert env.action_space.contains(action) - obs, reward, done, info = env.step(action) + obs, reward, terminated, truncated, info = env.step(action) _check_shape(env) - if not done["__all__"]: + if not terminated["__all__"]: assert len(env.vehicles) > 0 if not (set(obs.keys()) == set(reward.keys()) == set(env.observation_space.spaces.keys())): raise ValueError assert env.observation_space.contains(obs) assert isinstance(reward, dict) assert isinstance(info, dict) - assert isinstance(done, dict) - return obs, reward, done, info + assert isinstance(terminated, dict) + assert isinstance(truncated, dict) + + return obs, reward, terminated, truncated, info def test_ma_parking_lot_env(): @@ -83,14 +85,16 @@ def test_ma_parking_lot_env(): "vehicle_config": {"lidar": {"num_others": 0}}})]: try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(100): act = {k: [1, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) if step == 0: - assert not any(d.values()) + assert not any(tm.values()) + assert not any(tc.values()) + finally: env.close() @@ -114,38 +118,39 @@ def test_ma_parking_lot_horizon(): ) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) last_keys = set(env.vehicles.keys()) for step in range(1, 1000): act = {k: [1, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) new_keys = set(env.vehicles.keys()) if step == 0: - assert not any(d.values()) - if any(d.values()): + assert not any(tm.values()) + assert not any(tc.values()) + if any(tm.values()): assert len(last_keys) <= 4 # num of agents assert len(new_keys) <= 4 # num of agents for k in new_keys.difference(last_keys): assert k in o - assert k in d + assert k in tm # print("Step {}, Done: {}".format(step, d)) for kkk, rrr in r.items(): if rrr == -777: - assert d[kkk] + assert tm[kkk] assert i[kkk]["cost"] == 778 assert i[kkk]["out_of_road"] for kkk, iii in i.items(): if "out_of_road" in iii and (iii["out_of_road"] or iii["cost"] == 778): - assert d[kkk] + assert tm[kkk] assert i[kkk]["cost"] == 778 assert i[kkk]["out_of_road"] #assert r[kkk] == -777 - if d["__all__"]: + if tm["__all__"]: break last_keys = new_keys finally: @@ -156,16 +161,17 @@ def test_ma_parking_lot_reset(): env = MultiAgentParkingLotEnv({"horizon": 50, "num_agents": 11}) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(1000): act = {k: [1, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) if step == 0: - assert not any(d.values()) - if d["__all__"]: - obs = env.reset() + assert not any(tm.values()) + assert not any(tc.values()) + if tm["__all__"]: + obs, _ = env.reset() assert env.observation_space.contains(obs) _check_spaces_after_reset(env, obs) @@ -183,7 +189,7 @@ def test_ma_parking_lot_reset(): 
_check_spaces_before_reset(env) success_count = 0 agent_count = 0 - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) @@ -192,7 +198,7 @@ def test_ma_parking_lot_reset(): # for _ in range(2): # act = {k: [1, 1] for k in env.vehicles.keys()} - # o, r, d, i = _act(env, act) + # o, r, tm, tc, i = _act(env, act) # Force vehicle to success! for v_id, v in env.vehicles.items(): @@ -214,7 +220,7 @@ def test_ma_parking_lot_reset(): assert env._is_arrive_destination(v) act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) for v in env.vehicles.values(): assert len(v.navigation.checkpoints) > 2 @@ -224,18 +230,18 @@ def test_ma_parking_lot_reset(): # # print("{} success!".format(kkk)) success_count += 1 - for kkk, ddd in d.items(): + for kkk, ddd in tm.items(): if ddd and kkk != "__all__": assert i[kkk]["arrive_dest"] agent_count += 1 for kkk, rrr in r.items(): - if d[kkk]: + if tm[kkk]: assert rrr == 777 - if d["__all__"]: + if tm["__all__"]: # print("Finish {} agents. Success {} agents.".format(agent_count, success_count)) - o = env.reset() + o, _ = env.reset() assert env.observation_space.contains(o) _check_spaces_after_reset(env, o) break @@ -262,11 +268,12 @@ def _no_close_spawn(vehicles): try: _check_spaces_before_reset(env) for num_r in range(10): - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env) for _ in range(10): - o, r, d, i = env.step({k: [0, 0] for k in env.vehicles.keys()}) - assert not any(d.values()) + o, r, tm, tc, i = env.step({k: [0, 0] for k in env.vehicles.keys()}) + assert not any(tm.values()) + assert not any(tc.values()) _no_close_spawn(env.vehicles) # print('Finish {} resets.'.format(num_r)) finally: @@ -279,15 +286,15 @@ def test_ma_parking_lot_reward_done_alignment(): env = MultiAgentParkingLotEnv({"horizon": 200, "num_agents": 11, "out_of_road_penalty": 777, "crash_done": False}) try: _check_spaces_before_reset(env) - obs = env.reset(force_seed=0) + obs, _ = env.reset(seed=0) _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) out_num = 0 for action in [-1, 1]: for step in range(5000): act = {k: [action, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) - for kkk, ddd in d.items(): + o, r, tm, tc, i = _act(env, act) + for kkk, ddd in tm.items(): if ddd and kkk != "__all__": #assert r[kkk] == -777 assert i[kkk]["out_of_road"] or i[kkk]["max_step"] @@ -296,11 +303,11 @@ def test_ma_parking_lot_reward_done_alignment(): # # print('{} done passed!'.format(kkk)) for kkk, rrr in r.items(): if rrr == -777: - assert d[kkk] + assert tm[kkk] assert i[kkk]["out_of_road"] # # print('{} reward passed!'.format(kkk)) - if d["__all__"]: - env.reset(force_seed=0) + if tm["__all__"]: + env.reset(seed=0) break assert out_num > 10 finally: @@ -325,17 +332,17 @@ def test_ma_parking_lot_reward_done_alignment(): env._DEBUG_RANDOM_SEED = 1 try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) for step in range(5): act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) env.vehicles["agent0"].set_position(env.vehicles["agent1"].position, height=1.2) for step in range(5000): act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) - if not any(d.values()): + if not any(tm.values()): continue # assert sum(d.values()) == 2 @@ -346,7 +353,7 @@ 
def test_ma_parking_lot_reward_done_alignment(): assert iii["crash"] #assert r[kkk] == -1.7777 # for kkk, ddd in d.items(): - ddd = d[kkk] + ddd = tm[kkk] if ddd and kkk != "__all__": #assert r[kkk] == -1.7777 assert i[kkk]["crash_vehicle"] @@ -355,7 +362,7 @@ def test_ma_parking_lot_reward_done_alignment(): # for kkk, rrr in r.items(): rrr = r[kkk] if rrr == -1.7777: - assert d[kkk] + assert tm[kkk] assert i[kkk]["crash_vehicle"] assert i[kkk]["crash"] # # print('{} reward passed!'.format(kkk)) @@ -386,13 +393,13 @@ def test_ma_parking_lot_reward_done_alignment(): ) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() env.engine.spawn_manager.np_random = np.random.RandomState(0) - obs = env.reset(force_seed=0) + obs, _ = env.reset(seed=0) _check_spaces_after_reset(env, obs) for step in range(1): act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) for v_id, v in env.vehicles.items(): if v_id != "agent0": @@ -400,14 +407,14 @@ def test_ma_parking_lot_reward_done_alignment(): out_num = 0 for step in range(5000): act = {k: [0, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) for kkk, iii in i.items(): if iii["crash"]: assert iii["crash_vehicle"] if iii["crash_vehicle"]: assert iii["crash"] # #assert r[kkk] == -1.7777 - for kkk, ddd in d.items(): + for kkk, ddd in tm.items(): if ddd and kkk != "__all__": assert i[kkk]["out_of_road"] or i[kkk]["max_step"] if i[kkk]["out_of_road"]: @@ -419,9 +426,9 @@ def test_ma_parking_lot_reward_done_alignment(): assert i[kkk]["crash_vehicle"] assert i[kkk]["crash"] # # print('{} reward passed!'.format(kkk)) - if d["agent0"]: + if tm["agent0"]: break - if d["__all__"]: + if tm["__all__"]: break assert out_num > 0 finally: @@ -439,24 +446,24 @@ def test_ma_parking_lot_reward_done_alignment(): ) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env) env.vehicles["agent0"].set_position(env.vehicles["agent0"].navigation.final_lane.end) assert env.observation_space.contains(obs) for step in range(5000): act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) - if d["__all__"]: + o, r, tm, tc, i = _act(env, act) + if tm["__all__"]: break kkk = "agent0" #assert r[kkk] == 999 assert i[kkk]["arrive_dest"] - assert d[kkk] + assert tm[kkk] kkk = "agent1" #assert r[kkk] != 999 assert not i[kkk]["arrive_dest"] - assert not d[kkk] + assert not tm[kkk] break finally: env.close() @@ -502,26 +509,26 @@ def test_ma_parking_lot_no_short_episode(): }) try: _check_spaces_before_reset(env) - o = env.reset() + o, _ = env.reset() _check_spaces_after_reset(env, o) actions = [[0, 1], [1, 1], [-1, 1]] start = time.time() - d_count = 0 - d = {"__all__": False} + tm_count = 0 + tm = {"__all__": False} for step in range(2000): # act = {k: actions[np.random.choice(len(actions))] for k in o.keys()} act = {k: actions[np.random.choice(len(actions))] for k in env.vehicles.keys()} o_keys = set(o.keys()).union({"__all__"}) - a_keys = set(env.action_space.spaces.keys()).union(set(d.keys())) + a_keys = set(env.action_space.spaces.keys()).union(set(tm.keys())) assert o_keys == a_keys - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) for kkk, iii in i.items(): - if d[kkk]: + if tm[kkk]: assert iii["episode_length"] >= 1 - d_count += 1 - if d["__all__"]: - o = env.reset() - d = {"__all__": False} + tm_count += 1 + if tm["__all__"]: + o, _ = env.reset() + tm = {"__all__": 
False} # if (step + 1) % 100 == 0: # # print( # "Finish {}/2000 simulation steps. Time elapse: {:.4f}. Average FPS: {:.4f}".format( @@ -529,7 +536,7 @@ def test_ma_parking_lot_no_short_episode(): # time.time() - start, (step + 1) / (time.time() - start) # ) # ) - if d_count > 200: + if tm_count > 200: break finally: env.close() @@ -541,7 +548,7 @@ def test_ma_parking_lot_horizon_termination(): try: for _ in range(3): # This function is really easy to break, repeat multiple times! _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) should_respawn = set() @@ -549,20 +556,22 @@ def test_ma_parking_lot_horizon_termination(): act = {k: [0, 0] for k in env.vehicles.keys()} for v_id in act.keys(): env.vehicles[v_id].set_static(True) - obs, r, d, i = _act(env, act) + obs, r, tm, tc, i = _act(env, act) # env.render("top_down", camera_position=(42.5, 0), film_size=(500, 500)) if step == 0 or step == 1: - assert not any(d.values()) + assert not any(tm.values()) + assert not any(tc.values()) if should_respawn: for kkk in should_respawn: assert kkk not in obs, "It seems the max_step agents is not respawn!" assert kkk not in r - assert kkk not in d + assert kkk not in tm + assert kkk not in tc assert kkk not in i should_respawn.clear() - for kkk, ddd in d.items(): + for kkk, ddd in tm.items(): if ddd and kkk == "__all__": # print("Current: ", step) continue @@ -573,8 +582,8 @@ def test_ma_parking_lot_horizon_termination(): assert not i[kkk]["crash_vehicle"] should_respawn.add(kkk) - if d["__all__"]: - obs = env.reset() + if tm["__all__"]: + obs, _ = env.reset() should_respawn.clear() break finally: @@ -597,7 +606,7 @@ def check_pos(vehicles): env = MultiAgentParkingLotEnv({"horizon": 50, "num_agents": 32, "parking_space_num": 32, "use_render": False}) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(50): @@ -631,14 +640,14 @@ def check_pos(vehicles): env = MultiAgentParkingLotEnv({"horizon": 300, "num_agents": 11, "delay_done": 0, "use_render": False}) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(50): check_pos(list(env.vehicles.values())) - o, r, d, i = env.step({k: [0, 1] for k in env.vehicles.keys()}) + o, r, tm, tc, i = env.step({k: [0, 1] for k in env.vehicles.keys()}) env.reset() - if d["__all__"]: + if tm["__all__"]: break finally: env.close() @@ -648,12 +657,12 @@ def test_randomize_spawn_place(): last_pos = {} env = MultiAgentParkingLotEnv({"num_agents": 4, "use_render": False, "force_seed_spawn_manager": False}) try: - obs = env.reset() + obs, _ = env.reset() for step in range(100): act = {k: [1, 1] for k in env.vehicles.keys()} last_pos = {kkk: v.position for kkk, v in env.vehicles.items()} - o, r, d, i = env.step(act) - obs = env.reset() + o, r, tm, tc, i = env.step(act) + obs, _ = env.reset() new_pos = {kkk: v.position for kkk, v in env.vehicles.items()} for kkk, new_p in new_pos.items(): assert not np.all(new_p == last_pos[kkk]), (new_p, last_pos[kkk], kkk) diff --git a/metadrive/tests/test_env/test_ma_roundabout_env.py b/metadrive/tests/test_env/test_ma_roundabout_env.py index 45b70ebda..1943842b3 100644 --- a/metadrive/tests/test_env/test_ma_roundabout_env.py +++ b/metadrive/tests/test_env/test_ma_roundabout_env.py @@ -2,7 +2,7 @@ 
from metadrive.envs.marl_envs.multi_agent_metadrive import MULTI_AGENT_METADRIVE_DEFAULT_CONFIG MULTI_AGENT_METADRIVE_DEFAULT_CONFIG["force_seed_spawn_manager"] = True import numpy as np -from gym.spaces import Box, Dict +from gymnasium.spaces import Box, Dict from metadrive.constants import TerminationState from metadrive.envs.marl_envs.marl_inout_roundabout import MultiAgentRoundaboutEnv @@ -58,17 +58,19 @@ def _check_space(env): def _act(env, action): assert env.action_space.contains(action) - obs, reward, done, info = env.step(action) + obs, reward, terminated, truncated, info = env.step(action) _check_shape(env) - if not done["__all__"]: + if not terminated["__all__"]: assert len(env.vehicles) > 0 if not (set(obs.keys()) == set(reward.keys()) == set(env.observation_space.spaces.keys())): raise ValueError assert env.observation_space.contains(obs) assert isinstance(reward, dict) assert isinstance(info, dict) - assert isinstance(done, dict) - return obs, reward, done, info + assert isinstance(terminated, dict) + assert isinstance(truncated, dict) + + return obs, reward, terminated, truncated, info def test_ma_roundabout_env(): @@ -84,14 +86,15 @@ def test_ma_roundabout_env(): "vehicle_config": {"lidar": {"num_others": 0}}})]: try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(100): act = {k: [1, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) if step == 0: - assert not any(d.values()) + assert not any(tm.values()) + assert not any(tc.values()) finally: env.close() @@ -115,39 +118,40 @@ def test_ma_roundabout_horizon(): ) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) last_keys = set(env.vehicles.keys()) for step in range(1, 1000): act = {k: [1, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) new_keys = set(env.vehicles.keys()) if step == 0: - assert not any(d.values()) - if any(d.values()): + assert not any(tm.values()) + assert not any(tc.values()) + if any(tm.values()): assert len(last_keys) <= 4 # num of agents assert len(new_keys) <= 4 # num of agents for k in new_keys.difference(last_keys): assert k in o - assert k in d + assert k in tm # print("Step {}, Done: {}".format(step, d)) for kkk, rrr in r.items(): if rrr == -777: - assert d[kkk] + assert tm[kkk] assert i[kkk]["cost"] == 778 assert i[kkk][TerminationState.OUT_OF_ROAD] for kkk, iii in i.items(): if (TerminationState.OUT_OF_ROAD in iii and iii[TerminationState.OUT_OF_ROAD]) or \ ("cost" in iii and iii["cost"] == 778): - assert d[kkk] + assert tm[kkk] assert i[kkk]["cost"] == 778 assert i[kkk][TerminationState.OUT_OF_ROAD] #assert r[kkk] == -777 - if d["__all__"]: + if tm["__all__"]: break last_keys = new_keys finally: @@ -158,16 +162,16 @@ def test_ma_roundabout_reset(): env = MultiAgentRoundaboutEnv({"horizon": 50, "num_agents": 4}) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(1000): act = {k: [1, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) if step == 0: - assert not any(d.values()) - if d["__all__"]: - obs = env.reset() + assert not any(tm.values()) + if tm["__all__"]: + obs, _ = env.reset() assert 
env.observation_space.contains(obs) _check_spaces_after_reset(env, obs) @@ -185,7 +189,7 @@ def test_ma_roundabout_reset(): _check_spaces_before_reset(env) success_count = 0 agent_count = 0 - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) @@ -194,7 +198,7 @@ def test_ma_roundabout_reset(): # # for _ in range(2): # act = {k: [1, 1] for k in env.vehicles.keys()} - # o, r, d, i = _act(env, act) + # o, r, tm, tc, i = _act(env, act) # Force vehicle to success! for v_id, v in env.vehicles.items(): @@ -215,7 +219,7 @@ def test_ma_roundabout_reset(): assert env._is_arrive_destination(v) act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) for v in env.vehicles.values(): assert len(v.navigation.checkpoints) > 2 @@ -225,18 +229,18 @@ def test_ma_roundabout_reset(): # # print("{} success!".format(kkk)) success_count += 1 - for kkk, ddd in d.items(): + for kkk, ddd in tm.items(): if ddd and kkk != "__all__": assert i[kkk][TerminationState.SUCCESS] agent_count += 1 for kkk, rrr in r.items(): - if d[kkk]: + if tm[kkk]: assert rrr == 777 - if d["__all__"]: + if tm["__all__"]: # print("Finish {} agents. Success {} agents.".format(agent_count, success_count)) - o = env.reset() + o, _ = env.reset() assert env.observation_space.contains(o) _check_spaces_after_reset(env, o) break @@ -259,11 +263,12 @@ def _no_close_spawn(vehicles): try: _check_spaces_before_reset(env) for num_r in range(10): - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env) for _ in range(10): - o, r, d, i = env.step({k: [0, 0] for k in env.vehicles.keys()}) - assert not any(d.values()) + o, r, tm, tc, i = env.step({k: [0, 0] for k in env.vehicles.keys()}) + assert not any(tm.values()) + assert not any(tc.values()) _no_close_spawn(env.vehicles) # print('Finish {} resets.'.format(num_r)) finally: @@ -276,24 +281,24 @@ def test_ma_roundabout_reward_done_alignment(): env = MultiAgentRoundaboutEnv({"horizon": 200, "num_agents": 4, "out_of_road_penalty": 777, "crash_done": False}) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for action in [-1, 1]: for step in range(5000): act = {k: [action, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) - for kkk, ddd in d.items(): + o, r, tm, tc, i = _act(env, act) + for kkk, ddd in tm.items(): if ddd and kkk != "__all__": #assert r[kkk] == -777 assert i[kkk][TerminationState.OUT_OF_ROAD] # # print('{} done passed!'.format(kkk)) for kkk, rrr in r.items(): if rrr == -777: - assert d[kkk] + assert tm[kkk] assert i[kkk][TerminationState.OUT_OF_ROAD] # # print('{} reward passed!'.format(kkk)) - if d["__all__"]: + if tm["__all__"]: env.reset() break finally: @@ -320,28 +325,28 @@ def test_ma_roundabout_reward_done_alignment_1(): env._DEBUG_RANDOM_SEED = 1 try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) for step in range(5): act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) env.vehicles["agent0"].set_position(env.vehicles["agent1"].position, height=1.2) for step in range(5000): act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) - if not any(d.values()): + if not any(tm.values()): continue - assert sum(d.values()) == 2 + assert sum(tm.values()) == 2 
for kkk in ['agent0', 'agent1']: iii = i[kkk] assert iii[TerminationState.CRASH_VEHICLE] assert iii[TerminationState.CRASH] #assert r[kkk] == -1.7777 - # for kkk, ddd in d.items(): - ddd = d[kkk] + # for kkk, ddd in tm.items(): + ddd = tm[kkk] if ddd and kkk != "__all__": #assert r[kkk] == -1.7777 assert i[kkk][TerminationState.CRASH_VEHICLE] @@ -350,12 +355,12 @@ def test_ma_roundabout_reward_done_alignment_1(): # for kkk, rrr in r.items(): rrr = r[kkk] if rrr == -1.7777: - assert d[kkk] + assert tm[kkk] assert i[kkk][TerminationState.CRASH_VEHICLE] assert i[kkk][TerminationState.CRASH] # # print('{} reward passed!'.format(kkk)) - # assert d["__all__"] - # if d["__all__"]: + # assert tm["__all__"] + # if tm["__all__"]: break finally: env._DEBUG_RANDOM_SEED = None @@ -380,11 +385,11 @@ def test_ma_roundabout_reward_done_alignment_1(): ) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env, obs) for step in range(1): act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) for v_id, v in env.vehicles.items(): if v_id != "agent0": @@ -392,7 +397,7 @@ def test_ma_roundabout_reward_done_alignment_1(): for step in range(5000): act = {k: [0, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) + o, r, tm, tc, i = _act(env, act) for kkk, iii in i.items(): if iii[TerminationState.CRASH]: assert iii[TerminationState.CRASH_VEHICLE] @@ -404,9 +409,9 @@ def test_ma_roundabout_reward_done_alignment_1(): assert i[kkk][TerminationState.CRASH_VEHICLE] assert i[kkk][TerminationState.CRASH] # # print('{} reward passed!'.format(kkk)) - if d["agent0"]: + if tm["agent0"]: break - if d["__all__"]: + if tm["__all__"]: break finally: env.close() @@ -423,24 +428,24 @@ def test_ma_roundabout_reward_done_alignment_1(): ) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env) env.vehicles["agent0"].set_position(env.vehicles["agent0"].navigation.final_lane.end) assert env.observation_space.contains(obs) for step in range(5000): act = {k: [0, 0] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) - if d["__all__"]: + o, r, tm, tc, i = _act(env, act) + if tm["__all__"]: break kkk = "agent0" #assert r[kkk] == 999 assert i[kkk][TerminationState.SUCCESS] - assert d[kkk] + assert tm[kkk] kkk = "agent1" #assert r[kkk] != 999 assert not i[kkk][TerminationState.SUCCESS] - assert not d[kkk] + assert not tm[kkk] break finally: env.close() @@ -465,21 +470,21 @@ def _safe_places(self): env = TestEnv({"num_agents": 1}) try: _check_spaces_before_reset(env) - obs = env.reset() + obs, _ = env.reset() _check_spaces_after_reset(env) ep_reward = 0.0 for step in range(1000): act = {k: [0, 1] for k in env.vehicles.keys()} - o, r, d, i = env.step(act) + o, r, tm, tc, i = env.step(act) ep_reward += next(iter(r.values())) - if any(d.values()): + if any(tm.values()): # print("Finish respawn count: {}, reward {}".format(env._respawn_count, ep_reward)) env._respawn_count += 1 assert ep_reward > 10, ep_reward ep_reward = 0 if env._respawn_count >= len(env._safe_places): break - if d["__all__"]: + if tm["__all__"]: break finally: env.close() @@ -524,26 +529,26 @@ def test_ma_roundabout_no_short_episode(): }) try: _check_spaces_before_reset(env) - o = env.reset() + o, _ = env.reset() _check_spaces_after_reset(env, o) actions = [[0, 1], [1, 1], [-1, 1]] start = time.time() - d_count = 0 - d = {"__all__": False} + tm_count = 0 + tm = {"__all__": False} for step in 
range(2000):
# act = {k: actions[np.random.choice(len(actions))] for k in o.keys()}
act = {k: actions[np.random.choice(len(actions))] for k in env.vehicles.keys()}
o_keys = set(o.keys()).union({"__all__"})
- a_keys = set(env.action_space.spaces.keys()).union(set(d.keys()))
+ a_keys = set(env.action_space.spaces.keys()).union(set(tm.keys()))
assert o_keys == a_keys
- o, r, d, i = _act(env, act)
+ o, r, tm, tc, i = _act(env, act)
for kkk, iii in i.items():
- if d[kkk]:
+ if tm[kkk]:
assert iii["episode_length"] >= 1
- d_count += 1
- if d["__all__"]:
- o = env.reset()
- d = {"__all__": False}
+ tm_count += 1
+ if tm["__all__"]:
+ o, _ = env.reset()
+ tm = {"__all__": False}
# if (step + 1) % 100 == 0:
# # print(
# "Finish {}/2000 simulation steps. Time elapse: {:.4f}. Average FPS: {:.4f}".format(
@@ -551,7 +556,7 @@ def test_ma_roundabout_no_short_episode():
# time.time() - start, (step + 1) / (time.time() - start)
# )
# )
- if d_count > 200:
+ if tm_count > 200:
break
finally:
env.close()
@@ -563,7 +568,7 @@ def test_ma_roundabout_horizon_termination():
try:
for _ in range(3):  # This function is really easy to break, repeat multiple times!
_check_spaces_before_reset(env)
- obs = env.reset()
+ obs, _ = env.reset()
_check_spaces_after_reset(env, obs)
assert env.observation_space.contains(obs)
should_respawn = set()
@@ -576,19 +581,21 @@
else:
if v_id in env.vehicles:
env.vehicles[v_id].set_static(True)
- obs, r, d, i = _act(env, act)
+ obs, r, tm, tc, i = _act(env, act)
if step == 0 or step == 1:
- assert not any(d.values())
+ assert not any(tm.values())
+ assert not any(tc.values())
if should_respawn:
for kkk in should_respawn:
assert kkk not in obs, "It seems the max_step agents is not respawn!"
assert kkk not in r
- assert kkk not in d
+ assert kkk not in tm
+ assert kkk not in tc
assert kkk not in i
should_respawn.clear()
- for kkk, ddd in d.items():
+ for kkk, ddd in tm.items():
if ddd and kkk == "__all__":
# print("Current: ", step)
continue
@@ -599,8 +606,8 @@
assert not i[kkk][TerminationState.CRASH_VEHICLE]
should_respawn.add(kkk)
- if d["__all__"]:
- obs = env.reset()
+ if tm["__all__"]:
+ obs, _ = env.reset()
should_respawn.clear()
break
finally:
@@ -623,7 +630,7 @@ def check_pos(vehicles):
env = MultiAgentRoundaboutEnv({"horizon": 50, "num_agents": 40})
try:
_check_spaces_before_reset(env)
- obs = env.reset()
+ obs, _ = env.reset()
_check_spaces_after_reset(env, obs)
assert env.observation_space.contains(obs)
for step in range(50):
@@ -655,13 +662,13 @@ def check_pos(vehicles):
env = MultiAgentRoundaboutEnv({"horizon": 300, "num_agents": 40, "delay_done": 0})
try:
_check_spaces_before_reset(env)
- obs = env.reset()
+ obs, _ = env.reset()
_check_spaces_after_reset(env, obs)
assert env.observation_space.contains(obs)
for step in range(300):
check_pos(list(env.vehicles.values()))
- o, r, d, i = env.step({k: [0, 1] for k in env.vehicles.keys()})
- if d["__all__"]:
+ o, r, tm, tc, i = env.step({k: [0, 1] for k in env.vehicles.keys()})
+ if tm["__all__"]:
break
finally:
env.close()
@@ -671,12 +678,12 @@ def test_randomize_spawn_place():
last_pos = {}
env = MultiAgentRoundaboutEnv({"num_agents": 4, "use_render": False, "force_seed_spawn_manager": False})
try:
- obs = env.reset()
+ obs, _ = env.reset()
for step in range(100):
act = {k: [1, 1] for k in env.vehicles.keys()}
last_pos = {kkk: v.position for kkk, v in env.vehicles.items()}
- o, r, d, i = env.step(act)
- obs = env.reset()
+ o, r, tm, tc, i = env.step(act)
+ obs, _ = env.reset()
new_pos = {kkk: v.position for kkk, v in env.vehicles.items()}
for kkk, new_p in new_pos.items():
assert not np.all(new_p == last_pos[kkk]), (new_p, last_pos[kkk], kkk)
diff --git a/metadrive/tests/test_env/test_ma_tollgate.py b/metadrive/tests/test_env/test_ma_tollgate.py
index 599e61be6..032e79b25 100644
--- a/metadrive/tests/test_env/test_ma_tollgate.py
+++ b/metadrive/tests/test_env/test_ma_tollgate.py
@@ -2,7 +2,7 @@
from metadrive.envs.marl_envs.multi_agent_metadrive import MULTI_AGENT_METADRIVE_DEFAULT_CONFIG
MULTI_AGENT_METADRIVE_DEFAULT_CONFIG["force_seed_spawn_manager"] = True
import numpy as np
-from gym.spaces import Box, Dict
+from gymnasium.spaces import Box, Dict
from metadrive.envs.marl_envs.marl_tollgate import MultiAgentTollgateEnv
from metadrive.utils import distance_greater, norm
@@ -57,17 +57,18 @@ def _check_space(env):
def _act(env, action):
assert env.action_space.contains(action)
- obs, reward, done, info = env.step(action)
+ obs, reward, terminated, truncated, info = env.step(action)
_check_shape(env)
- if not done["__all__"]:
+ if not terminated["__all__"]:
assert len(env.vehicles) > 0
if not (set(obs.keys()) == set(reward.keys()) == set(env.observation_space.spaces.keys())):
raise ValueError
assert env.observation_space.contains(obs)
assert isinstance(reward, dict)
assert isinstance(info, dict)
- assert isinstance(done, dict)
- return obs, reward, done, info
+ assert isinstance(terminated, dict)
+ assert isinstance(truncated, dict)
+ return obs, reward, terminated, truncated, info
def test_ma_toll_env():
@@ -82,14 +83,16 @@
"vehicle_config": {"lidar": {"num_others": 0}}})]:
try:
_check_spaces_before_reset(env)
- obs = env.reset()
+ obs, _ = env.reset()
_check_spaces_after_reset(env, obs)
assert env.observation_space.contains(obs)
for step in range(100):
act = {k: [1, 1] for k in env.vehicles.keys()}
- o, r, d, i = _act(env, act)
+ o, r, tm, tc, i = _act(env, act)
if step == 0:
- assert not any(d.values())
+ assert not any(tm.values())
+ assert not any(tc.values())
+
finally:
env.close()
@@ -113,38 +116,38 @@ def test_ma_toll_horizon():
)
try:
_check_spaces_before_reset(env)
- obs = env.reset()
+ obs, _ = env.reset()
_check_spaces_after_reset(env, obs)
assert env.observation_space.contains(obs)
last_keys = set(env.vehicles.keys())
for step in range(1, 1000):
act = {k: [1, 1] for k in env.vehicles.keys()}
- o, r, d, i = _act(env, act)
+ o, r, tm, tc, i = _act(env, act)
new_keys = set(env.vehicles.keys())
if step == 0:
- assert not any(d.values())
- if any(d.values()):
+ assert not any(tm.values())
+ if any(tm.values()):
assert len(last_keys) <= 4  # num of agents
assert len(new_keys) <= 4  # num of agents
for k in new_keys.difference(last_keys):
assert k in o
- assert k in d
+ assert k in tm
# print("Step {}, Done: {}".format(step, d))
for kkk, rrr in r.items():
if rrr == -777:
- assert d[kkk]
+ assert tm[kkk]
assert i[kkk]["cost"] == 778
assert i[kkk]["out_of_road"]
for kkk, iii in i.items():
if "out_of_road" in iii and (iii["out_of_road"] or iii["cost"] == 778):
- assert d[kkk]
+ assert tm[kkk]
assert i[kkk]["cost"] == 778
assert i[kkk]["out_of_road"]
#assert r[kkk] == -777
- if d["__all__"]:
+ if tm["__all__"]:
break
last_keys = new_keys
finally:
@@ -155,16 +158,17 @@ def test_ma_toll_reset():
env = MultiAgentTollgateEnv({"horizon": 50, "num_agents": 4})
try:
_check_spaces_before_reset(env)
- obs = env.reset()
+ obs, _ = env.reset()
_check_spaces_after_reset(env, obs)
assert env.observation_space.contains(obs)
for step in range(1000):
act = {k: [1, 1] for k in env.vehicles.keys()}
- o, r, d, i = _act(env, act)
+ o, r, tm, tc, i = _act(env, act)
if step == 0:
- assert not any(d.values())
- if d["__all__"]:
- obs = env.reset()
+ assert not any(tm.values())
+ assert not any(tc.values())
+ if tm["__all__"]:
+ obs, _ = env.reset()
assert env.observation_space.contains(obs)
_check_spaces_after_reset(env, obs)
@@ -182,7 +186,7 @@ def test_ma_toll_reset():
_check_spaces_before_reset(env)
success_count = 0
agent_count = 0
- obs = env.reset()
+ obs, _ = env.reset()
_check_spaces_after_reset(env, obs)
assert env.observation_space.contains(obs)
@@ -191,7 +195,7 @@
# for _ in range(2):
# act = {k: [1, 1] for k in env.vehicles.keys()}
- # o, r, d, i = _act(env, act)
+ # o, r, tm, tc, i = _act(env, act)
# Force vehicle to success!
for v_id, v in env.vehicles.items():
@@ -213,7 +217,7 @@
assert env._is_arrive_destination(v)
act = {k: [0, 0] for k in env.vehicles.keys()}
- o, r, d, i = _act(env, act)
+ o, r, tm, tc, i = _act(env, act)
for v in env.vehicles.values():
assert len(v.navigation.checkpoints) > 2
@@ -223,18 +227,18 @@
# # print("{} success!".format(kkk))
success_count += 1
- for kkk, ddd in d.items():
+ for kkk, ddd in tm.items():
if ddd and kkk != "__all__":
assert i[kkk]["arrive_dest"]
agent_count += 1
for kkk, rrr in r.items():
- if d[kkk]:
+ if tm[kkk]:
assert rrr == 777
- if d["__all__"]:
+ if tm["__all__"]:
# print("Finish {} agents. Success {} agents.".format(agent_count, success_count))
- o = env.reset()
+ o, _ = env.reset()
assert env.observation_space.contains(o)
_check_spaces_after_reset(env, o)
break
@@ -266,11 +270,11 @@ def _no_close_spawn(vehicles):
try:
_check_spaces_before_reset(env)
for num_r in range(10):
- obs = env.reset()
+ obs, _ = env.reset()
_check_spaces_after_reset(env)
for _ in range(10):
- o, r, d, i = env.step({k: [0, 0] for k in env.vehicles.keys()})
- assert not any(d.values())
+ o, r, tm, tc, i = env.step({k: [0, 0] for k in env.vehicles.keys()})
+ assert not any(tm.values())
_no_close_spawn(env.vehicles)
# print('Finish {} resets.'.format(num_r))
finally:
@@ -283,15 +287,15 @@ def test_ma_toll_reward_done_alignment_1():
env = MultiAgentTollgateEnv({"horizon": 200, "num_agents": 4, "out_of_road_penalty": 777, "crash_done": False})
try:
_check_spaces_before_reset(env)
- obs = env.reset()
+ obs, _ = env.reset()
_check_spaces_after_reset(env, obs)
assert env.observation_space.contains(obs)
for action in [-1, 1]:
for step in range(5000):
act = {k: [action, 1] for k in env.vehicles.keys()}
- o, r, d, i = _act(env, act)
- for kkk, ddd in d.items():
- if ddd and kkk != "__all__" and not d["__all__"] and not i[kkk]["max_step"]:
+ o, r, tm, tc, i = _act(env, act)
+ for kkk, ddd in tm.items():
+ if ddd and kkk != "__all__" and not tm["__all__"] and not i[kkk]["max_step"]:
if r[kkk] != -777:
raise ValueError
#assert r[kkk] == -777
@@ -299,10 +303,10 @@
# # print('{} done passed!'.format(kkk))
for kkk, rrr in r.items():
if rrr == -777:
- assert d[kkk]
+ assert tm[kkk]
assert i[kkk]["out_of_road"]
# # print('{} reward passed!'.format(kkk))
- if d["__all__"]:
+ if tm["__all__"]:
env.reset()
break
finally:
@@ -327,28 +331,28 @@
env._DEBUG_RANDOM_SEED = 1
try:
_check_spaces_before_reset(env)
- obs = env.reset()
+ obs, _ = env.reset()
_check_spaces_after_reset(env, obs)
for step in range(5):
act = {k: [0, 0] for k in env.vehicles.keys()}
- o, r, d, i = _act(env, act)
+ o, r, tm, tc, i = _act(env, act)
env.vehicles["agent0"].set_position(env.vehicles["agent1"].position, height=1.2)
for step in range(5000):
act = {k: [0, 0] for k in env.vehicles.keys()}
- o, r, d, i = _act(env, act)
+ o, r, tm, tc, i = _act(env, act)
- if not any(d.values()):
+ if not any(tm.values()):
continue
- assert sum(d.values()) == 2
+ assert sum(tm.values()) == 2
for kkk in ['agent0', 'agent1']:
iii = i[kkk]
assert iii["crash_vehicle"]
assert iii["crash"]
#assert r[kkk] == -1.7777
- # for kkk, ddd in d.items():
- ddd = d[kkk]
+ # for kkk, ddd in tm.items():
+ ddd = tm[kkk]
if ddd and kkk != "__all__":
#assert r[kkk] == -1.7777
assert i[kkk]["crash_vehicle"]
@@ -357,12 +361,12 @@
# for kkk, rrr in r.items():
rrr = r[kkk]
if rrr == -1.7777:
- assert d[kkk]
+ assert tm[kkk]
assert i[kkk]["crash_vehicle"]
assert i[kkk]["crash"]
# # print('{} reward passed!'.format(kkk))
- # assert d["__all__"]
- # if d["__all__"]:
+ # assert tm["__all__"]
+ # if tm["__all__"]:
break
finally:
env._DEBUG_RANDOM_SEED = None
@@ -389,11 +393,11 @@
)
try:
_check_spaces_before_reset(env)
- obs = env.reset()
+ obs, _ = env.reset()
_check_spaces_after_reset(env, obs)
for step in range(1):
act = {k: [0, 0] for k in env.vehicles.keys()}
- o, r, d, i = _act(env, act)
+ o, r, tm, tc, i = _act(env, act)
for v_id, v in env.vehicles.items():
if v_id != "agent0":
@@ -401,13 +405,13 @@
for step in range(5000):
act = {k: [0, 1] for k in env.vehicles.keys()}
- o, r, d, i = _act(env, act)
+ o, r, tm, tc, i = _act(env, act)
for kkk, iii in i.items():
if iii["crash_vehicle"]:
assert iii["crash"]
# #assert r[kkk] == -1.7777
- for kkk, ddd in d.items():
- if ddd and kkk != "__all__" and not d["__all__"]:
+ for kkk, ddd in tm.items():
+ if ddd and kkk != "__all__" and not tm["__all__"]:
assert i[kkk]["out_of_road"] or i[kkk]["arrive_dest"] or i[kkk]["crash_building"]
# # print('{} done passed!'.format(kkk))
for kkk, rrr in r.items():
@@ -416,9 +420,9 @@
assert i[kkk]["crash_vehicle"]
assert i[kkk]["crash"]
# # print('{} reward passed!'.format(kkk))
- if d["agent0"]:
+ if tm["agent0"]:
break
- if d["__all__"]:
+ if tm["__all__"]:
break
finally:
env.close()
@@ -435,24 +439,24 @@
)
try:
_check_spaces_before_reset(env)
- obs = env.reset()
+ obs, _ = env.reset()
_check_spaces_after_reset(env)
env.vehicles["agent0"].set_position(env.vehicles["agent0"].navigation.final_lane.end)
assert env.observation_space.contains(obs)
for step in range(5000):
act = {k: [0, 0] for k in env.vehicles.keys()}
- o, r, d, i = _act(env, act)
- if d["__all__"]:
+ o, r, tm, tc, i = _act(env, act)
+ if tm["__all__"]:
break
kkk = "agent0"
#assert r[kkk] == 999
assert i[kkk]["arrive_dest"]
- assert d[kkk]
+ assert tm[kkk]
kkk = "agent1"
#assert r[kkk] != 999
assert not i[kkk]["arrive_dest"]
- assert not d[kkk]
+ assert not tm[kkk]
break
finally:
env.close()
@@ -477,21 +481,21 @@ def _safe_places(self):
env = TestEnv({"num_agents": 1})
try:
_check_spaces_before_reset(env)
- obs = env.reset()
+ obs, _ = env.reset()
_check_spaces_after_reset(env)
ep_reward = 0.0
for step in range(1000):
act = {k: [0, 1] for k in env.vehicles.keys()}
- o, r, d, i = env.step(act)
+ o, r, tm, tc, i = env.step(act)
ep_reward += next(iter(r.values()))
- if any(d.values()):
+ if any(tm.values()):
# print("Finish respawn count: {}, reward {}".format(env._respawn_count, ep_reward))
env._respawn_count += 1
assert ep_reward > 10, ep_reward
ep_reward = 0
if env._respawn_count >= len(env._safe_places):
break
- if d["__all__"]:
+ if tm["__all__"]:
break
finally:
env.close()
@@ -536,26 +540,26 @@ def test_ma_toll_no_short_episode():
})
try:
_check_spaces_before_reset(env)
- o = env.reset()
+ o, _ = env.reset()
_check_spaces_after_reset(env, o)
actions = [[0, 1], [1, 1], [-1, 1]]
start = time.time()
- d_count = 0
- d = {"__all__": False}
+ tm_count = 0
+ tm = {"__all__": False}
for step in range(2000):
# act = {k: actions[np.random.choice(len(actions))] for k in o.keys()}
act = {k: actions[np.random.choice(len(actions))] for k in env.vehicles.keys()}
o_keys = set(o.keys()).union({"__all__"})
- a_keys = set(env.action_space.spaces.keys()).union(set(d.keys()))
+ a_keys = set(env.action_space.spaces.keys()).union(set(tm.keys()))
assert o_keys == a_keys
- o, r, d, i = _act(env, act)
+ o, r, tm, tc, i = _act(env, act)
for kkk, iii in i.items():
- if d[kkk]:
+ if tm[kkk]:
assert iii["episode_length"] >= 1
- d_count += 1
- if d["__all__"]:
- o = env.reset()
- d = {"__all__": False}
+ tm_count += 1
+ if tm["__all__"]:
+ o, _ = env.reset()
+ tm = {"__all__": False}
# if (step + 1) % 100 == 0:
# # print(
# "Finish {}/2000 simulation steps. Time elapse: {:.4f}. Average FPS: {:.4f}".format(
@@ -563,7 +567,7 @@ def test_ma_toll_no_short_episode():
# time.time() - start, (step + 1) / (time.time() - start)
# )
# )
- if d_count > 200:
+ if tm_count > 200:
break
finally:
env.close()
@@ -579,7 +583,7 @@ def test_ma_toll_horizon_termination(vis=False):
try:
for _ in range(3):  # This function is really easy to break, repeat multiple times!
_check_spaces_before_reset(env)
- obs = env.reset()
+ obs, _ = env.reset()
_check_spaces_after_reset(env, obs)
assert env.observation_space.contains(obs)
should_respawn = set()
@@ -592,23 +596,25 @@
else:
if v_id in env.vehicles:
env.vehicles[v_id].set_static(True)
- obs, r, d, i = _act(env, act)
+ obs, r, tm, tc, i = _act(env, act)
if vis:
env.render("topdown")
if step == 0 or step == 1:
- assert not any(d.values())
+ assert not any(tm.values())
+ assert not any(tc.values())
if should_respawn:
for kkk in should_respawn:
assert kkk not in obs, "It seems the max_step agents is not respawn!"
assert kkk not in r
- assert kkk not in d
+ assert kkk not in tm
+ assert kkk not in tc
assert kkk not in i
should_respawn.clear()
- for kkk, ddd in d.items():
+ for kkk, ddd in tm.items():
if ddd and kkk == "__all__":
# print("Current: ", step)
continue
@@ -619,8 +625,8 @@
assert not i[kkk]["crash_vehicle"]
should_respawn.add(kkk)
- if d["__all__"]:
- obs = env.reset()
+ if tm["__all__"]:
+ obs, _ = env.reset()
should_respawn.clear()
break
finally:
@@ -643,7 +649,7 @@ def check_pos(vehicles):
env = MultiAgentTollgateEnv({"horizon": 50, "num_agents": 36})
try:
_check_spaces_before_reset(env)
- obs = env.reset()
+ obs, _ = env.reset()
_check_spaces_after_reset(env, obs)
assert env.observation_space.contains(obs)
for step in range(50):
@@ -677,14 +683,14 @@ def check_pos(vehicles):
env = MultiAgentTollgateEnv({"horizon": 300, "num_agents": 36, "delay_done": 0, "use_render": False})
try:
_check_spaces_before_reset(env)
- obs = env.reset()
+ obs, _ = env.reset()
_check_spaces_after_reset(env, obs)
assert env.observation_space.contains(obs)
for step in range(50):
check_pos(list(env.vehicles.values()))
- o, r, d, i = env.step({k: [0, 1] for k in env.vehicles.keys()})
+ o, r, tm, tc, i = env.step({k: [0, 1] for k in env.vehicles.keys()})
env.reset()
- if d["__all__"]:
+ if tm["__all__"]:
break
finally:
env.close()
@@ -694,12 +700,12 @@ def test_randomize_spawn_place():
last_pos = {}
env = MultiAgentTollgateEnv({"num_agents": 4, "use_render": False, "force_seed_spawn_manager": False})
try:
- obs = env.reset()
+ obs, _ = env.reset()
for step in range(100):
act = {k: [1, 1] for k in env.vehicles.keys()}
last_pos = {kkk: v.position for kkk, v in env.vehicles.items()}
- o, r, d, i = env.step(act)
- obs = env.reset()
+ o, r, tm, tc, i = env.step(act)
+ obs, _ = env.reset()
new_pos = {kkk: v.position for kkk, v in env.vehicles.items()}
for kkk, new_p in new_pos.items():
assert not np.all(new_p == last_pos[kkk]), (new_p, last_pos[kkk], kkk)
diff --git a/metadrive/tests/test_env/test_metadrive_env.py b/metadrive/tests/test_env/test_metadrive_env.py
index b92cb46d2..7a21d2273 100644
--- a/metadrive/tests/test_env/test_metadrive_env.py
+++ b/metadrive/tests/test_env/test_metadrive_env.py
@@ -44,7 +44,7 @@ def _act(env, action):
assert env.action_space.contains(action)
- obs, reward, done, info = env.step(action)
+ obs, reward, terminated, truncated, info = env.step(action)
assert env.observation_space.contains(obs)
assert np.isscalar(reward)
assert isinstance(info, dict)
@@ -56,7 +56,7 @@ def _act(env, action):
def test_pgdrive_env_blackbox(config):
env = MetaDriveEnv(config=copy.deepcopy(config))
try:
- obs = env.reset()
+ obs, _ = env.reset()
assert env.observation_space.contains(obs)
_act(env, env.action_space.sample())
for x in [-1, 0, 1]:
diff --git a/metadrive/tests/test_env/test_naive_multi_agent.py b/metadrive/tests/test_env/test_naive_multi_agent.py
index 27341702b..41e175772 100644
--- a/metadrive/tests/test_env/test_naive_multi_agent.py
+++ b/metadrive/tests/test_env/test_naive_multi_agent.py
@@ -1,18 +1,18 @@
-import gym
+import gymnasium as gym
from metadrive.envs.marl_envs.multi_agent_metadrive import MultiAgentMetaDrive
def _a(env, action):
assert env.action_space.contains(action)
- obs, reward, done, info = env.step(action)
+ obs, reward, terminated, truncated, info = env.step(action)
assert env.observation_space.contains(obs)
assert isinstance(info, dict)
def _step(env):
try:
- obs = env.reset()
+ obs, _ = env.reset()
assert env.observation_space.contains(obs)
for _ in range(5):
_a(env, env.action_space.sample())
@@ -38,13 +38,13 @@ def test_naive_multi_agent_metadrive():
)
try:
assert isinstance(env.action_space, gym.spaces.Dict)
- obs = env.reset()
+ obs, _ = env.reset()
assert isinstance(obs, dict)
env.action_space.seed(0)
for step in range(100):
a = env.action_space.sample()
assert isinstance(a, dict)
- o, r, d, i = env.step(a)
+ o, r, tm, tc, i = env.step(a)
pos_z_list = [v.chassis.getNode(0).transform.pos[2] for v in env.vehicles.values()]
for p in pos_z_list:
@@ -52,9 +52,10 @@
assert isinstance(o, dict)
assert isinstance(r, dict)
- assert isinstance(d, dict)
+ assert isinstance(tm, dict)
+ assert isinstance(tc, dict)
assert isinstance(i, dict)
- if d["__all__"]:
+ if tm["__all__"]:
break
_step(env)
diff --git a/metadrive/tests/test_env/test_safe_env.py b/metadrive/tests/test_env/test_safe_env.py
index 124df15a3..8d66df0b9 100644
--- a/metadrive/tests/test_env/test_safe_env.py
+++ b/metadrive/tests/test_env/test_safe_env.py
@@ -11,14 +11,14 @@ def test_safe_env(vis=False):
env = SafeMetaDriveEnv(config)
try:
- o = env.reset()
+ o, _ = env.reset()
total_cost = 0
for ep in range(5):
for i in range(1, 100):
- o, r, d, info = env.step([0, 1])
+ o, r, tm, tc, info = env.step([0, 1])
total_cost += info["cost"]
assert env.observation_space.contains(o)
- if d:
+ if tm or tc:
total_cost = 0
# print("Reset")
env.reset()
diff --git a/metadrive/tests/test_env/test_top_down_env.py b/metadrive/tests/test_env/test_top_down_env.py
index 5b225a8d7..ac7d633d9 100644
--- a/metadrive/tests/test_env/test_top_down_env.py
+++ b/metadrive/tests/test_env/test_top_down_env.py
@@ -15,7 +15,7 @@ def test_top_down_rendering():
]:
try:
for _ in range(5):
- o = env.reset()
+ o, _ = env.reset()
assert np.mean(o) > 0.0
for _ in range(10):
o, *_ = env.step([0, 1])
@@ -30,24 +30,24 @@
def _vis_top_down_with_panda_render():
env = TopDownMetaDrive(dict(use_render=True))
try:
- o = env.reset()
+ o, _ = env.reset()
for i in range(1000):
- o, r, d, i = env.step([0, 1])
- if d:
+ o, r, tm, tc, i = env.step([0, 1])
+ if tm or tc:
break
finally:
env.close()
def _vis_top_down_with_panda_render_and_top_down_visualization():
- env = TopDownMetaDrive(dict(use_render=True))
+ env = TopDownMetaDrive({"use_render": True, "render_mode": "top_down"})
try:
- o = env.reset()
+ o, _ = env.reset()
for i in range(2000):
- o, r, d, i = env.step([0, 1])
- if d:
+ o, r, tm, tc, i = env.step([0, 1])
+ if tm or tc:
break
- env.render(mode="topdown")
+ env.render()
finally:
env.close()
diff --git a/metadrive/tests/test_env/test_varying_dynamics_env.py b/metadrive/tests/test_env/test_varying_dynamics_env.py
index 16f19a812..878298f9b 100644
--- a/metadrive/tests/test_env/test_varying_dynamics_env.py
+++ b/metadrive/tests/test_env/test_varying_dynamics_env.py
@@ -8,7 +8,7 @@ def test_varying_dynamics_env():
try:
dys = []
for seed in range(10):
- env.reset(force_seed=seed)
+ env.reset(seed=seed)
for _ in range(10):
env.step(env.action_space.sample())
dy = env.vehicle.get_dynamics_parameters()
diff --git a/metadrive/tests/test_env/test_waymo_env.py b/metadrive/tests/test_env/test_waymo_env.py
index 0b5781bfa..b0f062581 100644
--- a/metadrive/tests/test_env/test_waymo_env.py
+++ b/metadrive/tests/test_env/test_waymo_env.py
@@ -23,10 +23,10 @@ def test_waymo_env(policy, render=False, num_scenarios=3):
}
)
for seed in range(0, num_scenarios):
- env.reset(force_seed=seed)
+ env.reset(seed=seed)
for i in range(1000):
- o, r, d, info = env.step([1.0, 0.])
- if d:
+ o, r, tm, tc, info = env.step([1.0, 0.])
+ if tm or tc:
assert info["arrive_dest"], "Can not arrive dest"
print("{} track_length: ".format(env.engine.global_seed), info["track_length"])
# assert info["arrive_dest"], "Can not arrive dest"
@@ -58,10 +58,10 @@ def test_store_map_memory_leakage(render=False):
for _ in range(10):  # test twp times for testing loading stored map
for seed in range(3):
- env.reset(force_seed=seed)
+ env.reset(seed=seed)
for i in range(1000):
- o, r, d, info = env.step([1.0, 0.])
- if d:
+ o, r, tm, tc, info = env.step([1.0, 0.])
+ if tm or tc:
assert info["arrive_dest"], "Can not arrive dest"
assert env.episode_step > 60
break
diff --git a/metadrive/tests/test_export_record_scenario/test_avoid_duplicate_name.py b/metadrive/tests/test_export_record_scenario/test_avoid_duplicate_name.py
index 54c7f0a13..0d8a90e5f 100644
--- a/metadrive/tests/test_export_record_scenario/test_avoid_duplicate_name.py
+++ b/metadrive/tests/test_export_record_scenario/test_avoid_duplicate_name.py
@@ -29,20 +29,20 @@ def test_save_recreate_scenario_respawn_traffic(vis=False):
env = SafeMetaDriveEnv(cfg)
try:
positions_1 = []
- o = env.reset()
+ o, _ = env.reset()
for i in range(1, 1000):
- o, r, d, info = env.step([0, 0])
+ o, r, tm, tc, info = env.step([0, 0])
positions_1.append({v.name: v.position for v in env.engine.traffic_manager.spawned_objects.values()})
epi_info = env.engine.record_manager.get_episode_metadata()
env.close()
env = SafeMetaDriveEnv(cfg)
env.config["replay_episode"] = epi_info
env.config["record_episode"] = False
- o = env.reset()
+ o, _ = env.reset()
positions_1.reverse()
for i in range(0, 1000):
- o, r, d, info = env.step([0, 1])
+ o, r, tm, tc, info = env.step([0, 1])
position = positions_1.pop()
position = {env.engine.replay_manager.record_name_to_current_name[key]: v for key, v in position.items()}
current_position = {v.name: v.position for v in env.engine.replay_manager.spawned_objects.values()}
diff --git a/metadrive/tests/test_export_record_scenario/test_export_scenario.py b/metadrive/tests/test_export_record_scenario/test_export_scenario.py
index 69045ae1a..02af97a71 100644
--- a/metadrive/tests/test_export_record_scenario/test_export_scenario.py
+++ b/metadrive/tests/test_export_record_scenario/test_export_scenario.py
@@ -33,10 +33,11 @@ def test_export_metadrive_scenario(render_export_env=False, render_load_env=Fals
)
)
for index in range(num_scenarios):
- env.reset(force_seed=index)
+ env.reset(seed=index)
done = False
while not done:
- o, r, done, i = env.step([0, 0])
+ o, r, tm, tc, i = env.step([0, 0])
+ done = tm or tc
finally:
env.close()
if dir is not None:
@@ -76,12 +77,13 @@ def test_export_waymo_scenario(num_scenarios=3, render_export_env=False, render_
)
for index in range(num_scenarios):
print("Start replaying scenario {}".format(index))
- env.reset(force_seed=index)
+ env.reset(seed=index)
done = False
count = 0
while not done:
- o, r, done, i = env.step([0, 0])
+ o, r, tm, tc, i = env.step([0, 0])
count += 1
+ done = tm or tc
print("Finish replaying scenario {} with step {}".format(index, count))
finally:
env.close()
diff --git a/metadrive/tests/test_export_record_scenario/test_save_replay_episode.py b/metadrive/tests/test_export_record_scenario/test_save_replay_episode.py
index 0588166be..6ed3c7459 100644
--- a/metadrive/tests/test_export_record_scenario/test_save_replay_episode.py
+++ b/metadrive/tests/test_export_record_scenario/test_save_replay_episode.py
@@ -25,6 +25,7 @@ def test_save_episode(vis=False):
"start_seed": 1000,
# "manual_control": vis,
"use_render": False,
+ "render_mode": "top_down",
"agent_policy": IDMPolicy,
"traffic_mode": TrafficMode.Trigger,
"record_episode": save_episode,
@@ -38,7 +39,7 @@ def test_save_episode(vis=False):
)
step_info = []
try:
- o = env.reset()
+ o, _ = env.reset()
for i in range(1, 100000 if vis else 2000):
step_info.append(
{
@@ -46,16 +47,16 @@
for name, obj in env.engine._spawned_objects.items()
}
)
- o, r, d, info = env.step([0, 1])
+ o, r, tm, tc, info = env.step([0, 1])
if vis:
- env.render(mode="top_down", road_color=(35, 35, 35))
- if d:
+ env.render(road_color=(35, 35, 35))
+ if tm or tc:
epi_info = env.engine.dump_episode("test_dump_single.pkl" if test_dump else None)
break
# with open("../test_export_record_scenario/test_dump_single.pkl", "rb") as f:
env.config["replay_episode"] = epi_info
env.config["record_episode"] = False
- o = env.reset()
+ o, _ = env.reset()
for i in range(0, 100000 if vis else 2000):
# if i % 5 ==0:
for old_id, new_id in env.engine.replay_manager.record_name_to_current_name.items():
@@ -70,9 +71,9 @@
assert np.isclose(np.array([pos[0], pos[1]]), np.array(step_info[i][old_id][0]), 1e-2, 1e-2).all()
assert abs(wrap_to_pi(heading - np.array(step_info[i][old_id][1]))) < 1e-2
# assert abs(env.vehicle.get_z() - record_pos[-1]) < 1e-3
- o, r, d, info = env.step([0, 1])
+ o, r, tm, tc, info = env.step([0, 1])
if vis:
- env.render(mode="top_down", )
+ env.render()
if info.get("replay_done", False):
break
finally:
@@ -82,7 +83,7 @@ def test_save_episode(vis=False):
def test_save_episode_marl(vis=False):
"""
1. Set record_episode=True to record each episode
- 2. dump_episode when done[__all__] == True
+ 2. dump_episode when terminated[__all__] == True
3. You can keep recent episodes
4. Input episode data to reset() function can replay the episode !
"""
@@ -97,18 +98,18 @@
)
try:
# Test Record
- o = env.reset(force_seed=0)
+ o, _ = env.reset(seed=0)
epi_info = None
# for tt in range(10, 100):
tt = 13
# print("\nseed: {}\n".format(tt))
env.engine.spawn_manager.seed(tt)
- o = env.reset()
+ o, _ = env.reset()
for i in range(1, 100000 if vis else 600):
- o, r, d, info = env.step({agent_id: [0, .2] for agent_id in env.vehicles.keys()})
+ o, r, tm, tc, info = env.step({agent_id: [0, .2] for agent_id in env.vehicles.keys()})
if vis:
env.render()
- if d["__all__"]:
+ if tm["__all__"]:
epi_info = env.engine.dump_episode("test_dump.pkl")
# test dump json
# if test_dump:
@@ -124,7 +125,7 @@
env.config["replay_episode"] = epi_info
env.config["record_episode"] = False
- o = env.reset()
+ o, _ = env.reset()
for i in range(1, 100000 if vis else 2000):
# if i % 5 ==0:
for old_id, new_id in env.engine.replay_manager.record_name_to_current_name.items():
@@ -136,10 +137,10 @@
assert np.isclose(np.array([pos[0], pos[1], obj.get_z()]), np.array(record_pos)).all()
assert abs(wrap_to_pi(heading - record_heading)) < 1e-2
# print("Replay MARL step: {}".format(i))
- o, r, d, info = env.step({agent_id: [0, 0.1] for agent_id in env.vehicles.keys()})
+ o, r, tm, tc, info = env.step({agent_id: [0, 0.1] for agent_id in env.vehicles.keys()})
if vis:
env.render()
- if d["__all__"]:
+ if tm["__all__"]:
break
finally:
env.close()
diff --git a/metadrive/tests/test_export_record_scenario/test_save_replay_via_policy.py b/metadrive/tests/test_export_record_scenario/test_save_replay_via_policy.py
index f768f4c46..e34dc4641 100644
--- a/metadrive/tests/test_export_record_scenario/test_save_replay_via_policy.py
+++ b/metadrive/tests/test_export_record_scenario/test_save_replay_via_policy.py
@@ -39,27 +39,27 @@ def test_save_recreate_scenario(vis=False):
env = SafeMetaDriveEnv(cfg)
try:
positions_1 = []
- o = env.reset()
+ o, _ = env.reset()
epi_info = env.engine.record_manager.get_episode_metadata()
for i in range(1, 100000 if vis else 2000):
- o, r, d, info = env.step([0, 1])
+ o, r, tm, tc, info = env.step([0, 1])
positions_1.append({v.name: v.position for v in env.engine.traffic_manager.spawned_objects.values()})
- if d:
+ if tm or tc:
break
env.close()
env = SafeMetaDriveEnv(cfg)
env.config["replay_episode"] = epi_info
env.config["record_episode"] = False
env.config["only_reset_when_replay"] = True
- o = env.reset()
+ o, _ = env.reset()
positions_1.reverse()
for i in range(0, 100000 if vis else 2000):
- o, r, d, info = env.step([0, 1])
+ o, r, tm, tc, info = env.step([0, 1])
position = positions_1.pop()
position = {env.engine.replay_manager.record_name_to_current_name[key]: v for key, v in position.items()}
current_position = {v.name: v.position for v in env.engine.traffic_manager.spawned_objects.values()}
assert_equal_pos(position, current_position)
- if d:
+ if tm or tc:
break
finally:
env.close()
diff --git a/metadrive/tests/test_functionality/test_collision.py b/metadrive/tests/test_functionality/test_collision.py
index 3e0cd782e..29c5a7b8d 100644
--- a/metadrive/tests/test_functionality/test_collision.py
+++ b/metadrive/tests/test_functionality/test_collision.py
@@ -6,11 +6,11 @@ def test_collision_with_vehicle(use_render=False):
env = MetaDriveEnv({"traffic_density": 1.0, "map": "SSS"})
else:
env = MetaDriveEnv({"traffic_density": 1.0, "map": "SSS", "use_render": True})
- o = env.reset()
+ o, _ = env.reset()
pass_test = False
try:
for i in range(1, 500):
- o, r, d, info = env.step([0, 1])
+ o, r, tm, tc, info = env.step([0, 1])
if env.vehicle.crash_vehicle:
pass_test = True
break
@@ -21,11 +21,11 @@
def test_collision_with_sidewalk():
env = MetaDriveEnv({"traffic_density": .0})
- o = env.reset()
+ o, _ = env.reset()
pass_test = False
try:
for i in range(1, 100):
- o, r, d, info = env.step([-0.5, 1])
+ o, r, tm, tc, info = env.step([-0.5, 1])
if env.vehicle.crash_sidewalk:
pass_test = True
break
@@ -36,12 +36,12 @@
def test_line_contact():
env = MetaDriveEnv({"traffic_density": .0})
- o = env.reset()
+ o, _ = env.reset()
on_broken_line = False
on_continuous_line = False
try:
for i in range(1, 100):
- o, r, d, info = env.step([-0.5, 1])
+ o, r, tm, tc, info = env.step([-0.5, 1])
on_broken_line = on_broken_line or env.vehicle.on_broken_line
on_continuous_line = on_continuous_line or env.vehicle.on_white_continuous_line
assert on_broken_line and on_continuous_line, "Collision function is broken!"
diff --git a/metadrive/tests/test_functionality/test_discrete_action.py b/metadrive/tests/test_functionality/test_discrete_action.py
index 84ee3e4f8..5ce2f816e 100644
--- a/metadrive/tests/test_functionality/test_discrete_action.py
+++ b/metadrive/tests/test_functionality/test_discrete_action.py
@@ -1,4 +1,4 @@
-import gym.spaces
+import gymnasium as gym
from metadrive.envs.metadrive_env import MetaDriveEnv
from metadrive.utils import setup_logger
@@ -56,7 +56,7 @@ def test_discrete_action():
assert policy.convert_to_continuous_action(14) == (1, 1)
for _ in range(20):
- o, r, d, i = env.step(env.action_space.sample())
+ o, r, tm, tc, i = env.step(env.action_space.sample())
finally:
env.close()
@@ -87,7 +87,7 @@ def test_multi_discrete_action():
assert policy.convert_to_continuous_action([2, 4]) == (1, 1)
for _ in range(20):
- o, r, d, i = env.step(env.action_space.sample())
+ o, r, tm, tc, i = env.step(env.action_space.sample())
finally:
env.close()
diff --git a/metadrive/tests/test_functionality/test_episode_release.py b/metadrive/tests/test_functionality/test_episode_release.py
index fa9ae48e0..2966d47a6 100644
--- a/metadrive/tests/test_functionality/test_episode_release.py
+++ b/metadrive/tests/test_functionality/test_episode_release.py
@@ -12,7 +12,7 @@ def test_episode_release():
"debug": True
}
)
- o = env.reset()
+ o, _ = env.reset()
for i in range(1, 10):
env.step([1.0, 1.0])
env.step([1.0, 1.0])
diff --git a/metadrive/tests/test_functionality/test_export_map.py b/metadrive/tests/test_functionality/test_export_map.py
index ec446ccbf..97caa84cc 100644
--- a/metadrive/tests/test_functionality/test_export_map.py
+++ b/metadrive/tests/test_functionality/test_export_map.py
@@ -24,7 +24,7 @@ def test_export_waymo_map(render=False):
)
try:
for seed in range(3):
- env.reset(force_seed=seed)
+ env.reset(seed=seed)
map_vector = env.current_map.get_map_features()
draw_map(map_vector, True if render else False)
finally:
@@ -34,7 +34,7 @@
def test_metadrive_map_export(render=False):
env = MetaDriveEnv(dict(image_observation=False, map=6, num_scenarios=1, start_seed=0))
try:
- env.reset(force_seed=0)
+ env.reset(seed=0)
map_vector = env.current_map.get_map_features()
draw_map(map_vector, True if render else False)
finally:
@@ -53,7 +53,7 @@ def _test_nuplan_map_export(render=False):
}
)
try:
- env.reset(force_seed=0)
+ env.reset(seed=0)
map_vector = env.current_map.get_map_features()
draw_map(map_vector, True if render else False)
finally:
diff --git a/metadrive/tests/test_functionality/test_fps.py b/metadrive/tests/test_functionality/test_fps.py
index 2f01257a6..3a6030b93 100644
--- a/metadrive/tests/test_functionality/test_fps.py
+++ b/metadrive/tests/test_functionality/test_fps.py
@@ -12,14 +12,14 @@ def _test_fps():
num_scenarios=1000,
start_seed=1010,
))
- obs = env.reset()
+ obs, _ = env.reset()
start = time.time()
action = [0.0, 1.]
total_steps = 5000
vehicle_num = [len(env.engine.traffic_manager.vehicles)]
for s in range(total_steps):
- o, r, d, i = env.step(action)
- if d:
+ o, r, tm, tc, i = env.step(action)
+ if tm or tc:
env.reset()
vehicle_num.append(len(env.engine.traffic_manager.vehicles))
assert total_steps / (time.time() - start) > 200
diff --git a/metadrive/tests/test_functionality/test_gen_map_alignment.py b/metadrive/tests/test_functionality/test_gen_map_alignment.py
index 0dc9a5d5b..1ee21f552 100644
--- a/metadrive/tests/test_functionality/test_gen_map_alignment.py
+++ b/metadrive/tests/test_functionality/test_gen_map_alignment.py
@@ -20,7 +20,7 @@ def test_gen_map_alignment():
env = MetaDriveEnv(generate_config)
for i in range(env_num):
- env.reset(force_seed=i)
+ env.reset(seed=i)
data_2 = env.engine.map_manager.dump_all_maps(file_name="test_10maps.pickle")
recursive_equal(data_1.copy(), data_2.copy(), need_assert=True)
diff --git a/metadrive/tests/test_functionality/test_gen_map_read.py b/metadrive/tests/test_functionality/test_gen_map_read.py
index 1234d397a..e3ca21474 100644
--- a/metadrive/tests/test_functionality/test_gen_map_read.py
+++ b/metadrive/tests/test_functionality/test_gen_map_read.py
@@ -37,7 +37,7 @@ def test_gen_map_read():
recursive_equal(m, origin, need_assert=True)
for seed in tqdm.tqdm(range(env_num), desc="Test Scenario"):
- env.reset(force_seed=seed)
+ env.reset(seed=seed)
for i in range(10):
env.step(env.action_space.sample())
# print("Finish!")
diff --git a/metadrive/tests/test_functionality/test_get_closest_lane.py b/metadrive/tests/test_functionality/test_get_closest_lane.py
index 62c69cadb..2c0fe7fbf 100644
--- a/metadrive/tests/test_functionality/test_get_closest_lane.py
+++ b/metadrive/tests/test_functionality/test_get_closest_lane.py
@@ -12,9 +12,9 @@ def test_get_lane_index(use_render=False):
}
)
try:
- o = env.reset()
+ o, _ = env.reset()
for i in range(1, 500):
- o, r, d, info = env.step([0, 0])
+ o, r, tm, tc, info = env.step([0, 0])
for v in env.engine.traffic_manager.vehicles:
old_res = env.current_map.road_network.get_closest_lane_index(v.position, True)
old_lane_idx = [index[1] for index in old_res]
diff --git a/metadrive/tests/test_functionality/test_loading_map_from_json.py b/metadrive/tests/test_functionality/test_loading_map_from_json.py
index 6081f0e53..c62d74d36 100644
--- a/metadrive/tests/test_functionality/test_loading_map_from_json.py
+++ b/metadrive/tests/test_functionality/test_loading_map_from_json.py
@@ -30,7 +30,7 @@ def _test_loaded_map_alignment():
e.close()
e = MetaDriveEnv({"start_seed": seed, "num_scenarios": 10})
- e.reset(force_seed=seed)
+ e.reset(seed=seed)
map_data_realtime_generate_in_multiple_maps = e.current_map.get_meta_data()
e.close()
diff --git a/metadrive/tests/test_functionality/test_marl_infinite_agents.py b/metadrive/tests/test_functionality/test_marl_infinite_agents.py
index 11c834e4b..d655cdd8d 100644
--- a/metadrive/tests/test_functionality/test_marl_infinite_agents.py
+++ b/metadrive/tests/test_functionality/test_marl_infinite_agents.py
@@ -14,12 +14,12 @@ def test_infinite_agents():
}
)
try:
- o = env.reset()
+ o, _ = env.reset()
env.seed(100)
env._DEBUG_RANDOM_SEED = 100
max_num = old_num_of_vehicles = len(env.vehicles)
for i in range(1, 1000):
- o, r, d, info = env.step({k: [1, 1] for k in env.vehicles})
+ o, r, tm, tc, info = env.step({k: [1, 1] for k in env.vehicles})
# # print(
# "{} Current active agents: ".format(i), len(env.vehicles), ". Objects: ",
# len(env.agent_manager._object_to_agent)
@@ -27,10 +27,10 @@
max_num = max(len(env.vehicles), max_num)
# env.render(mode="top_down")
for kkk, iii in info.items():
- if d[kkk]:
+ if tm[kkk] or tc[kkk]:
assert iii["episode_length"] >= 1
- if d["__all__"]:
- o = env.reset()
+ if tm["__all__"]:
+ o, _ = env.reset()
# # print("Finish {} steps.".format(i))
finally:
env._DEBUG_RANDOM_SEED = None
@@ -38,19 +38,19 @@
env = MultiAgentRoundaboutEnv({"num_agents": -1, "delay_done": 0, "horizon": 50, "debug": True})
try:
- o = env.reset()
+ o, _ = env.reset()
max_num = old_num_of_vehicles = len(env.vehicles)
for i in range(1, 300):
- o, r, d, info = env.step({k: [0, 1] for k in env.vehicles})
+ o, r, tm, tc, info = env.step({k: [0, 1] for k in env.vehicles})
# # print("Current active agents: ", len(env.vehicles),
# ". Objects: ", len(env.agent_manager._object_to_agent))
max_num = max(len(env.vehicles), max_num)
# env.render(mode="top_down")
for kkk, iii in info.items():
- if d[kkk]:
+ if tm[kkk] or tc[kkk]:
assert iii["episode_length"] >= 1
- if d["__all__"]:
- o = env.reset()
+ if tm["__all__"]:
+ o, _ = env.reset()
# print("Finish {} steps.".format(i))
finally:
env.close()
diff --git a/metadrive/tests/test_functionality/test_marl_reborn.py b/metadrive/tests/test_functionality/test_marl_reborn.py
index f1826ad8b..dab51800e 100644
--- a/metadrive/tests/test_functionality/test_marl_reborn.py
+++ b/metadrive/tests/test_functionality/test_marl_reborn.py
@@ -21,7 +21,7 @@ def test_respawn():
assert set(env.config["target_vehicle_configs"].keys()) == {"agent0", "agent1"}
assert set(env.vehicles.keys()) == set()  # Not initialized yet!
- o = env.reset()
+ o, _ = env.reset()
assert set(env.observations.keys()) == {"agent0", "agent1"}
@@ -37,31 +37,31 @@
tracks = []
done_count = 0
for i in range(1, 1000):
- o, r, d, info = env.step({v_id_0: [-1, 1], v_id_1: [1, 1]})
+ o, r, tm, tc, info = env.step({v_id_0: [-1, 1], v_id_1: [1, 1]})
assert set(o.keys()) == set(r.keys()) == set(info.keys())
- assert set(o.keys()).union({"__all__"}) == set(d.keys())
- tracks.append(d)
- if d[v_id_0]:
+ assert set(o.keys()).union({"__all__"}) == set(tm.keys())
+ tracks.append(tm)
+ if tm[v_id_0]:
assert info[v_id_0][TerminationState.OUT_OF_ROAD]
assert info[v_id_0]["cost"] == out_of_road_cost
assert r[v_id_0] == -out_of_road_penalty
v_id_0 = "agent{}".format(count)
count += 1
done_count += 1
- if d[v_id_1]:
+ if tm[v_id_1]:
assert info[v_id_1][TerminationState.OUT_OF_ROAD]
assert info[v_id_1]["cost"] == out_of_road_cost
assert r[v_id_1] == -out_of_road_penalty
v_id_1 = "agent{}".format(count)
count += 1
done_count += 1
- if all(d.values()):
+ if all(tm.values()):
raise ValueError()
if i % 100 == 0:  # Horizon
v_id_0 = "agent0"
v_id_1 = "agent1"
count = 2
- o = env.reset()
+ o, _ = env.reset()
assert set(o.keys()) == {"agent0", "agent1"}
assert set(env.observations.keys()) == {"agent0", "agent1"}
assert set(env.action_space.spaces.keys()) == {"agent0", "agent1"}
@@ -102,25 +102,26 @@ def test_delay_done(render=False):
try:
agent0_done = False
agent1_already_hit = False
- o = env.reset()
+ o, _ = env.reset()
for i in range(1, 300):
actions = {"agent0": [1, 1], "agent1": [1, 1]}
if "agent0" not in env.vehicles:
actions.pop("agent0")
if "agent1" not in env.vehicles:
actions.pop("agent1")
- o, r, d, info = env.step(actions)
+ o, r, tm, tc, info = env.step(actions)
if agent0_done:
assert "agent0" not in o
assert "agent0" not in info
- assert "agent0" not in d
- if d.get("agent0"):
+ assert "agent0" not in tm
+ assert "agent0" not in tc
+ if tm.get("agent0") or tc.get("agent0"):
agent0_done = True
if agent0_done:
if info["agent1"][TerminationState.CRASH_VEHICLE]:
agent1_already_hit = True
# print("Hit!")
- if d["__all__"]:
+ if tm["__all__"]:
assert agent1_already_hit
agent0_done = False
agent1_already_hit = False
@@ -133,15 +134,15 @@
env.reset()
dead = set()
for _ in range(300):
- o, r, d, i = env.step({k: [1, 1] for k in env.vehicles.keys()})
+ o, r, tm, tc, i = env.step({k: [1, 1] for k in env.vehicles.keys()})
for dead_name in dead:
assert dead_name not in o
# print("{} there!".format(env.vehicles.keys()))
# print("{} dead!".format([kkk for kkk, ddd in d.items() if ddd]))
- for kkk, ddd in d.items():
+ for kkk, ddd in tm.items():
if ddd and kkk != "__all__":
dead.add(kkk)
- if d["__all__"]:
+ if tm["__all__"]:
env.reset()
dead.clear()
finally:
diff --git a/metadrive/tests/test_functionality/test_navigation.py b/metadrive/tests/test_functionality/test_navigation.py
index 98fb0a4e0..297266eb1 100644
--- a/metadrive/tests/test_functionality/test_navigation.py
+++ b/metadrive/tests/test_functionality/test_navigation.py
@@ -38,7 +38,7 @@ def test_navigation(vis=False):
}
)
target = Target(0.375, 30)
- o = env.reset()
+ o, _ = env.reset()
if vis:
env.engine.accept('d', target.go_right)
env.engine.accept('a', target.go_left)
@@ -54,7 +54,7 @@
acc_error = env.vehicles[env.DEFAULT_AGENT].speed_km_h - target.speed_km_h
acc = acc_controller.get_result(acc_error)
for i in range(1, 1000000 if vis else 2000):
- o, r, d, info = env.step([-steering, acc])
+ o, r, tm, tc, info = env.step([-steering, acc])
# calculate new action
steering_error = o[0] - target.lateral
@@ -77,9 +77,9 @@
env.engine.on_screen_message.data.clear()
else:
env.render()
- if d:
+ if tm or tc:
# print("Reset")
- o = env.reset()
+ o, _ = env.reset()
steering_controller.reset()
steering_error = o[0] - target.lateral
diff --git a/metadrive/tests/test_functionality/test_object_collision_detection.py b/metadrive/tests/test_functionality/test_object_collision_detection.py
index 5770f6442..6756ea9de 100644
--- a/metadrive/tests/test_functionality/test_object_collision_detection.py
+++ b/metadrive/tests/test_functionality/test_object_collision_detection.py
@@ -193,7 +193,7 @@ def test_object_collision_detection(render=False):
}
)
try:
- o = env.reset()
+ o, _ = env.reset()
lane_index = (">>", ">>>", 0)
lane = env.current_map.road_network.get_lane(lane_index)
longitude = 22
@@ -218,7 +218,7 @@
crash_obj = False
detect_obj = False
for i in range(1, 100000 if render else 2000):
- o, r, d, info = env.step([0, 1])
+ o, r, tm, tc, info = env.step([0, 1])
for obj in env.observations[DEFAULT_AGENT].detected_objects:
if isinstance(obj, TrafficCone):
detect_obj = True
diff --git a/metadrive/tests/test_functionality/test_obs_action_space.py b/metadrive/tests/test_functionality/test_obs_action_space.py
index a56dfb5ed..34e10a22c 100644
--- a/metadrive/tests/test_functionality/test_obs_action_space.py
+++ b/metadrive/tests/test_functionality/test_obs_action_space.py
@@ -8,9 +8,9 @@ def setUp(self):
self.env = MetaDriveEnv()
def test_obs_space(self):
- obs = self.env.reset()
+ obs, _ = self.env.reset()
assert self.env.observation_space.contains(obs), (self.env.observation_space, obs.shape)
- obs, _, _, _ = self.env.step(self.env.action_space.sample())
+ obs, _, _, _, _ = self.env.step(self.env.action_space.sample())
assert self.env.observation_space.contains(obs), (self.env.observation_space, obs.shape)
def tearDown(self):
diff --git a/metadrive/tests/test_functionality/test_obs_noise.py b/metadrive/tests/test_functionality/test_obs_noise.py
index 3472a986d..c2b1da8fa 100644
--- a/metadrive/tests/test_functionality/test_obs_noise.py
+++ b/metadrive/tests/test_functionality/test_obs_noise.py
@@ -12,7 +12,7 @@ def _act(env, action):
assert env.action_space.contains(action)
- obs, reward, done, info = env.step(action)
+ obs, reward, terminated, truncated, info = env.step(action)
assert env.observation_space.contains(obs)
assert np.isscalar(reward)
assert isinstance(info, dict)
@@ -23,7 +23,7 @@
def test_obs_noise():
env = MetaDriveEnv({"vehicle_config": {"lidar": {"gaussian_noise": 1.0, "dropout_prob": 1.0}}})
try:
- obs = env.reset()
+ obs, _ = env.reset()
obs_cls = env.observations[env.DEFAULT_AGENT]
assert isinstance(obs_cls, LidarStateObservation)
ret = obs_cls._add_noise_to_cloud_points([0.5, 0.5, 0.5], gaussian_noise=1.0, dropout_prob=1.0)
@@ -38,7 +38,7 @@
env.close()
env = MetaDriveEnv({"vehicle_config": {"lidar": {"gaussian_noise": 0.0, "dropout_prob": 0.0}}})
try:
- obs = env.reset()
+ obs, _ = env.reset()
obs_cls = env.observations[env.DEFAULT_AGENT]
assert isinstance(obs_cls, LidarStateObservation)
ret = obs_cls._add_noise_to_cloud_points([0.5, 0.5, 0.5], gaussian_noise=0.0, dropout_prob=0.0)
diff --git a/metadrive/tests/test_functionality/test_out_of_road.py b/metadrive/tests/test_functionality/test_out_of_road.py
index d011f3cb8..89f11750d 100644
--- a/metadrive/tests/test_functionality/test_out_of_road.py
+++ b/metadrive/tests/test_functionality/test_out_of_road.py
@@ -16,11 +16,11 @@ def test_out_of_road():
)
)
try:
- obs = env.reset()
+ obs, _ = env.reset()
tolerance = math.sqrt(env.vehicle.WIDTH**2 + env.vehicle.LENGTH**2) / distance
for _ in range(100000000):
- o, r, d, i = env.step([steering, 1])
- if d:
+ o, r, tm, tc, i = env.step([steering, 1])
+ if tm or tc:
points = \
env.vehicle.side_detector.perceive(env.vehicle,
env.vehicle.engine.physics_world.static_world).cloud_points
@@ -46,7 +46,7 @@ def useless_left_right_distance_printing():
)
try:
for _ in range(100000000):
- o, r, d, i = env.step([steering, 1])
+ o, r, tm, tc, i = env.step([steering, 1])
vehicle = env.vehicle
l, r = vehicle.dist_to_left_side, vehicle.dist_to_right_side
total_width = float(
diff --git a/metadrive/tests/test_functionality/test_pedestrian.py b/metadrive/tests/test_functionality/test_pedestrian.py
index 61dbc1b5b..fcc4b72e2 100644
--- a/metadrive/tests/test_functionality/test_pedestrian.py
+++ b/metadrive/tests/test_functionality/test_pedestrian.py
@@ -42,7 +42,7 @@ def test_pedestrian(render=False):
obj_2.set_velocity([1, 0], 0, in_local_frame=True)
env.vehicle.set_velocity([5, 0], in_local_frame=False)
for s in range(1, 1000):
- o, r, d, info = env.step([0, 0])
+ o, r, tm, tc, info = env.step([0, 0])
# obj_1.set_velocity([1, 0], 2, in_local_frame=True)
# obj_2.set_velocity([1, 0], 0.8, in_local_frame=True)
if s == 300:
diff --git a/metadrive/tests/test_functionality/test_random_engine.py b/metadrive/tests/test_functionality/test_random_engine.py
index 1aa17f01b..6bc778f58 100644
--- a/metadrive/tests/test_functionality/test_random_engine.py
+++ b/metadrive/tests/test_functionality/test_random_engine.py
@@ -11,7 +11,7 @@ def test_seeding():
env.seed(999)
# assert env.engine is None
assert env.current_seed == 999
- env.reset(force_seed=992)
+ env.reset(seed=992)
assert env.current_seed == 992
# assert env.engine is not None
finally:
@@ -58,7 +58,7 @@ def test_map_random_seeding():
try:
env.reset()
env.reset()
- env.reset(force_seed=5)
+ env.reset(seed=5)
map_configs.append(env.current_map.get_meta_data())
lane_num.append(len(env.current_map.road_network.graph[FirstPGBlock.NODE_1][FirstPGBlock.NODE_2]))
lane_width.append(
@@ -82,7 +82,7 @@ def test_fixed_traffic():
try:
last_pos = None
for i in range(20):
- obs = env.reset()
+ obs, _ = env.reset()
assert env.engine.traffic_manager.random_seed == env.current_seed
new_pos = [v.position for v in env.engine.traffic_manager.vehicles]
if last_pos is not None and len(new_pos) == len(last_pos):
@@ -109,7 +109,7 @@ def test_random_traffic():
try:
last_pos = None
for i in range(10):
- obs = env.reset(force_seed=5)
+ obs, _ = env.reset(seed=5)
assert env.engine.traffic_manager.random_traffic
new_pos = [v.position for v in env.engine.traffic_manager.traffic_vehicles]
if len(new_pos) > 0:
@@ -135,15 +135,15 @@ def test_random_lane_width():
}
)
try:
- o = env.reset(force_seed=12)
+ o, _ = env.reset(seed=12)
old_config_1 = env.vehicle.lane.width
- env.reset(force_seed=15)
+ env.reset(seed=15)
old_config_2 = env.vehicle.lane.width
- env.reset(force_seed=13)
- env.reset(force_seed=12)
+ env.reset(seed=13)
+ env.reset(seed=12)
new_config = env.vehicle.lane.width
assert old_config_1 == new_config
- env.reset(force_seed=15)
+ env.reset(seed=15)
new_config = env.vehicle.lane.width
assert old_config_2 == new_config
assert old_config_1 != old_config_2
@@ -162,21 +162,21 @@ def test_random_lane_num():
}
)
try:
- o = env.reset(force_seed=12)
+ o, _ = env.reset(seed=12)
old_config_1 = env.vehicle.navigation.get_current_lane_num()
- env.reset(force_seed=15)
+ env.reset(seed=15)
old_config_2 = env.vehicle.navigation.get_current_lane_num()
- env.reset(force_seed=13)
- env.reset(force_seed=12)
+ env.reset(seed=13)
+ env.reset(seed=12)
new_config = env.vehicle.navigation.get_current_lane_num()
assert old_config_1 == new_config
- env.reset(force_seed=15)
+ env.reset(seed=15)
new_config = env.vehicle.navigation.get_current_lane_num()
assert old_config_2 == new_config
env.close()
- env.reset(force_seed=12)
+ env.reset(seed=12)
assert old_config_1 == env.vehicle.navigation.get_current_lane_num()
- env.reset(force_seed=15)
+ env.reset(seed=15)
assert old_config_2 == env.vehicle.navigation.get_current_lane_num()
finally:
env.close()
@@ -193,15 +193,15 @@ def test_random_vehicle_parameter():
}
)
try:
- o = env.reset(force_seed=12)
+ o, _ = env.reset(seed=12)
old_config_1 = env.vehicle.get_config(True)
- env.reset(force_seed=15)
+ env.reset(seed=15)
old_config_2 = env.vehicle.get_config(True)
- env.reset(force_seed=13)
- env.reset(force_seed=12)
+ env.reset(seed=13)
+ env.reset(seed=12)
new_config = env.vehicle.get_config(True)
assert recursive_equal(old_config_1, new_config)
- env.reset(force_seed=15)
+ env.reset(seed=15)
new_config = env.vehicle.get_config(True)
assert recursive_equal(old_config_2, new_config)
finally:
diff --git a/metadrive/tests/test_functionality/test_reward_cost_done.py b/metadrive/tests/test_functionality/test_reward_cost_done.py
index dc9dfc69c..ad228b400 100644
--- a/metadrive/tests/test_functionality/test_reward_cost_done.py
+++ b/metadrive/tests/test_functionality/test_reward_cost_done.py
@@ -22,8 +22,8 @@ def test_reward_cost_done():
# env = MetaDriveEnv(config=config)
# env.reset()
# for _ in range(1000):
-     # o, r, d, i = env.step([0, 1])
-     # if d:
+     # o, r, tm, tc, i = env.step([0, 1])
+     # if tm or tc:
# break
# assert i[TerminationState.SUCCESS]
# assert i["cost"] == 0
@@ -40,8 +40,8 @@
# env = MetaDriveEnv(config=config)
# env.reset()
# for _ in range(1000):
-     # o, r, d, i = env.step([1, 1])
-     # if d:
+     # o, r, tm, tc, i = env.step([1, 1])
+     # if tm or tc:
# break
# assert i[TerminationState.OUT_OF_ROAD]
# assert i["cost"] == rewards["out_of_road_cost"]
@@ -59,10 +59,10 @@
env.reset()
epr = 0
for _ in range(1000):
- o, r, d, i = env.step([0, 1])
+ o, r, tm, tc, i = env.step([0, 1])
epr += r
# print("R: {}, Accu R: {}".format(r, epr))
- if d:
+ if tm or tc:
epr = 0
break
assert i[TerminationState.CRASH]
@@ -84,8 +84,8 @@
# env = MetaDriveEnv(config=config)
# env.reset()
# for _ in range(1000):
-     # o, r, d, i = env.step([0, 1])
-     # if d:
+     # o, r, tm, tc, i = env.step([0, 1])
+     # if tm or tc:
# break
# assert i[TerminationState.CRASH]
# assert i[TerminationState.CRASH_OBJECT]
diff --git a/metadrive/tests/test_functionality/test_scenario_randomness.py b/metadrive/tests/test_functionality/test_scenario_randomness.py
index e3653fa68..c6eb0f030 100644
--- a/metadrive/tests/test_functionality/test_scenario_randomness.py
+++ b/metadrive/tests/test_functionality/test_scenario_randomness.py
@@ -38,28 +38,28 @@ def test_scenario_randomness(vis=False):
env = SafeMetaDriveEnv(cfg)
try:
positions_1 = []
- o = env.reset()
+ o, _ = env.reset()
positions_1.append([env.vehicle.position] + [v.position for v in env.engine.traffic_manager.traffic_vehicles])
for i in range(1, 100000 if vis else 2000):
- o, r, d, info = env.step([0, 1])
+ o, r, tm, tc, info = env.step([0, 1])
positions_1.append(
[env.vehicle.position] + [v.position for v in env.engine.traffic_manager.traffic_vehicles]
)
- if d:
+ if tm or tc:
break
env.close()
positions_1.reverse()
env = SafeMetaDriveEnv(cfg)
- o = env.reset()
+ o, _ = env.reset()
old_position = positions_1.pop()
new_position = [env.vehicle.position] + [v.position for v in env.engine.traffic_manager.traffic_vehicles]
assert_equal_pos(old_position, new_position)
for i in range(1, 100000 if vis else 2000):
- o, r, d, info = env.step([0, 1])
+ o, r, tm, tc, info = env.step([0, 1])
old_position = positions_1.pop()
new_position = [env.vehicle.position] + [v.position for v in env.engine.traffic_manager.traffic_vehicles]
assert_equal_pos(old_position, new_position)
- if d:
+ if tm or tc:
break
finally:
env.close()
diff --git a/metadrive/tests/test_functionality/test_traffic_mode.py b/metadrive/tests/test_functionality/test_traffic_mode.py
index e3c275cb3..148d08a25 100644
--- a/metadrive/tests/test_functionality/test_traffic_mode.py
+++ b/metadrive/tests/test_functionality/test_traffic_mode.py
@@ -15,14 +15,14 @@ def test_traffic_mode(render=False):
}
)
- o = env.reset()
+ o, _ = env.reset()
env.vehicle.set_velocity([1, 0.1], 10)
if mode == "respawn":
assert len(env.engine.traffic_manager._traffic_vehicles) != 0
elif mode == "hybrid" or mode == "trigger":
assert len(env.engine.traffic_manager._traffic_vehicles) == 0
for s in range(1, 300):
- o, r, d, info = env.step(env.action_space.sample())
+ o, r, tm, tc, info = env.step(env.action_space.sample())
if mode == "hybrid" or mode == "respawn":
assert len(env.engine.traffic_manager._traffic_vehicles) != 0
elif mode == "trigger":
diff --git a/metadrive/tests/test_installation.py b/metadrive/tests/test_installation.py
index 590a50a9f..b12f349b4 100644
--- a/metadrive/tests/test_installation.py
+++ b/metadrive/tests/test_installation.py
@@ -28,7 +28,7 @@ def capture_headless_image(cuda, image_source="main_camera"):
try:
env.reset()
for i in range(10):
- o, r, d, i = env.step([0, 1])
+ o, r, tm, tc, i = env.step([0, 1])
assert isinstance(o, dict)
# print("The observation is a dict with numpy arrays as values: ", {k: v.shape for k, v in o.items()})
o = o["image"][..., -1] * 255 if not cuda else o["image"].get()[..., -1] * 255
@@ -68,7 +68,7 @@ def verify_installation(cuda=False, camera="main"):
try:
env.reset()
for i in range(1, 100):
- o, r, d, info = env.step([0, 1])
+ o, r, tm, tc, info = env.step([0, 1])
except:
print("Error happens in Bullet physics world !")
sys.exit()
diff --git a/metadrive/tests/test_policy/test_expert_performance.py b/metadrive/tests/test_policy/test_expert_performance.py
index 65910c747..fadd0482f 100644
--- a/metadrive/tests/test_policy/test_expert_performance.py
+++ b/metadrive/tests/test_policy/test_expert_performance.py
@@ -14,12 +14,12 @@ def _evaluate(env_config, num_episode, has_traffic=True, need_on_same_lane=True)
env = MetaDriveEnv(env_config)
lane_idx_need_to_stay = 0
try:
- obs = env.reset()
+ obs, _ = env.reset()
lidar_success = False
success_list, reward_list, ep_reward, ep_len, ep_count = [], [], 0, 0, 0
while ep_count < num_episode:
action = expert(env.vehicle, deterministic=True)
- obs, reward, done, info = env.step(action)
+ obs, reward, terminated, truncated, info = env.step(action)
if need_on_same_lane:
assert lane_idx_need_to_stay == env.vehicle.lane_index[-1], "Not one the same lane"
# double check lidar
@@ -30,7 +30,7 @@ def _evaluate(env_config, num_episode, has_traffic=True, need_on_same_lane=True)
lidar_success = True
ep_reward += reward
ep_len += 1
- if done:
+ if terminated or truncated:
ep_count += 1
success_list.append(1 if get_terminal_state(info) == "Success" else 0)
reward_list.append(ep_reward)
@@ -40,7 +40,7 @@
">", ">>", len(reward_list) % 3
)
lane_idx_need_to_stay = len(reward_list) % 3
- obs = env.reset()
+ obs, _ = env.reset()
if has_traffic:
assert lidar_success
lidar_success = False
diff --git a/metadrive/tests/test_policy/test_idm_policy.py b/metadrive/tests/test_policy/test_idm_policy.py
index 3330bb2c6..45f41b714 100644
--- a/metadrive/tests/test_policy/test_idm_policy.py
+++ b/metadrive/tests/test_policy/test_idm_policy.py
@@ -58,7 +58,7 @@ def test_idm_policy_is_moving(render=False, in_test=True):
if render:
config.update({"use_render": True, "manual_control": True})
env = MetaDriveEnv(config)
- env.reset(force_seed=0)
+ env.reset(seed=0)
last_pos = None
try:
for t in range(100):
diff --git a/metadrive/tests/test_policy/test_lane_change_policy.py b/metadrive/tests/test_policy/test_lane_change_policy.py
index a1d142ebd..5720b031c 100644
--- a/metadrive/tests/test_policy/test_lane_change_policy.py
+++ b/metadrive/tests/test_policy/test_lane_change_policy.py
@@ -21,9 +21,9 @@ def test_check_discrete_space(render=False):
)
assert not env.config["use_multi_discrete"]
try:
- o = env.reset()
+ o, _ = env.reset()
for s in range(1, 30):
- o, r, d, info = env.step(env.action_space.sample())
+ o, r, tm, tc, info = env.step(env.action_space.sample())
assert env.action_space.n == env.config["discrete_throttle_dim"] * 3
finally:
env.close()
@@ -48,9 +48,9 @@ def test_check_multi_discrete_space(render=False):
)
assert env.config["use_multi_discrete"]
try:
- o = env.reset()
+ o, _ = env.reset()
for s in range(1, 30):
- o, r, d, info = env.step(env.action_space.sample())
+ o, r, tm, tc, info = env.step(env.action_space.sample())
assert env.action_space.nvec[0] == 3 and env.action_space.nvec[1] == env.config["discrete_throttle_dim"]
finally:
env.close()
@@ -76,15 +76,15 @@ def test_lane_change(render=False):
}
)
try:
- o = env.reset()
+ o, _ = env.reset()
for s in range(1, 60):
- o, r, d, info = env.step([2, 3])
+ o, r, tm, tc, info = env.step([2, 3])
assert env.vehicle.lane.index[-1] == 0
for s in range(1, 40):
- o, r, d, info = env.step([0, 3])
+ o, r, tm, tc, info = env.step([0, 3])
assert env.vehicle.lane.index[-1] == 2
for s in range(1, 70):
- o, r, d, info = env.step([1, 3])
+ o, r, tm, tc, info = env.step([1, 3])
assert env.vehicle.lane.index[-1] == 2
finally:
env.close()
diff --git a/metadrive/tests/test_policy/test_trajectory_dim_policy.py b/metadrive/tests/test_policy/test_trajectory_dim_policy.py
index 6ec063556..e0d1f36c7 100644
--- a/metadrive/tests/test_policy/test_trajectory_dim_policy.py
+++ b/metadrive/tests/test_policy/test_trajectory_dim_policy.py
@@ -31,7 +31,7 @@ def test_trajectory_idm(render=False):
)
try:
for seed in [0, 1, 2]:
- o = env.reset(force_seed=seed)
+ o, _ = env.reset(seed=seed)
sdc_route = env.engine.map_manager.current_sdc_route
v_config = copy.deepcopy(env.engine.global_config["vehicle_config"])
v_config.update(
@@ -54,7 +54,7 @@
v_list.append(v)
for s in range(1000):
- o, r, d, info = env.step(env.action_space.sample())
+ o, r, tm, tc, info = env.step(env.action_space.sample())
if s == 100:
v = v_list.pop(0)
env.engine.clear_objects([v.id])
@@ -70,7 +70,7 @@
assert not info["crash"]
- if d:
+ if tm or tc:
assert info["arrive_dest"]
break
finally:
diff --git a/metadrive/tests/tools/adjust_collision_model.py b/metadrive/tests/tools/adjust_collision_model.py
index d639aa3a5..0e37d62fb 100644
--- a/metadrive/tests/tools/adjust_collision_model.py
+++ b/metadrive/tests/tools/adjust_collision_model.py
@@ -39,7 +39,7 @@
import time
start = time.time()
- o = env.reset()
+ o, _ = env.reset()
def get_v_path():
return BaseVehicle.model_collection[env.vehicle.path[0]]
@@ -70,7 +70,7 @@ def decrease_y():
env.engine.accept("l", decrease_y)
for s in range(1, 10000):
- o, r, d, info = env.step([0, 0])
+ o, r, tm, tc, info = env.step([0, 0])
env.render(
text={
"heading_diff": env.vehicle.heading_diff(env.vehicle.lane),
diff --git a/metadrive/tests/vis_block/vis_yy.py b/metadrive/tests/vis_block/vis_yy.py
index 6cfc41d53..871d27cf1 100644
--- a/metadrive/tests/vis_block/vis_yy.py
+++ b/metadrive/tests/vis_block/vis_yy.py
@@ -37,10 +37,10 @@
}
)
- o = env.reset()
+ o, _ = env.reset()
# print("vehicle num", len(env.engine.traffic_manager.vehicles))
for i in range(1, 100000):
- o, r, d, info = env.step([0, 1])
+ o, r, tm, tc, info = env.step([0, 1])
info["fuel"] = env.vehicle.energy_consumption
env.render(
text={
@@ -49,7 +49,7 @@
"white_lane_line": env.vehicle.on_white_continuous_line
}
)
- if d:
+ if tm or tc:
# print("Reset")
env.reset()
env.close()
diff --git a/metadrive/tests/vis_env/vis_acc_break.py b/metadrive/tests/vis_env/vis_acc_break.py
index 3db3b24f3..d47865139 100644
--- a/metadrive/tests/vis_env/vis_acc_break.py
+++ b/metadrive/tests/vis_env/vis_acc_break.py
@@ -21,10 +21,10 @@
import time
start = time.time()
- o = env.reset()
+ o, _ = env.reset()
a = [.0, 1.]
for s in range(1, 100000):
- o, r, d, info = env.step(a)
+ o, r, tm, tc, info = env.step(a)
if env.vehicle.speed_km_h > 100:
a = [0, -1]
# print("0-100 km/h acc use time:{}".format(s * 0.1))
diff --git a/metadrive/tests/vis_env/vis_argoverse.py b/metadrive/tests/vis_env/vis_argoverse.py
index c75ee7758..fb5b85cc4 100644
--- a/metadrive/tests/vis_env/vis_argoverse.py
+++ b/metadrive/tests/vis_env/vis_argoverse.py
@@ -47,9 +47,9 @@ def _update_map(self, episode_data: dict = None):
}
)
- o = env.reset()
+ o, _ = env.reset()
for i in range(1, 100000):
- o, r, d, info = env.step([1.0, 0.])
+ o, r, tm, tc, info = env.step([1.0, 0.])
info = {}
info["lane_index"] = env.vehicle.lane_index
info["heading_diff"] = env.vehicle.heading_diff(env.vehicle.lane)
diff --git a/metadrive/tests/vis_env/vis_metadrive_env.py b/metadrive/tests/vis_env/vis_metadrive_env.py
index dc24ec258..69923a880 100644
--- a/metadrive/tests/vis_env/vis_metadrive_env.py
+++ b/metadrive/tests/vis_env/vis_metadrive_env.py
@@ -96,7 +96,7 @@ def lift_terrain():
start = time.time()
- o = env.reset()
+ o, _ = env.reset()
if env.config["render_pipeline"]:
env.engine.accept("5", env.engine.render_pipeline.reload_shaders)
env.engine.accept("7", acc_speed)
@@ -110,7 +110,7 @@ def lift_terrain():
# env.vehicle.set_velocity([5, 0], in_local_frame=True)
for s in range(1, 100000):
# env.vehicle.set_velocity([1, 0], in_local_frame=True)
- o, r, d, info = env.step([0, 0])
+ o, r, tm, tc, info = env.step([0, 0])
# env.vehicle.set_pitch(-np.pi/4)
# [0.09231533, 0.491018, 0.47076905, 0.7691619, 0.5, 0.5, 1.0, 0.0, 0.48037243, 0.8904728, 0.81229943, 0.7317231, 1.0, 0.85320455, 0.9747932, 0.65675277, 0.0, 0.5, 0.5]
@@ -128,7 +128,7 @@ def lift_terrain():
# "current_seed": env.current_seed
# }
# )
- # if d:
+ # if tm or tc:
# env.reset()
# # assert env.observation_space.contains(o)
env.observation_space.contains(o) # if (s + 1) % 100 == 0: @@ -138,7 +138,7 @@ def lift_terrain(): # time.time() - start, (s + 1) / (time.time() - start) # ) # ) - # if d: + # if tm or tc: # # # env.close() # # # print(len(env.engine._spawned_objects)) # env.reset() diff --git a/metadrive/tests/vis_env/vis_multi_agent_env.py b/metadrive/tests/vis_env/vis_multi_agent_env.py index 0bd8db236..cecec3788 100644 --- a/metadrive/tests/vis_env/vis_multi_agent_env.py +++ b/metadrive/tests/vis_env/vis_multi_agent_env.py @@ -23,13 +23,13 @@ def __init__(self): setup_logger(True) env = TestEnv() - o = env.reset() + o, _ = env.reset() # print("vehicle num", len(env.engine.traffic_manager.vehicles)) for i in range(1, 100000): - o, r, d, info = env.step({key: [0, 0] for key in env.action_space.sample()}) - # o, r, d, info = env.step([0,1]) + o, r, tm, tc, info = env.step({key: [0, 0] for key in env.action_space.sample()}) + # o, r, tm, tc, info = env.step([0,1]) env.render(text={"display_regions": len(env.engine.win.getDisplayRegions())}) - if True in d.values(): + if True in tm.values() or True in tc.values(): # print("Reset") env.reset() env.close() diff --git a/metadrive/tests/vis_env/vis_multi_agent_env_tiny.py b/metadrive/tests/vis_env/vis_multi_agent_env_tiny.py index 49a07d94e..9788ae27f 100644 --- a/metadrive/tests/vis_env/vis_multi_agent_env_tiny.py +++ b/metadrive/tests/vis_env/vis_multi_agent_env_tiny.py @@ -26,11 +26,11 @@ def __init__(self): if __name__ == "__main__": env = TestEnv() - o = env.reset() + o, _ = env.reset() # print("vehicle num", len(env.engine.traffic_manager.vehicles)) for i in range(1, 100000): - o, r, d, info = env.step(env.action_space.sample()) - if True in d.values(): + o, r, tm, tc, info = env.step(env.action_space.sample()) + if True in tm.values() or True in tc.values(): print("Somebody Done. 
", info) # env.reset() env.close() diff --git a/metadrive/tests/vis_env/vis_safe_metadrive.py b/metadrive/tests/vis_env/vis_safe_metadrive.py index a3fb6ea45..a587496e5 100644 --- a/metadrive/tests/vis_env/vis_safe_metadrive.py +++ b/metadrive/tests/vis_env/vis_safe_metadrive.py @@ -17,9 +17,9 @@ } ) - o = env.reset() + o, _ = env.reset() # print("vehicle num", len(env.engine.traffic_manager.vehicles)) for i in range(1, 100000): - o, r, d, info = env.step([0, 1]) + o, r, tm, tc, info = env.step([0, 1]) env.render(text={}) env.close() diff --git a/metadrive/tests/vis_env/vis_topdown.py b/metadrive/tests/vis_env/vis_topdown.py index 77cc39825..ee8b9b5f9 100644 --- a/metadrive/tests/vis_env/vis_topdown.py +++ b/metadrive/tests/vis_env/vis_topdown.py @@ -19,6 +19,7 @@ # "controller": "joystick", # "manual_control": True, "use_render": False, + "render_mode": "top_down", "decision_repeat": 5, "need_inverse_traffic": True, "rgb_clip": True, @@ -49,12 +50,12 @@ import time # [9.95036221 0.99503618] start = time.time() - o = env.reset() + o, _ = env.reset() env.vehicle.set_velocity([1, 0.1], 10) # print(env.vehicle.speed) for s in range(1, 10000): - o, r, d, info = env.step([1, 0.5]) + o, r, tm, tc, info = env.step([1, 0.5]) # print("heading: {} forward_direction: {}".format(env.vehicle.heading, env.vehicle.velocity_direction)) # env.vehicle.set_velocity([1, 10], 10) @@ -64,4 +65,4 @@ # env.close() # env.reset() # info["fuel"] = env.vehicle.energy_consumption - env.render(mode="top_down", track_target_vehicle=True) + env.render(track_target_vehicle=True) diff --git a/metadrive/tests/vis_env/vis_waymo_env.py b/metadrive/tests/vis_env/vis_waymo_env.py index 9c2222548..7c65c0470 100644 --- a/metadrive/tests/vis_env/vis_waymo_env.py +++ b/metadrive/tests/vis_env/vis_waymo_env.py @@ -10,12 +10,12 @@ class DemoWaymoEnv(WaymoEnv): - def reset(self, force_seed=None): - if self.engine is not None and force_seed is None: + def reset(self, seed=None): + if self.engine is not None and seed is None: seeds = [i for i in range(self.config["num_scenarios"])] seeds.remove(self.current_seed) - force_seed = random.choice(seeds) - super(DemoWaymoEnv, self).reset(force_seed=force_seed) + seed = random.choice(seeds) + return super(DemoWaymoEnv, self).reset(seed=seed) if __name__ == "__main__": @@ -37,13 +37,13 @@ def reset(self, force_seed=None): } } ) - o = env.reset(force_seed=0) + o, _ = env.reset(seed=0) for i in range(1, 100000): - o, r, d, info = env.step([1.0, 0.]) + o, r, tm, tc, info = env.step([1.0, 0.]) # print(env.vehicle.height) env.render(text={"seed": env.current_seed, "reward": r}) - if d: + if tm or tc: # print(info["arrive_dest"]) env.reset() env.close() diff --git a/metadrive/tests/vis_functionality/profile_rgb_cam.py b/metadrive/tests/vis_functionality/profile_rgb_cam.py index c613653c8..82da6e815 100644 --- a/metadrive/tests/vis_functionality/profile_rgb_cam.py +++ b/metadrive/tests/vis_functionality/profile_rgb_cam.py @@ -26,11 +26,11 @@ ) start = time.time() for i in range(1, 100000): - o, r, d, info = env.step([0, 0]) + o, r, tm, tc, info = env.step([0, 0]) assert env.observation_space.contains(o) # if i % 1000 == 0: # print("FPS: {}".format(i / (time.time() - start))) - if d: + if tm or tc: # print("Reset") env.reset() env.close() diff --git a/metadrive/tests/vis_functionality/vis_camera_efficiency.py b/metadrive/tests/vis_functionality/vis_camera_efficiency.py index 01cad8788..cdf431cb2 100644 --- a/metadrive/tests/vis_functionality/vis_camera_efficiency.py +++ 
b/metadrive/tests/vis_functionality/vis_camera_efficiency.py @@ -19,11 +19,11 @@ def _test_rgb_camera_as_obs(render=False): show_fps=False, ) ) - obs = env.reset() + obs, _ = env.reset() action = [0.0, 0.1] start = time.time() - for i in range(20000): - o, r, d, _ = env.step(action) + for s in range(20000): + o, r, tm, tc, i = env.step(action) # engine = env.engine # if engine.episode_step <= 1: # engine.graphicsEngine.renderFrame() diff --git a/metadrive/tests/vis_functionality/vis_cuda_profile_metadrive.py b/metadrive/tests/vis_functionality/vis_cuda_profile_metadrive.py index 47fef899a..2b114fbf7 100644 --- a/metadrive/tests/vis_functionality/vis_cuda_profile_metadrive.py +++ b/metadrive/tests/vis_functionality/vis_cuda_profile_metadrive.py @@ -22,11 +22,11 @@ def _test_depth_camera_as_obs(render=False): show_fps=False, ) ) - obs = env.reset() + obs, _ = env.reset() action = [0.0, 0.1] start = time.time() for i in range(20000): - o, r, d, _ = env.step(action) + o, r, tm, tc, _ = env.step(action) if render: ret = o["image"].get()[..., -1] if env.config["image_on_cuda"] else o["image"][..., -1] cv2.imshow("window", ret) @@ -53,17 +53,17 @@ def _test_main_rgb_camera_as_obs_with_interface(render=False): show_fps=False, ) ) - obs = env.reset() + obs, _ = env.reset() action = [0.0, 0.1] start = time.time() reset_time = 0 for i in range(20000): - o, r, d, _ = env.step(action) + o, r, tm, tc, _ = env.step(action) if render: ret = o["image"].get()[..., -1] if env.config["image_on_cuda"] else o["image"][..., -1] cv2.imshow("window", ret) cv2.waitKey(1) - if d: + if tm or tc: current = time.time() # env.reset() # reset_time += time.time()-current @@ -87,16 +87,16 @@ def _test_main_rgb_camera_no_interface(render=False): show_fps=False, ) ) - obs = env.reset() + obs, _ = env.reset() action = [0.0, 0.1] start = time.time() for i in range(20000): - o, r, d, _ = env.step(action) + o, r, tm, tc, _ = env.step(action) if render: ret = o["image"].get()[..., -1] if env.config["image_on_cuda"] else o["image"][..., -1] cv2.imshow("window", ret) cv2.waitKey(1) - if d: + if tm or tc: # print("FPS: {}".format(i / (time.time() - start))) # env.reset() break @@ -118,16 +118,16 @@ def _test_rgb_camera_as_obs(render=False): show_fps=False, ) ) - obs = env.reset() + obs, _ = env.reset() action = [0.0, 0.1] start = time.time() for i in range(20000): - o, r, d, _ = env.step(action) + o, r, tm, tc, _ = env.step(action) if render: ret = o["image"].get()[..., -1] if env.config["image_on_cuda"] else o["image"][..., -1] cv2.imshow("window", ret) cv2.waitKey(1) - if d: + if tm or tc: # print("FPS: {}".format(i / (time.time() - start))) env.reset() # break diff --git a/metadrive/tests/vis_functionality/vis_depth_cam_ground.py b/metadrive/tests/vis_functionality/vis_depth_cam_ground.py index 5fe0efe3c..1ff3ddd44 100644 --- a/metadrive/tests/vis_functionality/vis_depth_cam_ground.py +++ b/metadrive/tests/vis_functionality/vis_depth_cam_ground.py @@ -42,13 +42,13 @@ def get_image(env): env.engine.accept("m", get_image, extraArgs=[env]) for i in range(1, 100000): - o, r, d, info = env.step([0, 1]) + o, r, tm, tc, info = env.step([0, 1]) assert env.observation_space.contains(o) if env.config["use_render"]: # for i in range(ImageObservation.STACK_SIZE): # ObservationType.show_gray_scale_array(o["image"][:, :, i]) env.render() - # if d: + # if tm or tc: # # print("Reset") # env.reset() env.close() diff --git a/metadrive/tests/vis_functionality/vis_depth_cam_no_ground.py b/metadrive/tests/vis_functionality/vis_depth_cam_no_ground.py 
index 39aef41e8..6d5e3387b 100644
--- a/metadrive/tests/vis_functionality/vis_depth_cam_no_ground.py
+++ b/metadrive/tests/vis_functionality/vis_depth_cam_no_ground.py
@@ -25,13 +25,13 @@
     env.engine.accept("m", env.vehicle.get_camera("depth_camera").save_image, extraArgs=[env.vehicle, "debug.jpg"])
     for i in range(1, 100000):
-        o, r, d, info = env.step([0, 1])
+        o, r, tm, tc, info = env.step([0, 1])
         assert env.observation_space.contains(o)
         if env.config["use_render"]:
             # for i in range(ImageObservation.STACK_SIZE):
             #     ObservationType.show_gray_scale_array(o["image"][:, :, i])
             env.render(text={"can you see me": i})
-        if d:
+        if tm or tc:
             env.vehicle.get_camera("rgb_camera").save_image(env.vehicle)
             # print("Reset")
             env.reset()
diff --git a/metadrive/tests/vis_functionality/vis_grayscale_cam.py b/metadrive/tests/vis_functionality/vis_grayscale_cam.py
index 9c9de028d..1b7372050 100644
--- a/metadrive/tests/vis_functionality/vis_grayscale_cam.py
+++ b/metadrive/tests/vis_functionality/vis_grayscale_cam.py
@@ -27,7 +27,7 @@
     import cv2
     for i in range(1, 100000):
-        o, r, d, info = env.step([0, 1])
+        o, r, tm, tc, info = env.step([0, 1])
         assert env.observation_space.contains(o)
         # save
         rgb_cam = env.vehicle.get_camera(env.vehicle.config["image_source"])
@@ -40,7 +40,7 @@
         #     ObservationType.show_gray_scale_array(o["image"][:, :, i])
         # image = env.render(mode="any str except human", text={"can you see me": i})
         # ObservationType.show_gray_scale_array(image)
-        if d:
+        if tm or tc:
             # print("Reset")
             env.reset()
     env.close()
diff --git a/metadrive/tests/vis_functionality/vis_highway_render.py b/metadrive/tests/vis_functionality/vis_highway_render.py
index cf6442253..1c6c61354 100644
--- a/metadrive/tests/vis_functionality/vis_highway_render.py
+++ b/metadrive/tests/vis_functionality/vis_highway_render.py
@@ -16,10 +16,10 @@ def vis_top_down_render_with_panda_render():
             "traffic_mode": "respawn"
         }
     )
-    o = env.reset()
+    o, _ = env.reset()
     s = time.time()
     for i in range(1, 100000):
-        o, r, d, info = env.step(env.action_space.sample())
+        o, r, tm, tc, info = env.step(env.action_space.sample())
         env.render(
             text={
                 "vehicle_num": len(env.engine.traffic_manager.vehicles),
diff --git a/metadrive/tests/vis_functionality/vis_manual_control_top_down_env.py b/metadrive/tests/vis_functionality/vis_manual_control_top_down_env.py
index 90ee13ee8..57f367cd0 100644
--- a/metadrive/tests/vis_functionality/vis_manual_control_top_down_env.py
+++ b/metadrive/tests/vis_functionality/vis_manual_control_top_down_env.py
@@ -20,12 +20,12 @@
     try:
         ep_reward = 0
         while True:
-            o, r, d, i = env.step([0.01, 1])
-            # print("Obs shape {}, reward {:.8f}, done {}, info {}".format(o.shape, r, d, i))
+            o, r, tm, tc, i = env.step([0.01, 1])
+            # print("Obs shape {}, reward {:.8f}, terminated {}, truncated {}, info {}".format(o.shape, r, tm, tc, i))
             env.render()
             ep_reward += r
-            if d:
+            if tm or tc:
                 # print("Episode reward: ", ep_reward)
                 ep_reward = 0
                 env.reset()
diff --git a/metadrive/tests/vis_functionality/vis_memory_leak.py b/metadrive/tests/vis_functionality/vis_memory_leak.py
index 92c924b71..98ec6098b 100644
--- a/metadrive/tests/vis_functionality/vis_memory_leak.py
+++ b/metadrive/tests/vis_functionality/vis_memory_leak.py
@@ -26,8 +26,8 @@ def step(self, action):
 env = TestMemoryLeakEnv()
 env.reset()
 for i in range(1, 100000):
-    o, r, d, info = env.step([0, 1])
+    o, r, tm, tc, info = env.step([0, 1])
     # env.render("Test: {}".format(i))
-    if d:
+    if tm or tc:
         env.reset()
 env.close()
diff --git a/metadrive/tests/vis_functionality/vis_mini_map.py b/metadrive/tests/vis_functionality/vis_mini_map.py
index 627c89e05..036c33607 100644
--- a/metadrive/tests/vis_functionality/vis_mini_map.py
+++ b/metadrive/tests/vis_functionality/vis_mini_map.py
@@ -17,14 +17,14 @@
     env.engine.accept("m", env.vehicle.get_camera([env.config["image_source"]]).save_image)
     for i in range(1, 100000):
-        o, r, d, info = env.step([0, 1])
+        o, r, tm, tc, info = env.step([0, 1])
         assert env.observation_space.contains(o)
         if env.config["use_render"]:
             # from metadrive.envs.observation_type import ObservationType, ImageObservation
             # for i in range(ImageObservation.STACK_SIZE):
             #     ObservationType.show_gray_scale_array(o["image"][:, :, i])
             env.render(text={"can you see me": i})
-        if d:
+        if tm or tc:
             # print("Reset")
             env.reset()
     env.close()
diff --git a/metadrive/tests/vis_functionality/vis_pedestrian.py b/metadrive/tests/vis_functionality/vis_pedestrian.py
index 529fceb71..0a0cf65f3 100644
--- a/metadrive/tests/vis_functionality/vis_pedestrian.py
+++ b/metadrive/tests/vis_functionality/vis_pedestrian.py
@@ -61,7 +61,7 @@
     import time
     start = time.time()
-    o = env.reset()
+    o, _ = env.reset()
     obj_1 = env.engine.spawn_object(Pedestrian, position=[30, 0], heading_theta=0, random_seed=1)
     obj_2 = env.engine.spawn_object(Pedestrian, position=[30, 6], heading_theta=0, random_seed=1)
     c_1 = env.engine.spawn_object(Cyclist, position=[30, 8], heading_theta=0, random_seed=1)
@@ -73,7 +73,7 @@
     env.vehicle.set_velocity([10, 0], in_local_frame=False)
     for s in range(1, 10000):
         # print(c_1.heading_theta)
-        o, r, d, info = env.step(env.action_space.sample())
+        o, r, tm, tc, info = env.step(env.action_space.sample())
         # obj_1.set_velocity([1, 0], 2, in_local_frame=True)
         # obj_2.set_velocity([1, 0], 0.8, in_local_frame=True)
         if s == 300:
diff --git a/metadrive/tests/vis_functionality/vis_render_msg.py b/metadrive/tests/vis_functionality/vis_render_msg.py
index 551b1c126..9043e6470 100644
--- a/metadrive/tests/vis_functionality/vis_render_msg.py
+++ b/metadrive/tests/vis_functionality/vis_render_msg.py
@@ -28,6 +28,6 @@
     env.reset()
     for i in range(1, 100000):
-        o, r, d, info = env.step([0, 1])
+        o, r, tm, tc, info = env.step([0, 1])
         env.render(text={"Frame": i, "Speed": env.vehicle.speed_km_h})
     env.close()
diff --git a/metadrive/tests/vis_functionality/vis_rgb_cam.py b/metadrive/tests/vis_functionality/vis_rgb_cam.py
index 9aec7324f..fa6788d59 100644
--- a/metadrive/tests/vis_functionality/vis_rgb_cam.py
+++ b/metadrive/tests/vis_functionality/vis_rgb_cam.py
@@ -26,7 +26,7 @@
     import cv2
     for i in range(1, 100000):
-        o, r, d, info = env.step([0, 1])
+        o, r, tm, tc, info = env.step([0, 1])
         assert env.observation_space.contains(o)
         # save
         rgb_cam = env.vehicle.get_camera(env.vehicle.config["image_source"])
@@ -39,7 +39,7 @@
         #     ObservationType.show_gray_scale_array(o["image"][:, :, i])
         # image = env.render(mode="any str except human", text={"can you see me": i})
         # ObservationType.show_gray_scale_array(image)
-        if d:
+        if tm or tc:
             # print("Reset")
             env.reset()
     env.close()
diff --git a/metadrive/tests/vis_functionality/vis_saver.py b/metadrive/tests/vis_functionality/vis_saver.py
index 5328f35dd..f5fd07e14 100644
--- a/metadrive/tests/vis_functionality/vis_saver.py
+++ b/metadrive/tests/vis_functionality/vis_saver.py
@@ -17,10 +17,10 @@
         }
     )
-    o = env.reset()
+    o, _ = env.reset()
     # env.engine.force_fps.toggle()
     for i in range(1, 100000):
-        o, r, d, info = env.step([0, 1])
+        o, r, tm, tc, info = env.step([0, 1])
         text = {"save": env.save_mode}
         env.render(text=text)
-        # if d:
+        # if tm or tc:
diff --git a/metadrive/tests/vis_functionality/vis_shared_camera.py
b/metadrive/tests/vis_functionality/vis_shared_camera.py index 873ec1fae..14dac2881 100644 --- a/metadrive/tests/vis_functionality/vis_shared_camera.py +++ b/metadrive/tests/vis_functionality/vis_shared_camera.py @@ -21,7 +21,7 @@ def vis_ma_parking_lot_env(): } ) env.reset() - o, r, d, i = env.step(env.action_space.sample()) + o, r, tm, tc, i = env.step(env.action_space.sample()) for i in range(4): cv2.imshow('img', o["agent{}".format(i)]["image"][..., -1]) cv2.waitKey(0) diff --git a/metadrive/tests/vis_functionality/vis_two_speed_retrieve.py b/metadrive/tests/vis_functionality/vis_two_speed_retrieve.py index b8ff3d0e1..8a2380328 100644 --- a/metadrive/tests/vis_functionality/vis_two_speed_retrieve.py +++ b/metadrive/tests/vis_functionality/vis_two_speed_retrieve.py @@ -24,7 +24,7 @@ brake = [-1, -np.nan] env.reset() for i in range(1, 100000): - o, r, d, info = env.step(acc) + o, r, tm, tc, info = env.step(acc) # print( # "new:{}, old:{}, diff:{}".format( # env.vehicle.speed_km_h, env.vehicle.system.get_current_speed_km_hour(), diff --git a/metadrive/tests/vis_functionality/vis_vehicle_num.py b/metadrive/tests/vis_functionality/vis_vehicle_num.py index 49f92e79b..876a1e593 100644 --- a/metadrive/tests/vis_functionality/vis_vehicle_num.py +++ b/metadrive/tests/vis_functionality/vis_vehicle_num.py @@ -9,7 +9,7 @@ env.reset() count = [] for i in range(1, 101): - o, r, d, info = env.step([0, 1]) + o, r, tm, tc, info = env.step([0, 1]) env.reset() # print( # "Current map {}, vehicle number {}.".format(env.current_seed, env.engine.traffic_manager.get_vehicle_num()) diff --git a/metadrive/utils/config.py b/metadrive/utils/config.py index 22f1c80ed..05bb20771 100644 --- a/metadrive/utils/config.py +++ b/metadrive/utils/config.py @@ -20,7 +20,7 @@ def merge_config(old_dict, new_dict, new_keys_allowed=False): return Config(merged) -def _check_keys(new_config, old_config, prefix=""): +def _check_keys(new_config: Union[dict, "Config"], old_config: Union[dict, "Config"], prefix=""): if isinstance(new_config, Config): new_config = new_config.get_dict() if isinstance(old_config, Config): @@ -75,7 +75,7 @@ class Config: For these items, use Config["your key"] = None to init your PgConfig, then it will not implement type check at the first time. key "config" in map.py and key "force_fps" in world.py are good examples. 
""" - def __init__(self, config: Union[dict, "Config"], unchangeable=False): + def __init__(self, config: Union["Config", dict], unchangeable=False): self._unchangeable = False if isinstance(config, Config): config = config.get_dict() diff --git a/metadrive/utils/waymo/script/filter_cases.py b/metadrive/utils/waymo/script/filter_cases.py index a21fe4fda..d9894fc41 100644 --- a/metadrive/utils/waymo/script/filter_cases.py +++ b/metadrive/utils/waymo/script/filter_cases.py @@ -63,10 +63,10 @@ def handler(signum, frame): try: signal.signal(signal.SIGALRM, handler) signal.alarm(10) - env.reset(force_seed=i) + env.reset(seed=i) while True: - o, r, d, info = env.step([0, 0]) - if d or env.episode_step > max_step: + o, r, tm, tc, info = env.step([0, 0]) + if tm or tc or env.episode_step > max_step: if info["arrive_dest"] and env.episode_step > min_step: os.rename( os.path.join(scenario_data_path, "{}.pkl".format(i + start * 1000)), diff --git a/setup.py b/setup.py index 88eef6eb5..b0379dbdd 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ def is_win(): version = "0.3.0.1" install_requires = [ - "gym>=0.20.0, <0.26.0, !=0.23.*, !=0.24.*", + "gymnasium>=0.28, <0.29", "numpy>=1.21.6, <=1.24.2", "matplotlib", "pandas", @@ -80,6 +80,10 @@ def is_win(): "glfw", ] +gym_requirement = [ + "gym>=0.20.0, <=0.26.0" +] + setup( name="metadrive-simulator", python_requires='>=3.6, <3.12', # do version check with assert @@ -94,6 +98,7 @@ def is_win(): "cuda": cuda_requirement, "nuplan": nuplan_requirement, "waymo": waymo_requirement, + "gym": gym_requirement, "all": nuplan_requirement + cuda_requirement }, include_package_data=True,