go all in on the new termination + truncation semantics
pimpale committed May 14, 2023
commit 57d62d0d94345bcf07177436a79f0a7f33ceb660
2 changes: 1 addition & 1 deletion metadrive/engine/base_engine.py
@@ -502,7 +502,7 @@ def agents(self):
def setup_main_camera(self):
from metadrive.engine.core.main_camera import MainCamera
# Note: we should always enable the main camera if image obs is required! Otherwise RGBCamera will return an incorrect result
if self.global_config["use_render"] or self.global_config["image_observation"]:
if self.global_config["render_mode"] is not None or self.global_config["image_observation"]:
return MainCamera(self, self.global_config["camera_height"], self.global_config["camera_dist"])
else:
return None
2 changes: 1 addition & 1 deletion metadrive/engine/core/engine_core.py
@@ -101,7 +101,7 @@ def __init__(self, global_config):
loadPrcFileData("", "want-pstats 1")

# Setup onscreen render
if self.global_config["use_render"]:
if self.global_config["render_mode"] != None:
self.mode = RENDER_MODE_ONSCREEN
# Warning: it may cause a memory leak; the official Panda3d repo has fixed this in their master branch.
# You can enable it if your panda version is latest.
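The two engine-side changes above replace the old use_render boolean with a check on the new render_mode config entry: the onscreen renderer and the main camera are now gated on render_mode being set (or image_observation being requested). A minimal configuration sketch under the new semantics; the MetaDriveEnv entry point is an assumption and not shown in this diff, and the only render_mode values documented here are "human", "rgb", and None:

    from metadrive.envs.metadrive_env import MetaDriveEnv  # assumed entry point, not part of this diff

    # old (pre-commit):  MetaDriveEnv(dict(use_render=True))
    # new (this commit): render_mode drives the setup
    #   "human" -> pop a window to render onscreen
    #   "rgb"   -> render() returns a numpy array
    #   None    -> do neither; main camera is only created if image_observation=True
    env = MetaDriveEnv(dict(render_mode="human", image_observation=False))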
56 changes: 37 additions & 19 deletions metadrive/envs/base_env.py
@@ -4,7 +4,6 @@
from typing import Union, Dict, AnyStr, Optional, Tuple, Callable

import gymnasium as gym
from gymnasium.wrappers.compatibility import LegacyEnv

import numpy as np
from panda3d.core import PNMImage
@@ -52,7 +51,7 @@
action_check=False,

# ===== Rendering =====
use_render=False, # pop a window to render or not
render_mode=None, # if "human" pop a window to render, if "rgb", return numpy array, if None, do neither
debug=False,
disable_model_compression=False, # disable compression if you wish to launch the window quicker.
cull_scene=True, # only for debug use
@@ -201,7 +200,7 @@
)


class BaseEnv(LegacyEnv):
class BaseEnv(gym.Env):
# Force to use this seed if necessary. Note that the recipient of the forced seed should be explicitly implemented.
_DEBUG_RANDOM_SEED = None
DEFAULT_AGENT = DEFAULT_AGENT
@@ -211,7 +210,7 @@ def default_config(cls) -> "Config":
return Config(BASE_DEFAULT_CONFIG)

# ===== Intialization =====
def __init__(self, config: dict = None):
def __init__(self, config: dict|None = None):
if config is None:
config = {}
merged_config = self._merge_extra_config(config)
@@ -238,7 +237,6 @@ def __init__(self, config: dict = None):
# self.engine: Optional[BaseEngine] = None

# In MARL envs with respawn mechanism, varying episode lengths might happen.
self.dones = None
self.episode_rewards = defaultdict(float)
self.episode_lengths = defaultdict(int)

@@ -296,8 +294,7 @@ def _after_lazy_init(self):
def step(self, actions: Union[np.ndarray, Dict[AnyStr, np.ndarray], int]):
actions = self._preprocess_actions(actions)
engine_info = self._step_simulator(actions)
o, r, d, i = self._get_step_return(actions, engine_info=engine_info)
return o, r, d, i
return self._get_step_return(actions, engine_info=engine_info)

def _preprocess_actions(self, actions: Union[np.ndarray, Dict[AnyStr, np.ndarray], int]) \
-> Union[np.ndarray, Dict[AnyStr, np.ndarray], int]:
@@ -345,22 +342,24 @@ def done_function(self, vehicle_id: str) -> Tuple[bool, Dict]:
raise NotImplementedError()

def render(self,
mode='human',
text: Optional[Union[dict, str]] = None,
return_bytes=False,
*args,
**kwargs) -> Optional[np.ndarray]:
"""
This is a pseudo-render function, only used to update onscreen message when using panda3d backend
:param mode: 'rgb'/'human'
:param text:text to show
:return: when mode is 'rgb', image array is returned
"""

# render mode is set at environment creation time
mode = self.config['render_mode']

if mode in ["top_down", "topdown", "bev", "birdview"]:
ret = self._render_topdown(text=text, *args, **kwargs)
return ret
assert self.config["use_render"] or self.engine.mode != RENDER_MODE_NONE, \
("Panda Renderring is off now, can not render. Please set config['use_render'] = True!")
assert mode is not None or self.engine.mode != RENDER_MODE_NONE, \
("Panda Renderring is off now, can not render. Please set config['render_mode'] != None!")

self.engine.render_frame(text)

@@ -369,7 +368,6 @@ def render(self,
return self.vehicle.observations.img_obs.get_image()

if mode == "rgb_array":
assert self.config["use_render"], "You should create a Panda3d window before rendering images!"
# if not hasattr(self, "temporary_img_obs"):
# from metadrive.obs.image_obs import ImageObservation
# image_source = "rgb_camera"
@@ -413,12 +411,30 @@ def reset(self, force_seed: Union[None, int] = None):
return self._get_reset_return()

def _get_reset_return(self):
ret = {}
self.engine.after_step()
# TODO: figure out how to get the before-step information
scene_manager_before_step_infos = {}
scene_manager_after_step_infos = self.engine.after_step()

obses = {}
done_infos = {}
cost_infos = {}
reward_infos = {}
engine_info = merge_dicts(
scene_manager_after_step_infos, scene_manager_before_step_infos, allow_new_keys=True, without_copy=True
)
for v_id, v in self.vehicles.items():
self.observations[v_id].reset(self, v)
ret[v_id] = self.observations[v_id].observe(v)
return ret if self.is_multi_agent else self._wrap_as_single_agent(ret)
obses[v_id] = self.observations[v_id].observe(v)
_, reward_infos[v_id] = self.reward_function(v_id)
_, done_infos[v_id] = self.done_function(v_id)
_, cost_infos[v_id] = self.cost_function(v_id)

step_infos = concat_step_infos([engine_info, done_infos, reward_infos, cost_infos])

if self.is_multi_agent:
return (obses, step_infos)
else:
return (self._wrap_as_single_agent(obses), self._wrap_as_single_agent(step_infos))

def _get_step_return(self, actions, engine_info):
# update obs, dones, rewards, costs, calculate done at first !
@@ -446,7 +462,9 @@ def _get_step_return(self, actions, engine_info):
for k in self.dones:
self.dones[k] = True

dones = {k: self.dones[k] for k in self.vehicles.keys()}
terminateds = {k: self.dones[k] for k in self.vehicles.keys()}
truncateds = {k: False for k in self.vehicles.keys()}

for v_id, r in rewards.items():
self.episode_rewards[v_id] += r
step_infos[v_id]["episode_reward"] = self.episode_rewards[v_id]
@@ -455,9 +473,9 @@ def _get_step_return(self, actions, engine_info):

if not self.is_multi_agent:
return self._wrap_as_single_agent(obses), self._wrap_as_single_agent(rewards), \
self._wrap_as_single_agent(dones), self._wrap_as_single_agent(step_infos)
self._wrap_as_single_agent(terminateds), self._wrap_as_single_agent(truncateds), self._wrap_as_single_agent(step_infos)
else:
return obses, rewards, dones, step_infos
return obses, rewards, terminateds, truncateds, step_infos

def close(self):
if self.engine is not None:
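Taken together, the base_env.py changes move BaseEnv from the wrapped legacy gym API onto gymnasium directly: reset() now returns an (observation, info) pair built from the reward/done/cost functions, and step() returns the five-tuple (obs, reward, terminated, truncated, info), with truncated always False in this commit. A hedged single-agent rollout sketch; the MetaDriveEnv class name and the loop itself are illustrative rather than part of this diff:

    from metadrive.envs.metadrive_env import MetaDriveEnv  # assumed concrete BaseEnv subclass

    env = MetaDriveEnv(dict(render_mode=None))
    obs, info = env.reset()          # reset() now also returns the step infos

    done = False
    while not done:
        action = env.action_space.sample()
        # step() returns the gymnasium 5-tuple instead of (obs, reward, done, info)
        obs, reward, terminated, truncated, info = env.step(action)
        # terminated: episode ended for task reasons (crash, out of road, success, ...)
        # truncated:  external cut-off; always False in this commit
        done = terminated or truncated

    env.close()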
30 changes: 15 additions & 15 deletions metadrive/envs/marl_envs/marl_bidirection.py
@@ -179,13 +179,13 @@ def _expert():
total_r = 0
ep_s = 0
for i in range(1, 100000):
o, r, d, info = env.step(env.action_space.sample())
o, r, tm, tc, info = env.step(env.action_space.sample())
for r_ in r.values():
total_r += r_
ep_s += 1
d.update({"total_r": total_r, "episode length": ep_s})
tm.update({"total_r": total_r, "episode length": ep_s})
# env.render(text=d)
if d["__all__"]:
if tm["__all__"]:
print(
"Finish! Current step {}. Group Reward: {}. Average reward: {}".format(
i, total_r, total_r / env.agent_manager.next_agent_count
@@ -223,7 +223,7 @@ def _vis_debug_respawn():
ep_s = 0
for i in range(1, 100000):
action = {k: [.0, 1.0] for k in env.vehicles.keys()}
o, r, d, info = env.step(action)
o, r, tm, tc, info = env.step(action)
for r_ in r.values():
total_r += r_
ep_s += 1
@@ -236,7 +236,7 @@ def _vis_debug_respawn():
"cam_z": env.main_camera.top_down_camera_height
}
env.render(text=render_text)
if d["__all__"]:
if tm["__all__"]:
print(
"Finish! Current step {}. Group Reward: {}. Average reward: {}".format(
i, total_r, total_r / env.agent_manager.next_agent_count
@@ -272,7 +272,7 @@ def _vis():
total_r = 0
ep_s = 0
for i in range(1, 100000):
o, r, d, info = env.step({k: [1.0, .0] for k in env.vehicles.keys()})
o, r, tm, tc, info = env.step({k: [1.0, .0] for k in env.vehicles.keys()})
for r_ in r.values():
total_r += r_
ep_s += 1
@@ -290,7 +290,7 @@ def _vis():
render_text["dist_to_right"] = env.current_track_vehicle.dist_to_right_side
render_text["dist_to_left"] = env.current_track_vehicle.dist_to_left_side
env.render(text=render_text)
if d["__all__"]:
if tm["__all__"]:
print(
"Finish! Current step {}. Group Reward: {}. Average reward: {}".format(
i, total_r, total_r / env.agent_manager.next_agent_count
@@ -310,12 +310,12 @@ def _profile():
obs = env.reset()
start = time.time()
for s in range(10000):
o, r, d, i = env.step(env.action_space.sample())
o, r, tm, tc, i = env.step(env.action_space.sample())

# mask_ratio = env.engine.detector_mask.get_mask_ratio()
# print("Mask ratio: ", mask_ratio)

if all(d.values()):
if all(tm.values()):
env.reset()
if (s + 1) % 100 == 0:
print(
@@ -353,20 +353,20 @@ def _long_run():
assert env.observation_space.contains(obs)
for step in range(10000):
act = env.action_space.sample()
o, r, d, i = env.step(act)
o, r, tm, tc, i = env.step(act)
if step == 0:
assert not any(d.values())
assert not any(tm.values())

if any(d.values()):
if any(tm.values()):
print("Current Done: {}\nReward: {}".format(d, r))
for kkk, ddd in d.items():
for kkk, ddd in tm.items():
if ddd and kkk != "__all__":
print("Info {}: {}\n".format(kkk, i[kkk]))
print("\n")

for kkk, rrr in r.items():
if rrr == -_out_of_road_penalty:
assert d[kkk]
assert tm[kkk]

if (step + 1) % 200 == 0:
print(
@@ -376,7 +376,7 @@ def _long_run():
for k, oo in o.items()}, r, tm, i
)
)
if d["__all__"]:
if tm["__all__"]:
print('Current step: ', step)
break
finally:
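The same renaming runs through every multi-agent script: the per-agent done dict d becomes the terminated dict tm, an all-False truncated dict tc is added, and the "__all__" key keeps its old meaning of "every agent is finished". A sketch of the updated multi-agent loop, assuming env is an already-constructed multi-agent MetaDrive env (no particular class is implied):

    env.reset()
    for _ in range(1000):
        # step() now yields per-agent terminated (tm) and truncated (tc) dicts
        o, r, tm, tc, info = env.step(env.action_space.sample())
        for agent_id, terminated in tm.items():
            if terminated and agent_id != "__all__":
                print("agent", agent_id, "finished:", info[agent_id])
        if tm["__all__"]:        # every agent has terminated
            env.reset()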
26 changes: 13 additions & 13 deletions metadrive/envs/marl_envs/marl_parking_lot.py
@@ -319,13 +319,13 @@ def _expert():
total_r = 0
ep_s = 0
for i in range(1, 100000):
o, r, d, info = env.step(env.action_space.sample())
o, r, tm, tc, info = env.step(env.action_space.sample())
for r_ in r.values():
total_r += r_
ep_s += 1
d.update({"total_r": total_r, "episode length": ep_s})
tm.update({"total_r": total_r, "episode length": ep_s})
# env.render(text=d)
if d["__all__"]:
if tm["__all__"]:
print(
"Finish! Current step {}. Group Reward: {}. Average reward: {}".format(
i, total_r, total_r / env.agent_manager.next_agent_count
@@ -363,7 +363,7 @@ def _vis_debug_respawn():
ep_s = 0
for i in range(1, 100000):
action = {k: [0.0, .0] for k in env.vehicles.keys()}
o, r, d, info = env.step(action)
o, r, tm, tc, info = env.step(action)
for r_ in r.values():
total_r += r_
ep_s += 1
@@ -376,7 +376,7 @@ def _vis_debug_respawn():
"cam_z": env.main_camera.top_down_camera_height
}
env.render(text=render_text)
if d["__all__"]:
if tm["__all__"]:
print(
"Finish! Current step {}. Group Reward: {}. Average reward: {}".format(
i, total_r, total_r / env.agent_manager.next_agent_count
@@ -421,7 +421,7 @@ def _vis():
actions = {k: [1.0, .0] for k in env.vehicles.keys()}
if len(env.vehicles) == 1:
actions = {k: [-1.0, .0] for k in env.vehicles.keys()}
o, r, d, info = env.step(actions)
o, r, tm, tc, info = env.step(actions)
for r_ in r.values():
total_r += r_
ep_s += 1
@@ -463,7 +463,7 @@ def _vis():
}
)
)
if d["__all__"]:
if tm["__all__"]:
print(
"Finish! Current step {}. Group Reward: {}. Average reward: {}".format(
i, total_r, total_r / env.agent_manager.next_agent_count
@@ -484,12 +484,12 @@ def _profile():
obs = env.reset()
start = time.time()
for s in range(10000):
o, r, d, i = env.step(env.action_space.sample())
o, r, tm, tc, i = env.step(env.action_space.sample())

# mask_ratio = env.engine.detector_mask.get_mask_ratio()
# print("Mask ratio: ", mask_ratio)

if all(d.values()):
if all(tm.values()):
env.reset()
if (s + 1) % 100 == 0:
print(
@@ -527,13 +527,13 @@ def _long_run():
assert env.observation_space.contains(obs)
for step in range(10000):
act = env.action_space.sample()
o, r, d, i = env.step(act)
o, r, tm, tc, i = env.step(act)
if step == 0:
assert not any(tm.values())

if any(d.values()):
if any(tm.values()):
print("Current Done: {}\nReward: {}".format(d, r))
for kkk, ddd in d.items():
for kkk, ddd in tm.items():
if ddd and kkk != "__all__":
print("Info {}: {}\n".format(kkk, i[kkk]))
print("\n")
@@ -550,7 +550,7 @@ def _long_run():
for k, oo in o.items()}, r, tm, i
)
)
if d["__all__"]:
if tm["__all__"]:
print('Current step: ', step)
break
finally: