diff --git a/gym/envs/box2d/lunar_lander.py b/gym/envs/box2d/lunar_lander.py index 0d9c6c0484b..c76646247ff 100644 --- a/gym/envs/box2d/lunar_lander.py +++ b/gym/envs/box2d/lunar_lander.py @@ -123,7 +123,13 @@ class LunarLander(gym.Env, EzPickle): `continuous=True` argument like below: ```python import gym - env = gym.make("LunarLander-v2", continuous=True) + env = gym.make( + "LunarLander-v2", + continuous: bool = False, + gravity: float = -10.0, + enable_wind: bool = False, + wind_power: float = 15.0, + ) ``` If `continuous=True` is passed, continuous actions (corresponding to the throttle of the engines) will be used and the action space will be `Box(-1, +1, (2,), dtype=np.float32)`. @@ -136,6 +142,15 @@ class LunarLander(gym.Env, EzPickle): booster will fire, and if `lateral > 0.5`, the right booster will fire. Again, the throttle scales affinely from 50% to 100% between -1 and -0.5 (and 0.5 and 1, respectively). + `gravity` dictates the gravitational constant, this is bounded to be within 0 and -12. + + If `enable_wind=True` is passed, there will be wind effects applied to the lander. + The wind is generated using the function `tanh(sin(2 k (t+C)) + sin(pi k (t+C)))`. + `k` is set to 0.01. + `C` is sampled randomly between -9999 and 9999. + + `wind_power` dictates the maximum magnitude of wind. + ### Version History - v2: Count energy spent - v1: Legs contact with ground added in state vector; contact with ground @@ -151,12 +166,32 @@ class LunarLander(gym.Env, EzPickle): metadata = {"render_modes": ["human", "rgb_array"], "render_fps": FPS} - def __init__(self, continuous: bool = False): + def __init__( + self, + continuous: bool = False, + gravity: float = -10.0, + enable_wind: bool = False, + wind_power: float = 15.0, + ): EzPickle.__init__(self) + + assert ( + -12.0 < gravity and gravity < 0.0 + ), f"gravity (current value: {gravity}) must be between -12 and 0" + self.gravity = gravity + + assert ( + 0.0 < wind_power and wind_power < 20.0 + ), f"wind_power (current value: {wind_power}) must be between 0 and 20" + self.wind_power = wind_power + + self.enable_wind = enable_wind + self.wind_idx = np.random.randint(-9999, 9999) + self.screen = None self.clock = None self.isopen = True - self.world = Box2D.b2World() + self.world = Box2D.b2World(gravity=(0, gravity)) self.moon = None self.lander = None self.particles = [] @@ -361,6 +396,25 @@ def _clean_particles(self, all): self.world.DestroyBody(self.particles.pop(0)) def step(self, action): + # Update wind + if self.enable_wind and not ( + self.legs[0].ground_contact or self.legs[1].ground_contact + ): + # the function used for wind is tanh(sin(2 k x) + sin(pi k x)), + # which is proven to never be periodic, k = 0.01 + wind_mag = ( + math.tanh( + math.sin(0.02 * self.wind_idx) + + (math.sin(math.pi * 0.01 * self.wind_idx)) + ) + * self.wind_power + ) + self.wind_idx += 1 + self.lander.ApplyForceToCenter( + (wind_mag, 0.0), + True, + ) + if self.continuous: action = np.clip(action, -1, +1).astype(np.float32) else: @@ -383,9 +437,8 @@ def step(self, action): assert m_power >= 0.5 and m_power <= 1.0 else: m_power = 1.0 - ox = ( - tip[0] * (4 / SCALE + 2 * dispersion[0]) + side[0] * dispersion[1] - ) # 4 is move a bit downwards, +-2 for randomness + # 4 is move a bit downwards, +-2 for randomness + ox = tip[0] * (4 / SCALE + 2 * dispersion[0]) + side[0] * dispersion[1] oy = -tip[1] * (4 / SCALE + 2 * dispersion[0]) - side[1] * dispersion[1] impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy) p = self._create_particle( @@ -649,6 +702,10 @@ def heuristic(env, s): def demo_heuristic_lander(env, seed=None, render=False): + + # wind power must be reduced for heuristic landing + env.wind_power = 0.2 + total_reward = 0 steps = 0 s = env.reset(seed=seed) diff --git a/tests/envs/test_lunar_lander.py b/tests/envs/test_lunar_lander.py index 5aeab73f594..b15645859d9 100644 --- a/tests/envs/test_lunar_lander.py +++ b/tests/envs/test_lunar_lander.py @@ -18,6 +18,16 @@ def test_lunar_lander_continuous(): _test_lander(LunarLander(continuous=True), seed=0) +@pytest.mark.skipif(Box2D is None, reason="Box2D not installed") +def test_lunar_lander_wind(): + _test_lander(LunarLander(enable_wind=True), seed=0) + + +@pytest.mark.skipif(Box2D is None, reason="Box2D not installed") +def test_lunar_lander_wind_continuous(): + _test_lander(LunarLander(continuous=True, enable_wind=True), seed=0) + + @pytest.mark.skipif(Box2D is None, reason="Box2D not installed") def _test_lander(env, seed=None, render=False): total_reward = demo_heuristic_lander(env, seed=seed, render=render)