diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 622bba139..4ac3ce536 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -33,19 +33,18 @@ jobs:
         python -m pip install --upgrade pip
         pip install setuptools==65.5.0
         # cpu version of pytorch - faster to download
-        pip install torch==1.11+cpu -f https://download.pytorch.org/whl/torch_stable.html
-        pip install pybullet==3.1.9
+        pip install torch==1.11.0+cpu --extra-index-url https://download.pytorch.org/whl/cpu
+        pip install pybullet==3.2.5
         pip install -r requirements.txt
         # Use headless version
         pip install opencv-python-headless
-        # install parking-env to test HER (pinned so it works with gym 0.21)
-        pip install highway-env==1.5.0
+        pip install highway-env==1.7.1
         pip install -e .
     - name: Type check
       run: |
         make type
       # skip mypy type check for python3.7 (last forever for some reason)
-      if: "!(matrix.python-version == '3.7')"
+      if: "!(matrix.python-version == '3.7')"
     - name: Check codestyle
       run: |
         make check-codestyle
diff --git a/.github/workflows/trained_agents.yml b/.github/workflows/trained_agents.yml
index 3e2d6d27b..bc54afeaf 100644
--- a/.github/workflows/trained_agents.yml
+++ b/.github/workflows/trained_agents.yml
@@ -33,13 +33,12 @@ jobs:
         python -m pip install --upgrade pip
         pip install setuptools==65.5.0
         # cpu version of pytorch - faster to download
-        pip install torch==1.11+cpu -f https://download.pytorch.org/whl/torch_stable.html
-        pip install pybullet==3.1.9
+        pip install torch==1.11.0+cpu --extra-index-url https://download.pytorch.org/whl/cpu
+        pip install pybullet==3.2.5
         pip install -r requirements.txt
         # Use headless version
         pip install opencv-python-headless
-        # install parking-env to test HER (pinned so it works with gym 0.21)
-        pip install highway-env==1.5.0
+        pip install highway-env==1.7.1
         # Add support for pickle5 protocol
         # TODO: remove me when dropping python 3.7
         pip install pickle5
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
deleted file mode 100644
index 290c793ce..000000000
--- a/.gitlab-ci.yml
+++ /dev/null
@@ -1,26 +0,0 @@
-image: stablebaselines/rl-baselines3-zoo-cpu:latest
-
-# Recursive clone
-variables:
-  GIT_SUBMODULE_STRATEGY: recursive
-
-type-check:
-  script:
-    - make type
-
-pytest:
-  script:
-    # MKL_THREADING_LAYER=GNU to avoid MKL_THREADING_LAYER=INTEL incompatibility error
-    - MKL_THREADING_LAYER=GNU make pytest
-  coverage: '/^TOTAL.+?(\d+\%)$/'
-
-check-trained-agents:
-  script:
-    # MKL_THREADING_LAYER=GNU to avoid MKL_THREADING_LAYER=INTEL incompatibility error
-    - pip install pickle5 # Add support for pickle5 protocol
-    - MKL_THREADING_LAYER=GNU make check-trained-agents
-
-lint:
-  script:
-    - make check-codestyle
-    - make lint
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2f27d2db6..b84af63c1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,14 @@
+## Release 2.0.0a0 (WIP)
+
+### Breaking Changes
+- Upgraded to gym 0.26+
+- Fixed bug in HistoryWrapper, now returns the correct obs space limits
+
+### New Features
+- Gym 0.26+ patches to continue working with pybullet and TimeLimit wrapper
+
+### Bug fixes
+
 ## Release 1.8.0a2 (WIP)
 
 ### Breaking Changes
diff --git a/README.md b/README.md
index a6a4c98e3..d90e4711a 100644
--- a/README.md
+++ b/README.md
@@ -301,7 +301,7 @@ for multiple, specify a list:
 
 ```yaml
 env_wrapper:
-    - rl_zoo3.wrappers.DoneOnSuccessWrapper:
+    - rl_zoo3.wrappers.TruncatedOnSuccessWrapper:
         reward_offset: 1.0
     - sb3_contrib.common.wrappers.TimeFeatureWrapper
 ```
diff --git a/docker/Dockerfile b/docker/Dockerfile
index bd4d85967..9e4e6d767 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -21,7 +21,7 @@ RUN \
     mkdir -p ${CODE_DIR}/rl_zoo3 && \
     pip uninstall -y stable-baselines3 && \
     pip install -r /tmp/requirements.txt && \
-    pip install pip install highway-env==1.5 && \
+    pip install highway-env>=1.7.1 && \
     rm -rf $HOME/.cache/pip
 
 ENV PATH=$VENV/bin:$PATH
diff --git a/hyperparams/her.yml b/hyperparams/her.yml
index a8249f46d..1235314b9 100644
--- a/hyperparams/her.yml
+++ b/hyperparams/her.yml
@@ -59,7 +59,7 @@ FetchSlide-v1:
 FetchPickAndPlace-v1:
   env_wrapper:
     - sb3_contrib.common.wrappers.TimeFeatureWrapper
-    # - rl_zoo3.wrappers.DoneOnSuccessWrapper:
+    # - rl_zoo3.wrappers.TruncatedOnSuccessWrapper:
     #     reward_offset: 0
     #     n_successes: 4
     # - stable_baselines3.common.monitor.Monitor
diff --git a/hyperparams/ppo.yml b/hyperparams/ppo.yml
index 321ab0fdd..004feb50d 100644
--- a/hyperparams/ppo.yml
+++ b/hyperparams/ppo.yml
@@ -317,7 +317,7 @@ MiniGrid-FourRooms-v0:
   learning_rate: 2.5e-4
   clip_range: 0.2
 
-CarRacing-v0:
+CarRacing-v1:
   env_wrapper:
     - rl_zoo3.wrappers.FrameSkip:
         skip: 2
diff --git a/hyperparams/sac.yml b/hyperparams/sac.yml
index 9d41262e4..4457f50fb 100644
--- a/hyperparams/sac.yml
+++ b/hyperparams/sac.yml
@@ -161,7 +161,7 @@ MinitaurBulletDuckEnv-v0:
   learning_starts: 10000
 
 # To be tuned
-CarRacing-v0:
+CarRacing-v1:
   env_wrapper:
     - rl_zoo3.wrappers.FrameSkip:
         skip: 2
diff --git a/requirements.txt b/requirements.txt
index 5bb2a0460..dc8db9fb9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,20 +1,25 @@
-gym==0.21
-stable-baselines3[extra,tests,docs]>=1.8.0a2
-sb3-contrib>=1.8.0a2
+gym==0.26.2
+# stable-baselines3[extra,tests,docs]>=1.7.0
+git+https://github.com/carlosluis/stable-baselines3@fix_tests#egg=stable_baselines3[extra,tests,docs]
+# sb3-contrib>=1.7.0
+git+https://github.com/Stable-Baselines-Team/stable-baselines3-contrib@feat/new-gym-version
 box2d-py==2.3.8
 pybullet
 gym-minigrid
-scikit-optimize
-optuna
+# scikit-optimize
+optuna~=2.10.1
 pytablewriter~=0.64
 pyyaml>=5.1
 cloudpickle>=1.5.0
 plotly
-panda-gym==1.1.1  # tmp fix: until compatibility with panda-gym v2
+# need to upgrade to gymnasium:
+# panda-gym~=3.0.1
 rliable>=1.0.5
 wandb
+ale-py~=0.8.0
 huggingface_sb3>=2.2.1, <3.*
-seaborn
+seaborn~=0.11.2
 tqdm
 rich
 importlib-metadata~=4.13 # flake8 not compatible with importlib-metadata>5.0
+moviepy
diff --git a/rl_zoo3/__init__.py b/rl_zoo3/__init__.py
index 4d91fdf5a..b88384fb5 100644
--- a/rl_zoo3/__init__.py
+++ b/rl_zoo3/__init__.py
@@ -1,5 +1,12 @@
 import os
 
+# Important: import gym patches before everything
+# isort: off
+
+import rl_zoo3.gym_patches  # noqa: F401
+
+# isort: on
+
 from rl_zoo3.utils import (
     ALGOS,
     create_test_env,
diff --git a/rl_zoo3/enjoy.py b/rl_zoo3/enjoy.py
index 82471d764..d6c3964be 100644
--- a/rl_zoo3/enjoy.py
+++ b/rl_zoo3/enjoy.py
@@ -188,8 +188,10 @@ def enjoy() -> None:  # noqa: C901
             "clip_range": lambda _: 0.0,
         }
 
-    model = ALGOS[algo].load(model_path, env=env, custom_objects=custom_objects, device=args.device, **kwargs)
+    if "HerReplayBuffer" in hyperparams.get("replay_buffer_class", ""):
+        kwargs["env"] = env
 
+    model = ALGOS[algo].load(model_path, custom_objects=custom_objects, device=args.device, **kwargs)
     obs = env.reset()
 
     # Deterministic by default except for atari games
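In the `rl_zoo3/enjoy.py` hunk above, the environment is now passed to `load()` only when the saved model uses `HerReplayBuffer`: a HER model cannot be loaded without an environment (its replay buffer relies on `env.compute_reward()`), while other agents only need the env later, at prediction time. A minimal sketch of that pattern, assuming highway-env is installed and a trained HER agent exists at a hypothetical path:

```python
# Sketch only: the model path is hypothetical, highway-env and SB3 are assumed installed.
import highway_env  # noqa: F401  # registers the goal-conditioned parking-v0 env
from stable_baselines3 import SAC
from stable_baselines3.common.env_util import make_vec_env

env = make_vec_env("parking-v0")  # stand-in for the VecEnv built by create_test_env()
hyperparams = {"replay_buffer_class": "HerReplayBuffer"}  # e.g. read from the saved config

kwargs = {}
if "HerReplayBuffer" in hyperparams.get("replay_buffer_class", ""):
    # HER needs the env at load time so the replay buffer can call env.compute_reward()
    kwargs["env"] = env

model = SAC.load("path/to/her_model.zip", device="cpu", **kwargs)
```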
diff --git a/rl_zoo3/exp_manager.py b/rl_zoo3/exp_manager.py
index 4503121b0..a9bb9a1b9 100644
--- a/rl_zoo3/exp_manager.py
+++ b/rl_zoo3/exp_manager.py
@@ -513,22 +513,22 @@ def create_callbacks(self):
 
     @staticmethod
     def is_atari(env_id: str) -> bool:
-        entry_point = gym.envs.registry.env_specs[env_id].entry_point  # pytype: disable=module-attr
+        entry_point = gym.envs.registry[env_id].entry_point  # pytype: disable=module-attr
         return "AtariEnv" in str(entry_point)
 
     @staticmethod
     def is_bullet(env_id: str) -> bool:
-        entry_point = gym.envs.registry.env_specs[env_id].entry_point  # pytype: disable=module-attr
+        entry_point = gym.envs.registry[env_id].entry_point  # pytype: disable=module-attr
         return "pybullet_envs" in str(entry_point)
 
     @staticmethod
     def is_robotics_env(env_id: str) -> bool:
-        entry_point = gym.envs.registry.env_specs[env_id].entry_point  # pytype: disable=module-attr
+        entry_point = gym.envs.registry[env_id].entry_point  # pytype: disable=module-attr
         return "gym.envs.robotics" in str(entry_point) or "panda_gym.envs" in str(entry_point)
 
     @staticmethod
     def is_panda_gym(env_id: str) -> bool:
-        entry_point = gym.envs.registry.env_specs[env_id].entry_point  # pytype: disable=module-attr
+        entry_point = gym.envs.registry[env_id].entry_point  # pytype: disable=module-attr
         return "panda_gym.envs" in str(entry_point)
 
     def _maybe_normalize(self, env: VecEnv, eval_env: bool) -> VecEnv:
@@ -595,6 +595,10 @@ def create_envs(self, n_envs: int, eval_env: bool = False, no_log: bool = False)
         # See https://github.com/HumanCompatibleAI/imitation/pull/160
         spec = gym.spec(self.env_name.gym_id)
 
+        # Make Pybullet compatible with gym 0.26
+        if self.is_bullet(self.env_name.gym_id):
+            self.env_kwargs.update(dict(apply_api_compatibility=True))
+
         def make_env(**kwargs) -> gym.Env:
             env = spec.make(**kwargs)
             return env
diff --git a/rl_zoo3/gym_patches.py b/rl_zoo3/gym_patches.py
new file mode 100644
index 000000000..8aca37f68
--- /dev/null
+++ b/rl_zoo3/gym_patches.py
@@ -0,0 +1,58 @@
+"""
+Patches for gym 0.26+ so RL Zoo3 keeps working as before
+(notably TimeLimit wrapper and Pybullet envs)
+"""
+from typing import Any, Dict
+
+import numpy as np
+
+# Deprecation warning with gym 0.26 and numpy 1.24
+np.bool8 = np.bool_  # type: ignore[attr-defined]
+
+import gym  # noqa: E402
+
+
+class PatchedRegistry(dict):
+    """
+    gym.envs.registration.registry
+    is now a dictionary and no longer an EnvRegistry() object.
+    """
+
+    @property
+    def env_specs(self) -> Dict[str, Any]:
+        return self
+
+
+class PatchedTimeLimit(gym.wrappers.TimeLimit):
+    """
+    See https://github.com/openai/gym/issues/3102
+    and https://github.com/Farama-Foundation/Gymnasium/pull/101:
+    keep the behavior as before and provide additional info
+    that the episode reached a timeout, but only
+    when the episode is over because of that.
+    """
+
+    def step(self, action):
+        observation, reward, terminated, truncated, info = self.env.step(action)
+        self._elapsed_steps += 1
+
+        if self._elapsed_steps >= self._max_episode_steps:
+            done = truncated or terminated
+            # TimeLimit.truncated key may have been already set by the environment
+            # do not overwrite it
+            # only set it when the episode is not over for other reasons
+            episode_truncated = not done or info.get("TimeLimit.truncated", False)
+            info["TimeLimit.truncated"] = episode_truncated
+            # truncated may have been set by the env too
+            truncated = truncated or episode_truncated
+
+        return observation, reward, terminated, truncated, info
+
+
+patched_registry = PatchedRegistry()
+patched_registry.update(gym.envs.registration.registry)
+gym.envs.registry = patched_registry
+gym.envs.registration.registry = patched_registry
+gym.wrappers.TimeLimit = PatchedTimeLimit  # type: ignore[misc]
+gym.wrappers.time_limit.TimeLimit = PatchedTimeLimit  # type: ignore[misc]
+gym.envs.registration.TimeLimit = PatchedTimeLimit  # type: ignore[misc]
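The new `rl_zoo3/gym_patches.py` module above works purely by side effect at import time, which is why `rl_zoo3/__init__.py` (earlier in this diff) imports it before everything else. A small sketch of what the patches preserve, assuming gym 0.26+ and rl_zoo3 are installed; the checks are illustrative and not part of the zoo:

```python
# Illustrative usage of the patches above (assumes gym 0.26+ and rl_zoo3 are installed).
import gym

import rl_zoo3.gym_patches  # noqa: F401  # applies the patches at import time

# Old-style registry access keeps working: the patched registry is a dict
# that also exposes itself through the `env_specs` property.
assert "CartPole-v1" in gym.envs.registry.env_specs

env = gym.make("CartPole-v1")  # wrapped by the patched TimeLimit
obs, info = env.reset(seed=0)
terminated = truncated = False
while not (terminated or truncated):
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())

# With the patched TimeLimit, info["TimeLimit.truncated"] is only set to True
# when the episode ended because of the time limit, not when it terminated on its own.
print(terminated, truncated, info.get("TimeLimit.truncated", False))
```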
diff --git a/rl_zoo3/import_envs.py b/rl_zoo3/import_envs.py
index f918e2e0c..36b3af06f 100644
--- a/rl_zoo3/import_envs.py
+++ b/rl_zoo3/import_envs.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import gym
 from gym.envs.registration import register
 
@@ -51,8 +53,8 @@
 
 
 # Register no vel envs
 def create_no_vel_env(env_id: str):
-    def make_env():
-        env = gym.make(env_id)
+    def make_env(render_mode: Optional[str] = None):
+        env = gym.make(env_id, render_mode=render_mode)
         env = MaskVelocityWrapper(env)
         return env
diff --git a/rl_zoo3/record_video.py b/rl_zoo3/record_video.py
index 90528738b..632093805 100644
--- a/rl_zoo3/record_video.py
+++ b/rl_zoo3/record_video.py
@@ -89,6 +89,9 @@
     if args.env_kwargs is not None:
         env_kwargs.update(args.env_kwargs)
 
+    # Force rgb_array rendering (gym 0.26+)
+    env_kwargs.update(render_mode="rgb_array")
+
     env = create_test_env(
         env_name.gym_id,
         n_envs=n_envs,
@@ -133,6 +136,12 @@
     if video_folder is None:
         video_folder = os.path.join(log_path, "videos")
 
+    if is_atari:
+        # Patch Atari for rendering
+        # see https://github.com/mgbellemare/Arcade-Learning-Environment/issues/473
+        env.unwrapped.render_mode = env_kwargs.get("render_mode")
+        env.render_mode = env_kwargs.get("render_mode")
+    # Note: apparently it renders by default
     env = VecVideoRecorder(
         env,
@@ -153,10 +162,10 @@
             episode_start=episode_starts,
             deterministic=deterministic,
         )
-        obs, _, dones, _ = env.step(action)  # type: ignore[assignment]
-        episode_starts = dones
         if not args.no_render:
             env.render()
+        obs, _, dones, _ = env.step(action)  # type: ignore[assignment]
+        episode_starts = dones
 except KeyboardInterrupt:
     pass
diff --git a/rl_zoo3/train.py b/rl_zoo3/train.py
index 1e52a5fc0..55149b7f2 100644
--- a/rl_zoo3/train.py
+++ b/rl_zoo3/train.py
@@ -164,7 +164,7 @@ def train() -> None:
         importlib.import_module(env_module)
 
     env_id = args.env
-    registered_envs = set(gym.envs.registry.env_specs.keys())  # pytype: disable=module-attr
+    registered_envs = set(gym.envs.registry.keys())  # pytype: disable=module-attr
 
     if args.yaml_file is not None:
         raise ValueError(
diff --git a/rl_zoo3/utils.py b/rl_zoo3/utils.py
index f23265883..d43dc8f05 100644
--- a/rl_zoo3/utils.py
+++ b/rl_zoo3/utils.py
@@ -2,6 +2,7 @@
 import glob
 import importlib
 import os
+from copy import deepcopy
 from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
 
 import gym
@@ -39,12 +40,8 @@
 
 
 def flatten_dict_observations(env: gym.Env) -> gym.Env:
-    assert isinstance(env.observation_space, spaces.Dict)
-    try:
-        return gym.wrappers.FlattenObservation(env)
-    except AttributeError:
-        keys = env.observation_space.spaces.keys()
-        return gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))
+    assert isinstance(env.observation_space, gym.spaces.Dict)
+    return gym.wrappers.FlattenObservation(env)
 
 
 def get_wrapper_class(hyperparams: Dict[str, Any], key: str = "env_wrapper") -> Optional[Callable[[gym.Env], gym.Env]]:
@@ -234,12 +231,14 @@ def create_test_env(
         vec_env_cls = SubprocVecEnv  # type: ignore[assignment]
         # start_method = 'spawn' for thread safe
 
-    # panda-gym is based on pybullet, whose rendering requires to be configure at initialization
-    if ExperimentManager.is_panda_gym(env_id) and should_render:
-        if env_kwargs is None:
-            env_kwargs = {"render": True}
-        else:
-            env_kwargs["render"] = True
+    # Fix for gym 0.26, to keep old behavior
+    env_kwargs = env_kwargs or {}
+    env_kwargs = deepcopy(env_kwargs)
+    if "render_mode" not in env_kwargs and should_render:
+        env_kwargs.update(render_mode="human")
+
+    if ExperimentManager.is_bullet(env_id):
+        env_kwargs.update(apply_api_compatibility=True)
 
     env = make_vec_env(
         env_id,
@@ -255,7 +254,7 @@ def create_test_env(
     if "vec_env_wrapper" in hyperparams.keys():
         vec_env_wrapper = get_wrapper_class(hyperparams, "vec_env_wrapper")
         assert vec_env_wrapper is not None
-        env = vec_env_wrapper(env)
+        env = vec_env_wrapper(env)  # type: ignore[assignment, arg-type]
         del hyperparams["vec_env_wrapper"]
 
     # Load saved stats for normalizing input and rewards
diff --git a/rl_zoo3/version.txt b/rl_zoo3/version.txt
index c3d22c01c..35a785a76 100644
--- a/rl_zoo3/version.txt
+++ b/rl_zoo3/version.txt
@@ -1 +1 @@
-1.8.0a2
+2.0.0a0
diff --git a/rl_zoo3/wrappers.py b/rl_zoo3/wrappers.py
index e94e51a70..e3763cbaf 100644
--- a/rl_zoo3/wrappers.py
+++ b/rl_zoo3/wrappers.py
@@ -1,10 +1,13 @@
+from typing import Dict, Optional, Tuple
+
 import gym
 import numpy as np
 from gym import spaces
 from sb3_contrib.common.wrappers import TimeFeatureWrapper  # noqa: F401 (backward compatibility)
+from stable_baselines3.common.type_aliases import Gym26ResetReturn, Gym26StepReturn
 
 
-class DoneOnSuccessWrapper(gym.Wrapper):
+class TruncatedOnSuccessWrapper(gym.Wrapper):
     """
     Reset on success and offsets the reward.
     Useful for GoalEnv.
@@ -16,20 +19,21 @@ def __init__(self, env: gym.Env, reward_offset: float = 0.0, n_successes: int =
         self.n_successes = n_successes
         self.current_successes = 0
 
-    def reset(self):
+    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> Gym26ResetReturn:
         self.current_successes = 0
-        return self.env.reset()
+        assert options is None, "Options not supported for now"
+        return self.env.reset(seed=seed)
 
-    def step(self, action):
-        obs, reward, done, info = self.env.step(action)
+    def step(self, action) -> Gym26StepReturn:
+        obs, reward, terminated, truncated, info = self.env.step(action)
         if info.get("is_success", False):
             self.current_successes += 1
         else:
             self.current_successes = 0
         # number of successes in a row
-        done = done or self.current_successes >= self.n_successes
+        truncated = truncated or self.current_successes >= self.n_successes
         reward += self.reward_offset
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
     def compute_reward(self, achieved_goal, desired_goal, info):
         reward = self.env.compute_reward(achieved_goal, desired_goal, info)
@@ -49,9 +53,9 @@ def __init__(self, env: gym.Env, noise_std: float = 0.1):
         super().__init__(env)
         self.noise_std = noise_std
 
-    def step(self, action):
+    def step(self, action) -> Gym26StepReturn:
         noise = np.random.normal(np.zeros_like(action), np.ones_like(action) * self.noise_std)
-        noisy_action = action + noise
+        noisy_action = np.clip(action + noise, self.action_space.low, self.action_space.high)
         return self.env.step(noisy_action)
 
 
@@ -72,11 +76,12 @@ def __init__(self, env: gym.Env, smoothing_coef: float = 0.0):
         # self.alpha = self.smoothing_coef
         # self.beta = np.sqrt(1 - self.alpha ** 2) / (1 - self.alpha)
 
-    def reset(self):
+    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> Gym26ResetReturn:
         self.smoothed_action = None
-        return self.env.reset()
+        assert options is None, "Options not supported for now"
+        return self.env.reset(seed=seed)
 
-    def step(self, action):
+    def step(self, action) -> Gym26StepReturn:
         if self.smoothed_action is None:
             self.smoothed_action = np.zeros_like(action)
         self.smoothed_action = self.smoothing_coef * self.smoothed_action + (1 - self.smoothing_coef) * action
@@ -98,23 +103,24 @@ def __init__(self, env: gym.Env, delay: int = 10):
         self.current_step = 0
         self.accumulated_reward = 0.0
 
-    def reset(self):
+    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> Gym26ResetReturn:
         self.current_step = 0
         self.accumulated_reward = 0.0
-        return self.env.reset()
+        assert options is None, "Options not supported for now"
+        return self.env.reset(seed=seed)
 
-    def step(self, action):
-        obs, reward, done, info = self.env.step(action)
+    def step(self, action) -> Gym26StepReturn:
+        obs, reward, terminated, truncated, info = self.env.step(action)
         self.accumulated_reward += reward
         self.current_step += 1
 
-        if self.current_step % self.delay == 0 or done:
+        if self.current_step % self.delay == 0 or terminated or truncated:
             reward = self.accumulated_reward
             self.accumulated_reward = 0.0
         else:
             reward = 0.0
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
 
 
 class HistoryWrapper(gym.Wrapper):
@@ -131,12 +137,11 @@ def __init__(self, env: gym.Env, horizon: int = 2):
         wrapped_obs_space = env.observation_space
         wrapped_action_space = env.action_space
 
-        # TODO: double check, it seems wrong when we have different low and highs
-        low_obs = np.repeat(wrapped_obs_space.low, horizon, axis=-1)
-        high_obs = np.repeat(wrapped_obs_space.high, horizon, axis=-1)
+        low_obs = np.tile(wrapped_obs_space.low, horizon)
+        high_obs = np.tile(wrapped_obs_space.high, horizon)
 
-        low_action = np.repeat(wrapped_action_space.low, horizon, axis=-1)
-        high_action = np.repeat(wrapped_action_space.high, horizon, axis=-1)
+        low_action = np.tile(wrapped_action_space.low, horizon)
+        high_action = np.tile(wrapped_action_space.high, horizon)
 
         low = np.concatenate((low_obs, low_action))
         high = np.concatenate((high_obs, high_action))
@@ -153,19 +158,20 @@ def __init__(self, env: gym.Env, horizon: int = 2):
         self.obs_history = np.zeros(low_obs.shape, low_obs.dtype)
         self.action_history = np.zeros(low_action.shape, low_action.dtype)
 
-    def _create_obs_from_history(self):
+    def _create_obs_from_history(self) -> np.ndarray:
         return np.concatenate((self.obs_history, self.action_history))
 
-    def reset(self):
+    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> Tuple[np.ndarray, Dict]:
         # Flush the history
         self.obs_history[...] = 0
         self.action_history[...] = 0
-        obs = self.env.reset()
+        assert options is None, "Options not supported for now"
+        obs, info = self.env.reset(seed=seed)
         self.obs_history[..., -obs.shape[-1] :] = obs
-        return self._create_obs_from_history()
+        return self._create_obs_from_history(), info
 
-    def step(self, action):
-        obs, reward, done, info = self.env.step(action)
+    def step(self, action) -> Tuple[np.ndarray, float, bool, bool, Dict]:
+        obs, reward, terminated, truncated, info = self.env.step(action)
         last_ax_size = obs.shape[-1]
 
         self.obs_history = np.roll(self.obs_history, shift=-last_ax_size, axis=-1)
@@ -173,7 +179,7 @@ def step(self, action):
         self.action_history = np.roll(self.action_history, shift=-action.shape[-1], axis=-1)
         self.action_history[..., -action.shape[-1] :] = action
 
-        return self._create_obs_from_history(), reward, done, info
+        return self._create_obs_from_history(), reward, terminated, truncated, info
 
 
 class HistoryWrapperObsDict(gym.Wrapper):
@@ -190,12 +196,11 @@ def __init__(self, env: gym.Env, horizon: int = 2):
         wrapped_obs_space = env.observation_space.spaces["observation"]
         wrapped_action_space = env.action_space
 
-        # TODO: double check, it seems wrong when we have different low and highs
-        low_obs = np.repeat(wrapped_obs_space.low, horizon, axis=-1)
-        high_obs = np.repeat(wrapped_obs_space.high, horizon, axis=-1)
+        low_obs = np.tile(wrapped_obs_space.low, horizon)
+        high_obs = np.tile(wrapped_obs_space.high, horizon)
 
-        low_action = np.repeat(wrapped_action_space.low, horizon, axis=-1)
-        high_action = np.repeat(wrapped_action_space.high, horizon, axis=-1)
+        low_action = np.tile(wrapped_action_space.low, horizon)
+        high_action = np.tile(wrapped_action_space.high, horizon)
 
         low = np.concatenate((low_obs, low_action))
         high = np.concatenate((high_obs, high_action))
@@ -212,23 +217,24 @@ def __init__(self, env: gym.Env, horizon: int = 2):
         self.obs_history = np.zeros(low_obs.shape, low_obs.dtype)
         self.action_history = np.zeros(low_action.shape, low_action.dtype)
 
-    def _create_obs_from_history(self):
+    def _create_obs_from_history(self) -> np.ndarray:
         return np.concatenate((self.obs_history, self.action_history))
 
-    def reset(self):
+    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> Tuple[Dict[str, np.ndarray], Dict]:
         # Flush the history
         self.obs_history[...] = 0
         self.action_history[...] = 0
-        obs_dict = self.env.reset()
+        assert options is None, "Options not supported for now"
+        obs_dict, info = self.env.reset(seed=seed)
         obs = obs_dict["observation"]
         self.obs_history[..., -obs.shape[-1] :] = obs
 
         obs_dict["observation"] = self._create_obs_from_history()
 
-        return obs_dict
+        return obs_dict, info
 
-    def step(self, action):
-        obs_dict, reward, done, info = self.env.step(action)
+    def step(self, action) -> Tuple[Dict[str, np.ndarray], float, bool, bool, Dict]:
+        obs_dict, reward, terminated, truncated, info = self.env.step(action)
         obs = obs_dict["observation"]
         last_ax_size = obs.shape[-1]
@@ -240,7 +246,7 @@ def step(self, action):
 
         obs_dict["observation"] = self._create_obs_from_history()
 
-        return obs_dict, reward, done, info
+        return obs_dict, reward, terminated, truncated, info
 
 
 class FrameSkip(gym.Wrapper):
@@ -255,26 +261,22 @@ def __init__(self, env: gym.Env, skip: int = 4):
         super().__init__(env)
         self._skip = skip
 
-    def step(self, action: np.ndarray):
+    def step(self, action) -> Gym26StepReturn:
         """
         Step the environment with the given action
         Repeat action, sum reward.
 
         :param action: the action
-        :return: observation, reward, done, information
+        :return: observation, reward, terminated, truncated, information
         """
         total_reward = 0.0
-        done = None
         for _ in range(self._skip):
-            obs, reward, done, info = self.env.step(action)
+            obs, reward, terminated, truncated, info = self.env.step(action)
             total_reward += reward
-            if done:
+            if terminated or truncated:
                 break
 
-        return obs, total_reward, done, info
-
-    def reset(self):
-        return self.env.reset()
+        return obs, total_reward, terminated, truncated, info
 
 
 class MaskVelocityWrapper(gym.ObservationWrapper):
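The switch from `np.repeat` to `np.tile` in `HistoryWrapper` and `HistoryWrapperObsDict` above is the observation-space fix listed in the changelog at the top of this diff: the wrapper concatenates whole observations along the last axis, so the bounds of the stacked space must be whole copies of `low`/`high`, not element-wise repeats. A tiny illustration with hypothetical per-dimension bounds:

```python
# Hypothetical bounds, only to show the layout difference between repeat and tile.
import numpy as np

low = np.array([-1.0, 0.0])  # per-dimension lower bounds of the wrapped env
horizon = 2

print(np.repeat(low, horizon))  # [-1. -1.  0.  0.] -> elements repeated in place (old, wrong layout)
print(np.tile(low, horizon))    # [-1.  0. -1.  0.] -> one full copy per history step, matching the concatenation
```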
"gym_minigrid"] @@ -92,7 +98,8 @@ def test_record_video(tmp_path): args = ["-n", "100", "--algo", "sac", "--env", "Pendulum-v1", "-o", str(tmp_path)] # Skip if no X-Server - pytest.importorskip("pyglet.gl") + if not os.environ.get("DISPLAY"): + pytest.skip("No X-Server") return_code = subprocess.call(["python", "-m", "rl_zoo3.record_video"] + args) _assert_eq(return_code, 0) @@ -130,7 +137,8 @@ def test_record_training(tmp_path): ] # Skip if no X-Server - pytest.importorskip("pyglet.gl") + if not os.environ.get("DISPLAY"): + pytest.skip("No X-Server") return_code = subprocess.call(["python", "train.py"] + args_training) _assert_eq(return_code, 0) diff --git a/tests/test_hyperparams_opt.py b/tests/test_hyperparams_opt.py index 89396a1f1..9c6bf9a61 100644 --- a/tests/test_hyperparams_opt.py +++ b/tests/test_hyperparams_opt.py @@ -47,6 +47,7 @@ def test_optimize(tmp_path, sampler, pruner, experiment): args = ["-n", str(N_STEPS), "--algo", algo, "--env", env_id, "-params", 'policy_kwargs:"dict(net_arch=[32])"', "n_envs:1"] args += ["n_steps:10"] if algo == "ppo" else [] args += [ + "--no-optim-plots", "--seed", "14", "--log-folder", diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py index c494a0076..bf8cf0325 100644 --- a/tests/test_wrappers.py +++ b/tests/test_wrappers.py @@ -1,16 +1,16 @@ import gym -import pybullet_envs # noqa: F401 import pytest from stable_baselines3 import A2C from stable_baselines3.common.env_checker import check_env from stable_baselines3.common.env_util import DummyVecEnv +import rl_zoo3.import_envs # noqa: F401 from rl_zoo3.utils import get_wrapper_class from rl_zoo3.wrappers import ActionNoiseWrapper, DelayedRewardWrapper, HistoryWrapper, TimeFeatureWrapper def test_wrappers(): - env = gym.make("AntBulletEnv-v0") + env = gym.make("AntBulletEnv-v0", apply_api_compatibility=True) env = DelayedRewardWrapper(env) env = ActionNoiseWrapper(env) env = HistoryWrapper(env) @@ -27,7 +27,7 @@ def test_wrappers(): ], ) def test_get_wrapper(env_wrapper): - env = gym.make("AntBulletEnv-v0") + env = gym.make("AntBulletEnv-v0", apply_api_compatibility=True) hyperparams = {"env_wrapper": env_wrapper} wrapper_class = get_wrapper_class(hyperparams) if env_wrapper is not None: @@ -44,7 +44,8 @@ def test_get_wrapper(env_wrapper): ], ) def test_get_vec_env_wrapper(vec_env_wrapper): - env = DummyVecEnv([lambda: gym.make("AntBulletEnv-v0")]) + env = gym.make("AntBulletEnv-v0", apply_api_compatibility=True) + env = DummyVecEnv([lambda: env]) hyperparams = {"vec_env_wrapper": vec_env_wrapper} wrapper_class = get_wrapper_class(hyperparams, "vec_env_wrapper") if wrapper_class is not None: