use mbridge
ISEEKYAN committed Jun 17, 2025
commit 99b41fedd9fe8a8f2c05b463e32be636ff30f74c
21 changes: 21 additions & 0 deletions verl/models/mcore/mbridge.py
@@ -0,0 +1,21 @@
try:
    from mbridge import AutoBridge
    from mbridge.utils.post_creation_callbacks import freeze_moe_router, make_value_model
except ImportError:
    import subprocess
    import sys

    print("mbridge package not found. This package is required for model bridging functionality.")
    print("Install mbridge with `pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps`")

    def install_mbridge():
        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", "git+https://github.com/ISEEKYAN/mbridge.git", "--no-deps"])
        except subprocess.CalledProcessError:
            print("Failed to install mbridge")
            raise

    install_mbridge()
    # Re-import explicitly after installation so every name in __all__ is guaranteed to exist.
    from mbridge import AutoBridge
    from mbridge.utils.post_creation_callbacks import freeze_moe_router, make_value_model

__all__ = ["AutoBridge", "make_value_model", "freeze_moe_router"]
10 changes: 10 additions & 0 deletions verl/single_controller/base/megatron/worker.py
@@ -47,6 +47,7 @@ def _init_hf_config_and_tf_config(
        override_model_config,
        override_transformer_config,
        trust_remote_code=False,
        use_mbridge=False,
    ):
        from transformers import AutoConfig

@@ -94,6 +95,15 @@ def add_optimization_config_to_tf_config(tf_config):
                    setattr(tf_config, k, v)

        add_optimization_config_to_tf_config(tf_config)
        if use_mbridge:
            from verl.models.mcore.mbridge import AutoBridge

            # Build the bridge from the HF config and let it derive the Megatron TransformerConfig.
            bridge = AutoBridge.from_config(hf_config)
            bridge.set_extra_args(**override_transformer_config)
            tf_config = bridge.config
            self.bridge = bridge
        else:
            self.bridge = None

        print(f"TF config: {tf_config}")
        self.hf_config = hf_config
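The worker presumably passes self.bridge on to the checkpoint manager, whose __init__ gains a bridge=None parameter later in this diff; the actual call site is not in the excerpts shown here, so the following is only an assumed sketch of that handoff:

# Assumed wiring (call site not shown in this excerpt); MegatronCheckpointManager accepts bridge=None below.
checkpoint_manager = MegatronCheckpointManager(
    ...,                 # existing arguments unchanged
    bridge=self.bridge,  # None unless use_mbridge is enabled
)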
4 changes: 4 additions & 0 deletions verl/trainer/config/ppo_megatron_trainer.yaml
@@ -94,6 +94,7 @@ actor_rollout_ref:
      dist_checkpointing_path: null
      seed: 42
      override_transformer_config: {} # additional transformer config like: num_layers_in_first(/last)_pipeline_stage
      use_mbridge: False
Collaborator
Actually, shouldn't use_dist_checkpointing and mbridge be an either-or relation? Maybe we should use naming like io_methods.loading_backend/saving_backend to choose between huggingface/dist_checkpointing/mbridge?

Also, we need to consider how this combines with the checkpoint configuration. Maybe we should merge these directly into checkpoint?

Collaborator
@ccclyu @dataproblems, could you give some advice on the API design?

How should use_dist_checkpointing and use_mbridge work together so that they integrate better? My original thinking:

checkpoint:
    pre_load:    # first-time load
        format: [hf, dist_ckpt]    # hf is the default and uses mbridge
    load:
        format: [hf, dist_ckpt]
    save:
        format: [hf, dist_ckpt]

But maybe this will break some APIs.


I think the current way is OK in the config, since there can be a relationship between the load and save operations (the actor saves the model and the rollout loads it, in the case where the two are not colocated). However, we would need validation when the config is read to make sure the load and save options are compatible with each other.

Implementation-wise, I would add an abstraction that pulls the checkpoint-saving logic out of the checkpoint manager and the workers; that way the checkpoint manager and worker code relies on a stable interface, and you can offer more options while modifying less code. Is that something you were looking for, or am I missing the point here?
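A minimal sketch of what that read-time validation could look like, assuming the checkpoint.load/checkpoint.save layout proposed above; the function and field names here are hypothetical and not part of this PR:

# Hypothetical read-time validation for the proposed checkpoint config (not part of this PR).
SUPPORTED_FORMATS = {"hf", "dist_ckpt"}

def validate_checkpoint_formats(checkpoint_cfg: dict) -> None:
    load_fmt = checkpoint_cfg.get("load", {}).get("format", "hf")
    save_fmt = checkpoint_cfg.get("save", {}).get("format", "hf")
    for name, fmt in (("load", load_fmt), ("save", save_fmt)):
        if fmt not in SUPPORTED_FORMATS:
            raise ValueError(f"checkpoint.{name}.format must be one of {sorted(SUPPORTED_FORMATS)}, got {fmt!r}")
    # When the actor saves and the rollout loads (non-colocated), the two formats must agree.
    if load_fmt != save_fmt:
        raise ValueError(f"Incompatible checkpoint formats: load={load_fmt!r}, save={save_fmt!r}")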

Collaborator
Thanks. Your latter part makes sense to me; it's a refactoring point, but here I'd like to focus on the API design.

use_mbridge is a broader functional option that includes model initialization, so it should work as in @ISEEKYAN's implementation. The question, then, is whether use_dist_checkpointing should migrate into the checkpoint config as a first-time loading option. Since that API migration should not be part of this PR's changes, we will separate the feature development from the interface refactor. Is that OK?

cc @ISEEKYAN @dataproblems @ccclyu

Collaborator Author
It looks good to me.
More detail about mbridge: it will cover model initialization, parameter resharding, saving/loading in HF format, forward passes with sequence packing and fused kernels (to be added), and other potential improvements on the Megatron side, as NVIDIA's solution for using Megatron in RL frameworks.
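For readers skimming the diff, here is a rough end-to-end sketch of the flow this PR wires up. The call names (AutoBridge.from_config, set_extra_args, config, save_weights, load_weights) are taken from the diff itself, while local_model_path, override_transformer_config, megatron_model, and model_ckpt_path are placeholders; this is illustrative, not an authoritative mbridge API reference.

# Illustrative sketch only; mirrors the worker and checkpoint-manager changes in this diff.
from transformers import AutoConfig

from verl.models.mcore.mbridge import AutoBridge

local_model_path = "/path/to/hf_model"                  # placeholder
override_transformer_config = {}                        # e.g. {"num_layers_in_first_pipeline_stage": 1}

hf_config = AutoConfig.from_pretrained(local_model_path)
bridge = AutoBridge.from_config(hf_config)              # model initialization on the Megatron side
bridge.set_extra_args(**override_transformer_config)    # forward Megatron overrides
tf_config = bridge.config                               # Megatron TransformerConfig used by the worker

# HF-format save/load of the sharded Megatron model, as in MegatronCheckpointManager
# (megatron_model and model_ckpt_path stand in for values the manager already holds):
# bridge.save_weights(megatron_model, model_ckpt_path)
# bridge.load_weights(megatron_model, model_ckpt_path)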

Collaborator
The current config LGTM. Long term, if we migrate to mbridge, will use_dist_checkpointing be deprecated so that only the HF format is loaded?

Collaborator Author
Personally, I prefer to use the HF format for the entire lifetime of training. But supporting dist_checkpointing or other formats like bytecheckpoint would make things more flexible when a user has a private pre-trained model. So the config might look like:

checkpoint:
    pre_load:    # first-time load
        format: [hf, dist_ckpt, bytecheckpoint]   # hf is the default and uses mbridge
    load_save:
        format: [hf, dist_ckpt, bytecheckpoint]

We would deprecate use_dist_checkpointing but keep it for a while and remind users to switch to the new way, and we would update the example scripts accordingly.
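A minimal sketch of that deprecation path, assuming the pre_load/format layout proposed above; resolve_first_load_format and the config fields are hypothetical, not part of this PR:

# Hypothetical deprecation shim (not in this PR): honor the legacy flag while warning about it.
import warnings

def resolve_first_load_format(megatron_cfg: dict, checkpoint_cfg: dict) -> str:
    fmt = checkpoint_cfg.get("pre_load", {}).get("format")
    if megatron_cfg.get("use_dist_checkpointing", False):
        warnings.warn(
            "use_dist_checkpointing is deprecated; set checkpoint.pre_load.format=dist_ckpt instead.",
            DeprecationWarning,
        )
        fmt = fmt or "dist_ckpt"
    return fmt or "hf"  # hf is the default and is handled by mbridge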

    profile: # profile the actor model in `update_policy`
      use_profile: False # set to True when you want to profile the actor model
      profile_ranks: null # list; you can specify the ranks to profile
@@ -124,6 +125,7 @@ actor_rollout_ref:
      dist_checkpointing_path: null
      seed: ${actor_rollout_ref.actor.megatron.seed}
      override_transformer_config: ${actor_rollout_ref.actor.megatron.override_transformer_config}
      use_mbridge: ${actor_rollout_ref.actor.megatron.use_mbridge}
    profile:
      use_profile: False
      profile_ranks: null
@@ -245,6 +247,7 @@ critic:
    dist_checkpointing_path: null
    seed: ${actor_rollout_ref.actor.megatron.seed}
    override_transformer_config: ${actor_rollout_ref.actor.megatron.override_transformer_config}
    use_mbridge: ${actor_rollout_ref.actor.megatron.use_mbridge}
  load_weight: True
  ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
  ppo_micro_batch_size: null # will be deprecated, use ppo_micro_batch_size_per_gpu
@@ -284,6 +287,7 @@ reward_model:
    dist_checkpointing_path: null
    seed: ${actor_rollout_ref.actor.megatron.seed}
    override_transformer_config: {}
    use_mbridge: ${actor_rollout_ref.actor.megatron.use_mbridge}
  model:
    input_tokenizer: ${actor_rollout_ref.model.path} # set this to null if the chat template is identical
    path: ~/models/FsfairX-LLaMA3-RM-v0.1
53 changes: 33 additions & 20 deletions verl/utils/checkpoint/megatron_checkpoint_manager.py
@@ -75,6 +75,7 @@ def __init__(
        use_distributed_optimizer: bool,
        use_checkpoint_opt_param_scheduler: bool = False,
        checkpoint_contents: DictConfig = None,
        bridge=None,
        **kwargs,
    ):
        super().__init__(
@@ -97,7 +98,7 @@ def __init__(
        self.model_path = self.config.model.path
        self.use_distributed_optimizer = use_distributed_optimizer
        self.use_checkpoint_opt_param_scheduler = use_checkpoint_opt_param_scheduler

        self.bridge = bridge
        self.rank = torch.distributed.get_rank()

        self.weight_saver = get_weight_saver(self.arch)
@@ -217,7 +218,11 @@ def load_checkpoint(self, local_path: str, hdfs_path: str = None, del_local_afte
        if local_path is None:
            return

        if self.should_load_model:
        if self.should_load_model and self.bridge is not None and not self.is_value_model:
            model_path = get_model_checkpoint_path(local_path)
            self.bridge.load_weights(self.model, model_path)
            log_with_rank(f"Loaded HF model checkpoint from {model_path} with bridge", rank=self.rank, logger=logger)
        elif self.should_load_model:
            model_path = get_model_checkpoint_path(local_path)
            ckpt_name = self.get_checkpoint_name(model_path, return_base_dir=False)
            state_dicts = torch.load(os.path.join(ckpt_name), weights_only=False)
@@ -260,7 +265,14 @@ def save_checkpoint(self, local_path: str, hdfs_path: str = None, global_step: i
        local_path = self.local_mkdir(local_path)

        # Save Model
        if self.should_save_model and mpu.get_data_parallel_rank() == 0:
        saved = False
        if self.should_save_model and self.bridge is not None and not self.is_value_model:
            log_with_rank(f"Saving HF model checkpoint to {local_path} with bridge", rank=self.rank, logger=logger)
            model_ckpt_path = get_model_checkpoint_path(local_path)
            self.bridge.save_weights(self.model, model_ckpt_path)
            log_with_rank(f"Saved bridge checkpoint to {model_ckpt_path}", rank=self.rank, logger=logger)
            saved = True
        elif self.should_save_model and mpu.get_data_parallel_rank() == 0:
            state_dicts = []

            for vpp_rank, model in enumerate(self.model):
@@ -274,23 +286,24 @@ def save_checkpoint(self, local_path: str, hdfs_path: str = None, global_step: i
            torch.save(state_dicts, os.path.join(ckpt_name))

            log_with_rank(f"Saved checkpoint to {model_ckpt_path}", rank=self.rank, logger=logger)
            if self.rank == 0:
                self.processing_class.save_pretrained(hf_config_and_tokenizer_path)
                self.hf_config.save_pretrained(hf_config_and_tokenizer_path)
                if hasattr(self.hf_config, "name_or_path") and self.hf_config.name_or_path:
                    try:
                        generation_config = GenerationConfig.from_pretrained(self.hf_config.name_or_path)
                        generation_config.save_pretrained(hf_config_and_tokenizer_path)
                    except Exception:
                        # if the generation config isn't available, we don't save it
                        pass
                if hdfs_path is not None:
                    log_with_rank(f"Uploading checkpoint to {hdfs_path}", rank=self.rank, logger=logger)
                    from verl.utils import hdfs_io

                    hdfs_io.makedirs(hdfs_path, exist_ok=True)
                    hdfs_io.copy(src=model_ckpt_path, dst=hdfs_path, dirs_exist_ok=True)
                    hdfs_io.copy(src=hf_config_and_tokenizer_path, dst=hdfs_path, dirs_exist_ok=True)
            saved = True
        if self.rank == 0 and saved:
            self.processing_class.save_pretrained(hf_config_and_tokenizer_path)
            self.hf_config.save_pretrained(hf_config_and_tokenizer_path)
            if hasattr(self.hf_config, "name_or_path") and self.hf_config.name_or_path:
                try:
                    generation_config = GenerationConfig.from_pretrained(self.hf_config.name_or_path)
                    generation_config.save_pretrained(hf_config_and_tokenizer_path)
                except Exception:
                    # if the generation config isn't available, we don't save it
                    pass
            if hdfs_path is not None:
                log_with_rank(f"Uploading checkpoint to {hdfs_path}", rank=self.rank, logger=logger)
                from verl.utils import hdfs_io

                hdfs_io.makedirs(hdfs_path, exist_ok=True)
                hdfs_io.copy(src=model_ckpt_path, dst=hdfs_path, dirs_exist_ok=True)
                hdfs_io.copy(src=hf_config_and_tokenizer_path, dst=hdfs_path, dirs_exist_ok=True)

        if self.should_save_hf_model:
            # wait for everyone to dump to local
11 changes: 11 additions & 0 deletions verl/utils/model.py
@@ -348,6 +348,17 @@ def _load_hf_model(config, model_config, is_value_model, local_cache_path):
    return architectures, model, state_dict, is_value_model


def get_hf_model_path(config, local_cache_path="~/.cache/verl/rlhf"):
    """Resolve config.model.path to a local path, downloading from HDFS into the cache if necessary."""
    local_cache_path = os.path.expanduser(local_cache_path)
    if config.model.path.startswith("hdfs:"):
        from verl.utils.fs import copy_to_local

        local_model_path = copy_to_local(src=config.model.path, cache_dir=local_cache_path, use_shm=config.model.get("use_shm", False))
    else:
        local_model_path = config.model.path
    return local_model_path
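
A brief usage sketch for this helper (illustrative; config is assumed to be the usual model config carrying model.path and model.use_shm):

# Illustrative only: resolve the model path once, then reuse it for HF-format loading.
from transformers import AutoConfig

local_model_path = get_hf_model_path(config)               # local dir; HDFS paths are copied to ~/.cache/verl/rlhf
hf_config = AutoConfig.from_pretrained(local_model_path)   # e.g. to feed AutoBridge.from_config(hf_config)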


def load_megatron_model_weights(config, model_config, parallel_model, params_dtype, is_value_model=False, local_cache_path="~/.cache/verl/rlhf"):
    """Load weights for verl customized model."""
    architectures, model, state_dict, is_value_model = _load_hf_model(config, model_config, is_value_model, local_cache_path)