fix lint
Achazwl committed Nov 4, 2025
commit 8a23f6ee2956bc8f453b1522e248d8d1b1841a52
9 changes: 7 additions & 2 deletions torchtitan/models/qwen3/model/state_dict_adapter.py
@@ -120,9 +120,14 @@ def from_hf(self, hf_state_dict: dict[str, Any]) -> dict[str, Any]:
         state_dict = {}
         expert_weights_by_layer = {}  # {layer: {abstract_key: {expert_id: tensor}}}
 
-        if self.model_args.enable_weight_tying and "lm_head.weight" not in hf_state_dict:
+        if (
+            self.model_args.enable_weight_tying
+            and "lm_head.weight" not in hf_state_dict
+        ):
             if "model.embed_tokens.weight" in hf_state_dict:
-                hf_state_dict["lm_head.weight"] = hf_state_dict["model.embed_tokens.weight"]
+                hf_state_dict["lm_head.weight"] = hf_state_dict[
+                    "model.embed_tokens.weight"
+                ]
 
         for key, value in hf_state_dict.items():
             if "mlp.experts" in key:
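
For context, the reformatted block implements optional weight tying during HF-to-torchtitan checkpoint conversion: when `enable_weight_tying` is set and the HF checkpoint omits `lm_head.weight` (because the output head shares the embedding), the adapter reuses `model.embed_tokens.weight` for that key before the per-key conversion loop runs. Below is a minimal standalone sketch of just that branch; the class name and the simplified constructor are hypothetical stand-ins, not the real adapter in `torchtitan/models/qwen3/model/state_dict_adapter.py`.

    from typing import Any


    class WeightTyingSketchAdapter:
        """Illustrative only; mirrors the weight-tying branch in the diff above."""

        def __init__(self, enable_weight_tying: bool):
            # Stand-in for self.model_args.enable_weight_tying in the real adapter.
            self.enable_weight_tying = enable_weight_tying

        def from_hf(self, hf_state_dict: dict[str, Any]) -> dict[str, Any]:
            # Tied HF checkpoints store only the embedding weight; materialize
            # lm_head.weight from it so later key mapping sees both entries.
            if (
                self.enable_weight_tying
                and "lm_head.weight" not in hf_state_dict
            ):
                if "model.embed_tokens.weight" in hf_state_dict:
                    hf_state_dict["lm_head.weight"] = hf_state_dict[
                        "model.embed_tokens.weight"
                    ]
            return hf_state_dict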