@@ -490,17 +490,21 @@ def forward(self, x: torch.Tensor, expert_idx: int) -> torch.Tensor:
 class _QuantQwen3VLMoeTextDecoderLayer(QuantModule):
     def _setup(self):
         from transformers.models.qwen3_moe.modeling_qwen3_moe import Qwen3MoeSparseMoeBlock
-        from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import Qwen3VLMoeTextSparseMoeBlock
+        from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import (
+            Qwen3VLMoeTextSparseMoeBlock,
+        )
+
         if not isinstance(self.mlp, Qwen3VLMoeTextSparseMoeBlock):
             print(f"Skipping {type(self.mlp)}")
             return
         q_proj_weight = self.self_attn.q_proj.weight
         dtype, device = q_proj_weight.dtype, q_proj_weight.device
+
         def _copy_weight(module, weight):
             module.to(dtype=dtype, device=device)
             with torch.no_grad():
                 module.weight.copy_(weight.detach())
-
+
         new_moe_layer = Qwen3MoeSparseMoeBlock(self.self_attn.config)
         new_moe_layer.gate = self.mlp.gate
         experts = self.mlp.experts
@@ -509,10 +513,10 @@ def _copy_weight(module, weight):
             _copy_weight(expert.gate_proj, experts.gate_up_proj[idx, :, :expert_dim].T)
             _copy_weight(expert.up_proj, experts.gate_up_proj[idx, :, expert_dim:].T)
             _copy_weight(expert.down_proj, experts.down_proj[idx, :].T)
-
+
         delattr(self, "mlp")
         self.mlp = new_moe_layer
-
+
 
 class _QuantDbrxFFN(_QuantSparseMoe):
     @property
@@ -614,6 +618,7 @@ def top_k(self, value):
 except ImportError:
     pass
 
+
 class _QuantGptOssExperts(_QuantFunctionalMixin):
     """Quantized wrapper for `transformers.GptOssExperts`.
 
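
For context on the weight re-layout that `_setup` performs above: the fused Qwen3-VL MoE experts pack the gate and up projections of each expert into one `gate_up_proj` tensor (split along the last dimension at `expert_dim`), while the unfused `Qwen3MoeSparseMoeBlock` experts use separate `nn.Linear` layers whose `.weight` is stored as `[out_features, in_features]`, hence the `.T` in the copies. Below is a minimal sketch of that slicing/transpose with toy shapes and plain `nn.Linear` modules standing in for the real expert classes; the shapes are assumptions for illustration, not the library's actual API.

```python
import torch
import torch.nn as nn

# Toy dimensions standing in for the real config values (assumptions).
num_experts, hidden_size, intermediate_size = 4, 8, 16

# Fused layout: gate and up projections packed along the last dim of one 3-D tensor.
gate_up_proj = torch.randn(num_experts, hidden_size, 2 * intermediate_size)
down_proj = torch.randn(num_experts, intermediate_size, hidden_size)

idx = 0  # pick one expert

# Unfused per-expert layers; nn.Linear stores weight as [out_features, in_features].
gate = nn.Linear(hidden_size, intermediate_size, bias=False)
up = nn.Linear(hidden_size, intermediate_size, bias=False)
down = nn.Linear(intermediate_size, hidden_size, bias=False)

with torch.no_grad():
    gate.weight.copy_(gate_up_proj[idx, :, :intermediate_size].T)
    up.weight.copy_(gate_up_proj[idx, :, intermediate_size:].T)
    down.weight.copy_(down_proj[idx].T)

# The unfused layers now reproduce the fused expert's projections.
x = torch.randn(2, hidden_size)
assert torch.allclose(gate(x), x @ gate_up_proj[idx, :, :intermediate_size])
assert torch.allclose(up(x), x @ gate_up_proj[idx, :, intermediate_size:])
assert torch.allclose(down(gate(x)), gate(x) @ down_proj[idx])
```

The same pattern is what the loop over `new_moe_layer.experts` in the hunk above applies per expert, after first moving each module to the attention projection's dtype and device.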