@@ -490,17 +490,21 @@ def forward(self, x: torch.Tensor, expert_idx: int) -> torch.Tensor:
 class _QuantQwen3VLMoeTextDecoderLayer(QuantModule):
     def _setup(self):
         from transformers.models.qwen3_moe.modeling_qwen3_moe import Qwen3MoeSparseMoeBlock
-        from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import Qwen3VLMoeTextSparseMoeBlock
+        from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import (
+            Qwen3VLMoeTextSparseMoeBlock,
+        )
+
         if not isinstance(self.mlp, Qwen3VLMoeTextSparseMoeBlock):
             print(f"Skipping {type(self.mlp)}")
             return
         q_proj_weight = self.self_attn.q_proj.weight
         dtype, device = q_proj_weight.dtype, q_proj_weight.device
+
         def _copy_weight(module, weight):
             module.to(dtype=dtype, device=device)
             with torch.no_grad():
                 module.weight.copy_(weight.detach())
-
+
         new_moe_layer = Qwen3MoeSparseMoeBlock(self.self_attn.config)
         new_moe_layer.gate = self.mlp.gate
         experts = self.mlp.experts
@@ -509,10 +513,10 @@ def _copy_weight(module, weight):
             _copy_weight(expert.gate_proj, experts.gate_up_proj[idx, :, :expert_dim].T)
             _copy_weight(expert.up_proj, experts.gate_up_proj[idx, :, expert_dim:].T)
             _copy_weight(expert.down_proj, experts.down_proj[idx, :].T)
-
+
         delattr(self, "mlp")
         self.mlp = new_moe_layer
-
+
 
 class _QuantDbrxFFN(_QuantSparseMoe):
     @property
@@ -614,6 +618,7 @@ def top_k(self, value):
 except ImportError:
     pass
 
+
 class _QuantGptOssExperts(_QuantFunctionalMixin):
     """Quantized wrapper for `transformers.GptOssExperts`.
 
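
For context on the weight re-layout that `_setup` performs above: the fused Qwen3-VL MoE experts pack the gate and up projections of each expert into one `gate_up_proj` tensor (split along the last dimension at `expert_dim`), while the unfused `Qwen3MoeSparseMoeBlock` experts use separate `nn.Linear` layers whose `.weight` is stored as `[out_features, in_features]`, hence the `.T` in the copies. Below is a minimal sketch of that slicing/transpose with toy shapes and plain `nn.Linear` modules standing in for the real expert classes; the shapes are assumptions for illustration, not the library's actual API.

```python
import torch
import torch.nn as nn

# Toy dimensions standing in for the real config values (assumptions).
num_experts, hidden_size, intermediate_size = 4, 8, 16

# Fused layout: gate and up projections packed along the last dim of one 3-D tensor.
gate_up_proj = torch.randn(num_experts, hidden_size, 2 * intermediate_size)
down_proj = torch.randn(num_experts, intermediate_size, hidden_size)

idx = 0  # pick one expert

# Unfused per-expert layers; nn.Linear stores weight as [out_features, in_features].
gate = nn.Linear(hidden_size, intermediate_size, bias=False)
up = nn.Linear(hidden_size, intermediate_size, bias=False)
down = nn.Linear(intermediate_size, hidden_size, bias=False)

with torch.no_grad():
    gate.weight.copy_(gate_up_proj[idx, :, :intermediate_size].T)
    up.weight.copy_(gate_up_proj[idx, :, intermediate_size:].T)
    down.weight.copy_(down_proj[idx].T)

# The unfused layers now reproduce the fused expert's projections.
x = torch.randn(2, hidden_size)
assert torch.allclose(gate(x), x @ gate_up_proj[idx, :, :intermediate_size])
assert torch.allclose(up(x), x @ gate_up_proj[idx, :, intermediate_size:])
assert torch.allclose(down(gate(x)), gate(x) @ down_proj[idx])
```

The same pattern is what the loop over `new_moe_layer.experts` in the hunk above applies per expert, after first moving each module to the attention projection's dtype and device.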