Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions configs/Hy3/ptq/fp8/Hy3_smooth.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ collect_moe: true
ema_momentum: 0.9

# ========== Phase 2: smooth flavours & fixed alpha ==========
smooth_qk: true
smooth_vo: true
smooth_qk: false
smooth_vo: false
smooth_down: true
alpha_qk: 0.6
alpha_vo: 0.5
Expand Down Expand Up @@ -56,5 +56,5 @@ alpha_smooth_search_mode: default
# per-tensor-act-first mode parameters (only effective when mode=per-tensor-act-first)
# alpha_act_mul_min: 0.1
# alpha_act_mul_max: 1.0
# alpha_smooth_min: 1e-6
# alpha_smooth_max: 1e6
# alpha_smooth_min: 0.000001
# alpha_smooth_max: 1000000
8 changes: 4 additions & 4 deletions scripts/ptq/run_smooth_for_HY3.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ done
# -------- Environment Variables --------
# Allow function serialization for apply_model in vLLM v1 engine
export VLLM_ALLOW_INSECURE_SERIALIZATION=1
# Enable MoE expert statistics collection
export VLLM_MOE_COLLECT_STATS=1
# Disable Ray log deduplication so repeated log lines from every worker are shown
export RAY_DEDUP_LOGS=0
# Force Python to not use bytecode cache
Expand All @@ -43,7 +41,6 @@ export PYTHONDONTWRITEBYTECODE=1

export MAX_NUM_BATCHED_TOKENS=32768
export VLLM_ENABLE_CHUNKED_PREFILL=1
export MOE_MODE=fused
export VLLM_ATTENTION_BACKEND=FLASHINFER
export ASYNC_SCHEDULING=1
export VLLM_ENABLE_PREFIX_CACHING=1
Expand All @@ -54,7 +51,6 @@ export PRECISIONMODE=HF
export VLLM_MOE_COLLECT_SMOOTH_STATS=1
export VLLM_MOE_COLLECT_ALPHA_SEARCH=1

export PYTHONPATH=/cfs_cloud_code/gavinlee/work/open_source_smooth/AngelSlim
# -------- Phase 1: Collect Smooth Stats + Alpha Search --------
if [ "$SKIP_CALIBRATE" = false ]; then
echo "========================================"
Expand All @@ -72,6 +68,10 @@ if [ "$SKIP_CONVERT" = false ]; then
python3 tools/smooth/convert_smooth_weights.py -c "$CONFIG"
fi

# Revert: unset the smooth-stats / alpha-search collection flags exported earlier in this script
unset VLLM_MOE_COLLECT_SMOOTH_STATS
unset VLLM_MOE_COLLECT_ALPHA_SEARCH

echo "========================================"
echo "Done."
echo "========================================"
Loading
Loading