-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdeploy_eval_core.sh
More file actions
116 lines (93 loc) · 3.09 KB
/
deploy_eval_core.sh
File metadata and controls
116 lines (93 loc) · 3.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/bin/bash
# Deploy a trained model behind a llamafactory API server and evaluate it
# on a configured game.
#
# Usage: deploy_eval_core.sh <trained_model> <base> <game> <gpu> <type>
#   <type> is final/ckpt/pre; only its first 3 characters are used.

PRE_BASE_DIR="/workspace/ww/pretrained_models"

# model info (positional arguments)
trained_model="$1"
base="$2"
game="$3"
gpu="$4"
# final, ckpt, pre
type="$5"
sub_type=${type:0:3}

# Names of the config arrays expected to be declared by the sourced .conf
# files below (e.g. base_llama=(...), game_doudizhu=(...), gpu_0=(...)).
base_name="base_$base"
game_name="game_$game"
gpu_name="gpu_$gpu"

# config files
base_model_file="eval_conf/base_model.conf"
game_info_file="eval_conf/game_info.conf"
gpu_info_file="eval_conf/gpu_info.conf"
# import predefined config
source "$base_model_file"
source "$game_info_file"
source "$gpu_info_file"

# base model info: indirect array access via nameref (bash 4.3+) instead of
# the previous eval-based lookup — eval on user-supplied arguments was
# fragile and injection-prone.
declare -n _base_ref="$base_name"
PRE_MODEL_DIR=${_base_ref[0]}
OUT_BASE_DIR=${_base_ref[1]}
source_config=${_base_ref[2]}
temp_config=${_base_ref[3]}
echo "$PRE_MODEL_DIR"

# gpu info
declare -n _gpu_ref="$gpu_name"
api_port=${_gpu_ref[0]}
gpu_server=${_gpu_ref[1]}
gpu_client=${_gpu_ref[2]}
echo "$api_port"
start_server() {
  # Build a temp llamafactory config from $source_config, launch the API
  # server in the background, and print its PID on stdout.
  # Arguments: $1 - path of the log file for server stdout/stderr.
  # Reads globals: source_config, temp_config, PRE_BASE_DIR, PRE_MODEL_DIR,
  #                OUT_BASE_DIR, trained_model, sub_type, api_port, gpu_server.
  local log_file=$1
  cp "$source_config" "$temp_config"
  # Point the config at the chosen base model and trained adapter.
  sed -i "s|^model_name_or_path: .*|model_name_or_path: $PRE_BASE_DIR/${PRE_MODEL_DIR}|" "$temp_config"
  sed -i "s|^adapter_name_or_path: .*|adapter_name_or_path: $OUT_BASE_DIR-${PRE_MODEL_DIR}/${trained_model}|" "$temp_config"
  # "pre" presumably means evaluating the pretrained base model alone, so
  # the adapter line is commented out — TODO confirm against callers.
  if [ "$sub_type" = "pre" ]; then
    sed -i "s|^adapter_name_or_path: .*|# adapter_name_or_path: |" "$temp_config"
  fi
  export API_PORT="$api_port"
  export CUDA_VISIBLE_DEVICES="$gpu_server"
  nohup llamafactory-cli api "$temp_config" > "$log_file" 2>&1 &
  # Print the background PID so the caller can kill the server later.
  echo $!
}
run_eval() {
  # Run eval.sh against the deployed API server for the configured game.
  # Reads globals: api_port, gpu_client, trained_model, API_TEMP, type,
  #                PRE_MODEL_DIR, game_name (name of the game config array).
  export API_PORT="$api_port"
  local cuda="$gpu_client"
  # '/' in a model path would break downstream log paths; flatten to '-'.
  local test_model="${trained_model//\//-}-t${API_TEMP}"
  local base_dir="logs_eval_${type}-${PRE_MODEL_DIR}"
  # game info: nameref into the array named by $game_name (replaces the
  # previous eval-based lookup, which was fragile and injection-prone).
  local -n _game_ref="$game_name"
  local test_game="${_game_ref[0]}"
  local seed="${_game_ref[1]}"
  local num_games="${_game_ref[2]}"
  local player1="${_game_ref[3]}"
  local player2="${_game_ref[4]}"
  echo "$test_game"
  bash eval.sh "$test_game" "$seed" "$num_games" "$cuda" "$player1" "$player2" "$test_model" "$base_dir"
  # doudizhu is evaluated with both seatings — presumably because the two
  # player roles are asymmetric; TODO confirm.
  if [[ "$test_game" == "doudizhu" ]]; then
    bash eval.sh "$test_game" "$seed" "$num_games" "$cuda" "$player2" "$player1" "$test_model" "$base_dir"
  fi
}
check_server() {
  # Poll the server log until the llamafactory API reports that it is
  # listening on $api_port, or give up.
  # Arguments: $1 - log file to scan.
  #            $2 - (optional) seconds between polls, default 30.
  #            $3 - (optional) maximum number of polls, default 60.
  # Reads globals: api_port.
  # Returns: 0 once the ready line appears, 1 on timeout.
  local log_file=$1
  local interval=${2:-30}
  local max_attempts=${3:-60}
  local attempt
  for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    # Check first, then sleep: the original slept 30s even when the
    # server was already up.
    if grep -q "running on http://0.0.0.0:${api_port} (Press CTRL+C to quit)" "$log_file"; then
      echo "Server started successfully. Now running eval.sh..."
      return 0
    fi
    echo "Attempt $attempt/$max_attempts: Server did not start successfully."
    # No point sleeping after the final failed check.
    if (( attempt < max_attempts )); then
      sleep "$interval"
    fi
  done
  echo "Server failed to start after $max_attempts attempts."
  return 1
}
# main: deploy the server, wait until it is ready, run the eval, clean up.
# The log dir must exist or start_server's redirect fails silently in nohup.
mkdir -p ./deploy_out_fix
log_file="./deploy_out_fix/deploy_out-$PRE_MODEL_DIR-${trained_model//\//-}-${game}.log"
echo "$log_file"
process_id=$(start_server "$log_file")
if check_server "$log_file"; then
  run_eval
fi
# Stop the background API server whether or not the eval ran.
kill "$process_id"