1
1
import os
import subprocess
import sys
import threading
import time

import requests

7
def wait_for_server(base_url: str, timeout: int = None) -> None:
    """Block until the server at ``base_url`` reports that it is ready.

    Readiness is probed with ``GET {base_url}/v1/models``; the function
    returns as soon as that request answers with HTTP 200. Connection
    errors and non-200 answers are both treated as "not ready yet" and
    retried once per second.

    Args:
        base_url: Scheme, host and port of the server, without a trailing
            slash (e.g. ``"http://localhost:10002"``).
        timeout: Overall number of seconds to keep polling. ``None`` (or any
            other falsy value) polls indefinitely.

    Raises:
        TimeoutError: If ``timeout`` is set and the server has not answered
            with HTTP 200 within that many seconds.
    """
    start_time = time.time()
    while True:
        try:
            response = requests.get(
                f"{base_url}/v1/models",
                headers={"Authorization": "Bearer None"},
                # Bound each probe so a hung connection cannot stall the loop
                # and defeat the overall deadline checked below.
                timeout=10,
            )
            if response.status_code == 200:
                print("Server is ready.")
                return
        except requests.exceptions.RequestException:
            # The server is most likely still starting up; retry below.
            pass

        # Reached for both failed requests and non-200 answers, so neither
        # case can spin without sleeping or skip the deadline check.
        if timeout and time.time() - start_time > timeout:
            raise TimeoutError("Server did not become ready within timeout period")
        time.sleep(1)
3
22
4
23
def _forward_lines(stream, sink):
    """Copy ``stream`` to ``sink`` line by line until EOF, then close ``stream``."""
    for line in iter(stream.readline, ''):
        print(line, end='', file=sink, flush=True)
    stream.close()


def _run_streaming(cmd, env=None):
    """Run ``cmd``, relay its stdout/stderr as they are produced, return its exit code.

    Each pipe is drained by its own thread. Reading one pipe to EOF before
    touching the other can deadlock once the child fills the unread pipe,
    and polling one line per second makes the relayed output lag far behind.

    Args:
        cmd: Argument list passed to ``subprocess.Popen``.
        env: Optional environment mapping for the child; ``None`` inherits
            the current environment.

    Returns:
        The child's exit code.
    """
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        bufsize=1,  # line-buffered so lines are forwarded as soon as they arrive
        env=env,
    )
    pumps = [
        threading.Thread(target=_forward_lines, args=(process.stdout, sys.stdout), daemon=True),
        threading.Thread(target=_forward_lines, args=(process.stderr, sys.stderr), daemon=True),
    ]
    for pump in pumps:
        pump.start()
    # Both pipes reach EOF when the child exits, which ends the pump threads.
    for pump in pumps:
        pump.join()
    return process.wait()


# Command line for the ktransformers server, pinned to NUMA node 1 via numactl.
server_cmd = [
    "numactl", "-N", "1", "-m", "1",
    "/home/qujing3/anaconda3/envs/ktransformers-dev/bin/ktransformers",
    "--model_path", "/home/qujing3/models/DeepSeek-R1-Q4_K_M/config",
    # NOTE(review): this path literal ends with a space as it appears in the
    # source; kept unchanged -- confirm whether that is intentional.
    "--gguf_path", "/home/qujing3/models/DeepSeek-V3-GGUF/DeepSeek-V3-Q4_K_M ",
    "--port", "10002",
    "--cpu_infer", "48",
    "--optimize_config_path", "ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat.yaml",
    "--max_new_tokens", "3000",
    "--cache_lens", "6000"
]

print("Starting ktransformers server...")
print(" ".join(server_cmd))
# The server's own output goes to a log file so it does not interleave with
# the evaluation output relayed below.
with open("/tmp/server_log.txt", "w") as f:
    server_process = subprocess.Popen(server_cmd, stdout=f, stderr=f, text=True)

try:
    wait_for_server("http://localhost:10002", timeout=600)

    # Step 1: run the evaluation client against the server.
    eval_cmd = ["python", "ktransformers/tests/humaneval/eval_api.py"]
    print("Running eval_api.py...")
    print(f"Command: {' '.join(eval_cmd)} ")

    # Unbuffered child output so progress shows up live through the pipes.
    env = os.environ.copy()
    env["PYTHONUNBUFFERED"] = "1"

    eval_returncode = _run_streaming(eval_cmd, env=env)
    print(f"eval_api.py completed with exit code: {eval_returncode} ")

    # Step 2: score the results file.
    evaluate_cmd = [
        "evaluate_functional_correctness",
        "ktransformers/tests/humaneval/results/api/eval_b.jsonl"
    ]
    print("Running evaluate_functional_correctness...")
    print(f"Command: {' '.join(evaluate_cmd)} ")

    evaluate_returncode = _run_streaming(evaluate_cmd)

    print(f"evaluate_functional_correctness completed with exit code: {evaluate_returncode} ")
    if evaluate_returncode != 0:
        print(f"evaluate_functional_correctness exited with code {evaluate_returncode} ")
        # The ``finally`` clause below still runs and stops the server.
        sys.exit(evaluate_returncode)

finally:
    print("Stopping ktransformers server...")
    server_process.terminate()
    try:
        server_process.wait(timeout=30)
    except subprocess.TimeoutExpired:
        print("Server did not terminate gracefully, forcing...")
        server_process.kill()
        # Reap the killed process so it does not linger as a zombie.
        server_process.wait()
# 0 commit comments