Skip to content

Commit 05f6ced

Browse files
authored
Merge pull request #943 from SkqLiao/main
fix benchmark params for human eval benchmark
2 parents ddd35d5 + 6d4626a commit 05f6ced

File tree

1 file changed

+126
-30
lines changed

1 file changed

+126
-30
lines changed

ktransformers/tests/score.py

+126-30
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,137 @@
11
import subprocess
22
import time
3+
import requests
4+
import sys
5+
import os
6+
7+
def wait_for_server(base_url: str, timeout: int | None = None) -> None:
    """Block until the server at *base_url* answers ``/v1/models`` with HTTP 200.

    Args:
        base_url: Root URL of the server, e.g. ``"http://localhost:10002"``.
        timeout: Maximum seconds to wait; ``None`` (or 0) waits forever.

    Raises:
        TimeoutError: If the server is not ready within *timeout* seconds.
    """
    start_time = time.time()
    while True:
        try:
            response = requests.get(
                f"{base_url}/v1/models",
                headers={"Authorization": "Bearer None"},
            )
            if response.status_code == 200:
                print("Server is ready.")
                break
        except requests.exceptions.RequestException:
            # Server not accepting connections yet; fall through to retry.
            pass
        # Check the deadline before sleeping so an already-expired timeout
        # raises immediately instead of waiting one more second.
        if timeout and time.time() - start_time > timeout:
            raise TimeoutError("Server did not become ready within timeout period")
        # Sleep on EVERY failed attempt. The original slept only inside the
        # except branch, so a server answering with a non-200 status was
        # hammered in a tight busy-loop.
        time.sleep(1)
322

423
# argv for launching the ktransformers server under numactl, binding CPU and
# memory allocation to NUMA node 1 (-N 1 -m 1) so inference stays on one socket.
# NOTE(review): every absolute path below is machine-specific (/home/qujing3/...);
# consider lifting them into env vars or CLI arguments.
server_cmd = [
    "numactl", "-N", "1", "-m", "1",
    # ktransformers entry point from a dedicated conda env.
    "/home/qujing3/anaconda3/envs/ktransformers-dev/bin/ktransformers",
    # NOTE(review): model_path references a DeepSeek-R1 config while gguf_path
    # points at DeepSeek-V3 weights -- confirm this pairing is intentional.
    "--model_path", "/home/qujing3/models/DeepSeek-R1-Q4_K_M/config",
    "--gguf_path", "/home/qujing3/models/DeepSeek-V3-GGUF/DeepSeek-V3-Q4_K_M",
    "--port", "10002",
    "--cpu_infer", "48",
    "--optimize_config_path", "ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat.yaml",
    # Generation budget: cache_lens must cover prompt + max_new_tokens.
    "--max_new_tokens", "3000",
    "--cache_lens", "6000"
]
1134

1235
# Launch the server in the background; its combined stdout/stderr goes to a
# log file so the console stays free for the evaluation output below.
print("Starting ktransformers server...")
print(" ".join(server_cmd))
with open("/tmp/server_log.txt", "w") as server_log:
    server_process = subprocess.Popen(
        server_cmd,
        stdout=server_log,
        stderr=server_log,
        text=True,
    )
39+
40+
try:
    # Give the server up to 10 minutes to load the model and start serving.
    wait_for_server("http://localhost:10002", timeout=600)

    eval_cmd = ["python", "ktransformers/tests/humaneval/eval_api.py"]
    print("Running eval_api.py...")
    print(f"Command: {' '.join(eval_cmd)}")

    env = os.environ.copy()
    # Force the child to flush line-by-line so we can relay output live.
    env["PYTHONUNBUFFERED"] = "1"

    eval_process = subprocess.Popen(
        eval_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        bufsize=1,
        env=env,
        universal_newlines=True,
    )

    import threading
    import queue

    def enqueue_output(out, q):
        # Reader thread: forward every line from *out* into *q* until EOF,
        # so neither pipe can fill up and block the child.
        for line in iter(out.readline, ''):
            q.put(line)
        out.close()

    stdout_queue = queue.Queue()
    stderr_queue = queue.Queue()

    stdout_thread = threading.Thread(
        target=enqueue_output, args=(eval_process.stdout, stdout_queue), daemon=True
    )
    stderr_thread = threading.Thread(
        target=enqueue_output, args=(eval_process.stderr, stderr_queue), daemon=True
    )
    stdout_thread.start()
    stderr_thread.start()

    def drain_queues():
        # Flush everything currently buffered in both queues.
        # (The original printed at most ONE line per stream per second --
        # get_nowait once, then sleep(1) -- so console output could lag
        # arbitrarily far behind a chatty child.)
        while True:
            try:
                print(stdout_queue.get_nowait(), end='', flush=True)
            except queue.Empty:
                break
        while True:
            try:
                print(stderr_queue.get_nowait(), end='', file=sys.stderr, flush=True)
            except queue.Empty:
                break

    while eval_process.poll() is None:
        drain_queues()
        time.sleep(0.1)

    # Join the reader threads BEFORE the final drain; the original drained
    # first, racing threads that might still be enqueueing buffered lines.
    stdout_thread.join(timeout=5)
    stderr_thread.join(timeout=5)
    drain_queues()

    eval_process.wait()
    print(f"eval_api.py completed with exit code: {eval_process.returncode}")

    evaluate_cmd = [
        "evaluate_functional_correctness",
        "ktransformers/tests/humaneval/results/api/eval_b.jsonl"
    ]
    print("Running evaluate_functional_correctness...")
    print(f"Command: {' '.join(evaluate_cmd)}")

    # Merge stderr into stdout. The original read stdout to EOF and only then
    # started reading stderr: if the child filled the stderr pipe buffer
    # first, both processes deadlocked.
    evaluate_process = subprocess.Popen(
        evaluate_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
        universal_newlines=True,
    )
    for line in evaluate_process.stdout:
        print(line, end='', flush=True)

    evaluate_process.wait()

    print(f"evaluate_functional_correctness completed with exit code: {evaluate_process.returncode}")
    if evaluate_process.returncode != 0:
        print(f"evaluate_functional_correctness exited with code {evaluate_process.returncode}")
        sys.exit(evaluate_process.returncode)

finally:
    # Always shut the server down, even when evaluation failed or timed out.
    print("Stopping ktransformers server...")
    server_process.terminate()
    try:
        server_process.wait(timeout=30)
    except subprocess.TimeoutExpired:
        print("Server did not terminate gracefully, forcing...")
        server_process.kill()

0 commit comments

Comments
 (0)