Commit ce99d50

[CI] Update nitro options for init and inference (#147)
* fix(ci): Update nitro options for init and inference
* fix(ci): Update nitro arg syntax
* fix(ci): Add correct mac and windows logical CPU variable in command
* fix(ci): Update windows ping with port
1 parent: c3a8e38

3 files changed (+15, -16 lines)

Diff for: .github/scripts/e2e-test-linux-and-mac.sh

+6 -6

@@ -15,7 +15,7 @@ BINARY_PATH=$1
 DOWNLOAD_URL=$2
 
 # Start the binary file
-"$BINARY_PATH" > /tmp/nitro.log 2>&1 &
+"$BINARY_PATH" 1 127.0.0.1 5000 > /tmp/nitro.log 2>&1 &
 
 # Get the process id of the binary file
 pid=$!
@@ -37,16 +37,16 @@ if [[ ! -f "/tmp/testmodel" ]]; then
 fi
 
 # Run the curl commands
-response1=$(curl -o /tmp/response1.log -s -w "%{http_code}" --location 'http://localhost:3928/inferences/llamacpp/loadModel' \
+response1=$(curl -o /tmp/response1.log -s -w "%{http_code}" --location 'http://127.0.0.1:5000/inferences/llamacpp/loadModel' \
     --header 'Content-Type: application/json' \
     --data '{
         "llama_model_path": "/tmp/testmodel",
-        "ctx_len": 2048,
+        "ctx_len": 50,
         "ngl": 32,
         "embedding": false
     }' 2>&1)
 
-response2=$(curl -o /tmp/response2.log -s -w "%{http_code}" --location 'http://localhost:3928/inferences/llamacpp/chat_completion' \
+response2=$(curl -o /tmp/response2.log -s -w "%{http_code}" --location 'http://127.0.0.1:5000/inferences/llamacpp/chat_completion' \
     --header 'Content-Type: application/json' \
     --header 'Accept: text/event-stream' \
     --header 'Access-Control-Allow-Origin: *' \
@@ -57,11 +57,11 @@ response2=$(curl -o /tmp/response2.log -s -w "%{http_code}" --location 'http://l
     ],
     "stream": true,
     "model": "gpt-3.5-turbo",
-    "max_tokens": 100,
+    "max_tokens": 50,
     "stop": ["hello"],
     "frequency_penalty": 0,
     "presence_penalty": 0,
-    "temperature": 0.7
+    "temperature": 0.1
 }' 2>&1
 )
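
Judging by the values in the new launch line, the three positional arguments handed to the binary are a thread count, a bind host, and a port; the diff itself does not name them, so treat that mapping as an assumption. A minimal sketch of the start-and-wait pattern the script relies on, polling the assumed port instead of sleeping for a fixed interval:

    #!/usr/bin/env bash
    set -euo pipefail

    BINARY_PATH=$1

    # Assumed argument order: <threads> <host> <port>, inferred from "1 127.0.0.1 5000".
    "$BINARY_PATH" 1 127.0.0.1 5000 > /tmp/nitro.log 2>&1 &
    pid=$!

    # Poll until the server accepts connections, for up to 30 seconds.
    # curl exits non-zero while the connection is refused, 0 once the port is up.
    for _ in $(seq 1 30); do
        if curl -s -o /dev/null "http://127.0.0.1:5000"; then
            break
        fi
        sleep 1
    done

The status codes captured with -w "%{http_code}" land in $response1 and $response2, which the rest of the script can compare against 200 before killing $pid.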

Diff for: .github/scripts/e2e-test-windows.bat

+5 -5

@@ -21,9 +21,9 @@ del %TEMP%\response2.log 2>nul
 del %TEMP%\nitro.log 2>nul
 
 rem Start the binary file
-start /B "" "%BINARY_PATH%" > %TEMP%\nitro.log 2>&1
+start /B "" "%BINARY_PATH%" 1 "127.0.0.1" 5000 > %TEMP%\nitro.log 2>&1
 
-ping -n 6 127.0.0.1 > nul
+ping -n 6 127.0.0.1 5000 > nul
 
 rem Capture the PID of the started process with "nitro" in its name
 for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do (
@@ -48,16 +48,16 @@ if not exist "%MODEL_PATH%" (
 rem Define JSON strings for curl data
 call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%"
 set "curl_data1={\"llama_model_path\":\"%MODEL_PATH_STRING%\"}"
-set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":100,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.7}"
+set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
 
 rem Print the values of curl_data1 and curl_data2 for debugging
 echo curl_data1=%curl_data1%
 echo curl_data2=%curl_data2%
 
 rem Run the curl commands and capture the status code
-curl.exe -o %TEMP%\response1.log -s -w "%%{http_code}" --location "http://localhost:3928/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1
+curl.exe -o %TEMP%\response1.log -s -w "%%{http_code}" --location "http://127.0.0.1:5000/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1
 
-curl.exe -o %TEMP%\response2.log -s -w "%%{http_code}" --location "http://localhost:3928/inferences/llamacpp/chat_completion" ^
+curl.exe -o %TEMP%\response2.log -s -w "%%{http_code}" --location "http://127.0.0.1:5000/inferences/llamacpp/chat_completion" ^
 --header "Content-Type: application/json" ^
 --header "Accept: text/event-stream" ^
 --header "Access-Control-Allow-Origin: *" ^

Diff for: .github/workflows/build.yml

+4 -5

@@ -163,7 +163,7 @@ jobs:
         ./install_deps.sh
         mkdir build && cd build
         cmake ..
-        CC=gcc-8 make -j $(nproc)
+        CC=gcc-8 make -j $(sysctl -n hw.ncp)
         ls -la
 
     - name: Package
@@ -213,7 +213,7 @@ jobs:
         ./install_deps.sh
         mkdir build && cd build
         cmake -DLLAMA_METAL=OFF ..
-        CC=gcc-8 make -j $(nproc)
+        CC=gcc-8 make -j $(sysctl -n hw.ncp)
         ls -la
 
     - name: Package
@@ -284,7 +284,7 @@ jobs:
         mkdir -p build
         cd build
         cmake ..
-        cmake --build . --config Release -j 4
+        cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%"
 
     - name: Pack artifacts
       id: pack_artifacts
@@ -342,14 +342,13 @@ jobs:
         mkdir -p build
         cd build
         cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON
-        cmake --build . --config Release -j 4
+        cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%"
 
     - name: Pack artifacts
       id: pack_artifacts
       shell: cmd
       run: |
         set PATH=%PATH%;C:\Program Files\7-Zip\
-        echo %PATH%
         robocopy build_deps\_install\bin .\build\Release zlib.dll
         robocopy build\bin\Release .\build\Release llama.dll
         7z a nitro.zip .\build\Release\*
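
Per the commit message, the intent is to size make's -j flag to the logical CPU count of each runner: $(nproc) is Linux-only, macOS exposes the count through sysctl, and Windows provides the %NUMBER_OF_PROCESSORS% environment variable. Note that hw.ncp in the two macOS hunks looks like a truncation of hw.ncpu, the actual sysctl key for logical CPUs. A sketch of a portable fallback for the Unix runners (assuming only coreutils or sysctl is present):

    # Sketch: detect the logical CPU count portably in a POSIX shell.
    if command -v nproc > /dev/null 2>&1; then
        JOBS=$(nproc)              # Linux (coreutils)
    else
        JOBS=$(sysctl -n hw.ncpu)  # macOS / BSD
    fi
    make -j "$JOBS"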
