Commit ce99d50

[CI] Update nitro options for init and inference (#147)
* fix(ci): Update nitro options for init and inference
* fix(ci): Update nitro arg syntax
* fix(ci): Add correct mac and windows logical CPU variable in command
* fix(ci): Update windows ping with port
1 parent: c3a8e38

3 files changed (+15, -16 lines)

Diff for: .github/scripts/e2e-test-linux-and-mac.sh

+6 -6

@@ -15,7 +15,7 @@ BINARY_PATH=$1
 DOWNLOAD_URL=$2
 
 # Start the binary file
-"$BINARY_PATH" > /tmp/nitro.log 2>&1 &
+"$BINARY_PATH" 1 127.0.0.1 5000 > /tmp/nitro.log 2>&1 &
 
 # Get the process id of the binary file
 pid=$!
@@ -37,16 +37,16 @@ if [[ ! -f "/tmp/testmodel" ]]; then
 fi
 
 # Run the curl commands
-response1=$(curl -o /tmp/response1.log -s -w "%{http_code}" --location 'http://localhost:3928/inferences/llamacpp/loadModel' \
+response1=$(curl -o /tmp/response1.log -s -w "%{http_code}" --location 'http://127.0.0.1:5000/inferences/llamacpp/loadModel' \
     --header 'Content-Type: application/json' \
     --data '{
         "llama_model_path": "/tmp/testmodel",
-        "ctx_len": 2048,
+        "ctx_len": 50,
         "ngl": 32,
         "embedding": false
     }' 2>&1)
 
-response2=$(curl -o /tmp/response2.log -s -w "%{http_code}" --location 'http://localhost:3928/inferences/llamacpp/chat_completion' \
+response2=$(curl -o /tmp/response2.log -s -w "%{http_code}" --location 'http://127.0.0.1:5000/inferences/llamacpp/chat_completion' \
     --header 'Content-Type: application/json' \
     --header 'Accept: text/event-stream' \
     --header 'Access-Control-Allow-Origin: *' \
@@ -57,11 +57,11 @@ response2=$(curl -o /tmp/response2.log -s -w "%{http_code}" --location 'http://l
     ],
     "stream": true,
     "model": "gpt-3.5-turbo",
-    "max_tokens": 100,
+    "max_tokens": 50,
     "stop": ["hello"],
     "frequency_penalty": 0,
     "presence_penalty": 0,
-    "temperature": 0.7
+    "temperature": 0.1
 }' 2>&1
 )
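
Judging by the values in the new launch line, the three positional arguments handed to the binary are a thread count, a bind host, and a port; the diff itself does not name them, so treat that mapping as an assumption. A minimal sketch of the start-and-wait pattern the script relies on, polling the assumed port instead of sleeping for a fixed interval:

    #!/usr/bin/env bash
    set -euo pipefail

    BINARY_PATH=$1

    # Assumed argument order: <threads> <host> <port>, inferred from "1 127.0.0.1 5000".
    "$BINARY_PATH" 1 127.0.0.1 5000 > /tmp/nitro.log 2>&1 &
    pid=$!

    # Poll until the server accepts connections, for up to 30 seconds.
    # curl exits non-zero while the connection is refused, 0 once the port is up.
    for _ in $(seq 1 30); do
        if curl -s -o /dev/null "http://127.0.0.1:5000"; then
            break
        fi
        sleep 1
    done

The status codes captured with -w "%{http_code}" land in $response1 and $response2, which the rest of the script can compare against 200 before killing $pid.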

Diff for: .github/scripts/e2e-test-windows.bat

+5 -5

@@ -21,9 +21,9 @@ del %TEMP%\response2.log 2>nul
 del %TEMP%\nitro.log 2>nul
 
 rem Start the binary file
-start /B "" "%BINARY_PATH%" > %TEMP%\nitro.log 2>&1
+start /B "" "%BINARY_PATH%" 1 "127.0.0.1" 5000 > %TEMP%\nitro.log 2>&1
 
-ping -n 6 127.0.0.1 > nul
+ping -n 6 127.0.0.1 5000 > nul
 
 rem Capture the PID of the started process with "nitro" in its name
 for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do (
@@ -48,16 +48,16 @@ if not exist "%MODEL_PATH%" (
 rem Define JSON strings for curl data
 call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%"
 set "curl_data1={\"llama_model_path\":\"%MODEL_PATH_STRING%\"}"
-set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":100,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.7}"
+set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
 
 rem Print the values of curl_data1 and curl_data2 for debugging
 echo curl_data1=%curl_data1%
 echo curl_data2=%curl_data2%
 
 rem Run the curl commands and capture the status code
-curl.exe -o %TEMP%\response1.log -s -w "%%{http_code}" --location "http://localhost:3928/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1
+curl.exe -o %TEMP%\response1.log -s -w "%%{http_code}" --location "http://127.0.0.1:5000/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1
 
-curl.exe -o %TEMP%\response2.log -s -w "%%{http_code}" --location "http://localhost:3928/inferences/llamacpp/chat_completion" ^
+curl.exe -o %TEMP%\response2.log -s -w "%%{http_code}" --location "http://127.0.0.1:5000/inferences/llamacpp/chat_completion" ^
 --header "Content-Type: application/json" ^
 --header "Accept: text/event-stream" ^
 --header "Access-Control-Allow-Origin: *" ^

Diff for: .github/workflows/build.yml

+4 -5

@@ -163,7 +163,7 @@ jobs:
         ./install_deps.sh
         mkdir build && cd build
         cmake ..
-        CC=gcc-8 make -j $(nproc)
+        CC=gcc-8 make -j $(sysctl -n hw.ncp)
         ls -la
 
     - name: Package
@@ -213,7 +213,7 @@ jobs:
         ./install_deps.sh
         mkdir build && cd build
         cmake -DLLAMA_METAL=OFF ..
-        CC=gcc-8 make -j $(nproc)
+        CC=gcc-8 make -j $(sysctl -n hw.ncp)
         ls -la
 
     - name: Package
@@ -284,7 +284,7 @@ jobs:
         mkdir -p build
         cd build
         cmake ..
-        cmake --build . --config Release -j 4
+        cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%"
 
     - name: Pack artifacts
       id: pack_artifacts
@@ -342,14 +342,13 @@ jobs:
         mkdir -p build
         cd build
         cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON
-        cmake --build . --config Release -j 4
+        cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%"
 
     - name: Pack artifacts
       id: pack_artifacts
       shell: cmd
       run: |
         set PATH=%PATH%;C:\Program Files\7-Zip\
-        echo %PATH%
         robocopy build_deps\_install\bin .\build\Release zlib.dll
         robocopy build\bin\Release .\build\Release llama.dll
         7z a nitro.zip .\build\Release\*
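
Per the commit message, the intent is to size make's -j flag to the logical CPU count of each runner: $(nproc) is Linux-only, macOS exposes the count through sysctl, and Windows provides the %NUMBER_OF_PROCESSORS% environment variable. Note that hw.ncp in the two macOS hunks looks like a truncation of hw.ncpu, the actual sysctl key for logical CPUs. A sketch of a portable fallback for the Unix runners (assuming only coreutils or sysctl is present):

    # Sketch: detect the logical CPU count portably in a POSIX shell.
    if command -v nproc > /dev/null 2>&1; then
        JOBS=$(nproc)              # Linux (coreutils)
    else
        JOBS=$(sysctl -n hw.ncpu)  # macOS / BSD
    fi
    make -j "$JOBS"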
