From 1caa4dcf94c11d585b1f90cc80f4a99fa6bf8fa6 Mon Sep 17 00:00:00 2001 From: jon-chuang Date: Thu, 13 Apr 2023 12:55:38 +0800 Subject: [PATCH 01/16] commit --- examples/common.cpp | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/examples/common.cpp b/examples/common.cpp index 91d96efae67ff..1c5d384502a09 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -13,6 +13,10 @@ #include #endif +#if defined(_WIN32) || defined(_WIN64) +#include +#endif + #if defined (_WIN32) #include #include @@ -34,9 +38,25 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { // std::thread::hardware_concurrency may not be equal to the number of cores, or may return 0. #ifdef __linux__ std::ifstream cpuinfo("/proc/cpuinfo"); - params.n_threads = std::count(std::istream_iterator(cpuinfo), - std::istream_iterator(), - std::string("processor")); + std::string line; + while (std::getline(cpuinfo, line)) { + if (line.find("cpu cores") != std::string::npos) { + line.erase(0, line.find(": ") + 2); + params.n_threads = std::stoul(line); + break; + } + } +#elif defined(__APPLE__) && defined(__MACH__) + int num_physical_cores; + size_t len = sizeof(num_physical_cores); + int result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0); + if (result == 0) { + params.n_threads = std::stoul(line); + } +#elif defined(_WIN32) || defined(_WIN64) + SYSTEM_INFO sysinfo; + GetNativeSystemInfo(&sysinfo); + params.n_threads = sysinfo.dwNumberOfProcessors; #endif if (params.n_threads == 0) { params.n_threads = std::max(1, (int32_t) std::thread::hardware_concurrency()); From f181c28eddd29f0ccc39abf95271a1bd2c968613 Mon Sep 17 00:00:00 2001 From: jon-chuang Date: Thu, 13 Apr 2023 13:01:18 +0800 Subject: [PATCH 02/16] fix --- examples/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/common.cpp b/examples/common.cpp index 1c5d384502a09..ede8d1ba70f3c 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -51,7 +51,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { size_t len = sizeof(num_physical_cores); int result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0); if (result == 0) { - params.n_threads = std::stoul(line); + params.n_threads = num_physical_cores; } #elif defined(_WIN32) || defined(_WIN64) SYSTEM_INFO sysinfo; From 8694318c71ef0fd22a0b2494f45f59ded1dcdfc4 Mon Sep 17 00:00:00 2001 From: jon-chuang Date: Thu, 13 Apr 2023 13:14:15 +0800 Subject: [PATCH 03/16] try-catch --- examples/common.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/common.cpp b/examples/common.cpp index ede8d1ba70f3c..25b95feb0371c 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -42,8 +42,10 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { while (std::getline(cpuinfo, line)) { if (line.find("cpu cores") != std::string::npos) { line.erase(0, line.find(": ") + 2); - params.n_threads = std::stoul(line); - break; + try { + params.n_threads = std::stoul(line); + break; + } catch (std::invalid_argument& e) {} // Ignore if we could not parse } } #elif defined(__APPLE__) && defined(__MACH__) From e0325353bef48ee110d0dcf37bf117aada30f05d Mon Sep 17 00:00:00 2001 From: jon-chuang Date: Fri, 14 Apr 2023 03:07:45 +0800 Subject: [PATCH 04/16] apply code review --- examples/common.cpp | 34 +++++++++++++++++++--------------- examples/common.h | 4 +++- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/examples/common.cpp b/examples/common.cpp index 25b95feb0371c..c0e9ab1f1f7e4 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -13,13 +13,10 @@ #include #endif -#if defined(_WIN32) || defined(_WIN64) -#include -#endif - #if defined (_WIN32) #include #include +#include #pragma comment(lib,"kernel32.lib") extern "C" __declspec(dllimport) void* __stdcall GetStdHandle(unsigned long nStdHandle); extern "C" __declspec(dllimport) int __stdcall GetConsoleMode(void* hConsoleHandle, unsigned long* lpMode); @@ -33,9 +30,7 @@ extern "C" __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int #define CP_UTF8 65001 #endif -bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { - // determine sensible default number of threads. - // std::thread::hardware_concurrency may not be equal to the number of cores, or may return 0. +int32_t get_num_physical_cores() { #ifdef __linux__ std::ifstream cpuinfo("/proc/cpuinfo"); std::string line; @@ -43,25 +38,34 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { if (line.find("cpu cores") != std::string::npos) { line.erase(0, line.find(": ") + 2); try { - params.n_threads = std::stoul(line); - break; + return (int32_t) std::stoul(line); } catch (std::invalid_argument& e) {} // Ignore if we could not parse } } #elif defined(__APPLE__) && defined(__MACH__) int num_physical_cores; size_t len = sizeof(num_physical_cores); - int result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0); + int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0); if (result == 0) { - params.n_threads = num_physical_cores; + return (int32_t) num_physical_cores; + } else { + int result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0); + if (result == 0) { + return (int32_t) num_physical_cores; + } } -#elif defined(_WIN32) || defined(_WIN64) +#elif defined(_WIN32) SYSTEM_INFO sysinfo; GetNativeSystemInfo(&sysinfo); - params.n_threads = sysinfo.dwNumberOfProcessors; + return (in32_t) sysinfo.dwNumberOfProcessors; #endif - if (params.n_threads == 0) { - params.n_threads = std::max(1, (int32_t) std::thread::hardware_concurrency()); + return -1; +} + +bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { + // Clip if not a valid number of threads + if (params.n_threads <= 0) { + params.n_threads = std::max(1, std::min(8, (int32_t) std::thread::hardware_concurrency())); } bool invalid_param = false; diff --git a/examples/common.h b/examples/common.h index 1ea6f74451811..61e49557fa14b 100644 --- a/examples/common.h +++ b/examples/common.h @@ -13,9 +13,11 @@ // CLI argument parsing // +int32_t get_num_physical_cores(); + struct gpt_params { int32_t seed = -1; // RNG seed - int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); + int32_t n_threads = get_num_physical_cores(); // (if <= 0, = clip(num_logical_cores, 1, 8)) int32_t n_predict = 128; // new tokens to predict int32_t repeat_last_n = 64; // last n tokens to penalize int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions) From 02b0fe86f26e2a1039edf3d19db681694ee49534 Mon Sep 17 00:00:00 2001 From: jon-chuang Date: Fri, 14 Apr 2023 03:55:33 +0800 Subject: [PATCH 05/16] improve --- examples/common.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/common.cpp b/examples/common.cpp index 7ce4d1fcde33c..65fbb9bbd9caf 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -42,11 +42,10 @@ int32_t get_num_physical_cores() { int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0); if (result == 0) { return (int32_t) num_physical_cores; - } else { - int result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0); - if (result == 0) { - return (int32_t) num_physical_cores; - } + } + result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0); + if (result == 0) { + return (int32_t) num_physical_cores; } #elif defined(_WIN32) SYSTEM_INFO sysinfo; From 3fa883706833087bf91e59246534a785bab09f0e Mon Sep 17 00:00:00 2001 From: jon-chuang Date: Fri, 14 Apr 2023 04:00:41 +0800 Subject: [PATCH 06/16] improve --- examples/common.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/common.cpp b/examples/common.cpp index 65fbb9bbd9caf..49ce6e66aa878 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -37,15 +37,15 @@ int32_t get_num_physical_cores() { } } #elif defined(__APPLE__) && defined(__MACH__) - int num_physical_cores; + int32_t num_physical_cores; size_t len = sizeof(num_physical_cores); int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0); if (result == 0) { - return (int32_t) num_physical_cores; + return num_physical_cores; } result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0); if (result == 0) { - return (int32_t) num_physical_cores; + return num_physical_cores; } #elif defined(_WIN32) SYSTEM_INFO sysinfo; From e524ce99fec4e383da71b45f106d1d1224c53f51 Mon Sep 17 00:00:00 2001 From: jon-chuang Date: Fri, 14 Apr 2023 10:22:52 +0800 Subject: [PATCH 07/16] add macos headers --- examples/common.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/examples/common.cpp b/examples/common.cpp index 49ce6e66aa878..1b77fef4347f2 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -7,6 +7,11 @@ #include #include +#elif defined(__APPLE__) && defined(__MACH__) +#include +#include +#endif + #if defined (_WIN32) #include #include @@ -37,6 +42,7 @@ int32_t get_num_physical_cores() { } } #elif defined(__APPLE__) && defined(__MACH__) + int32_t num_physical_cores; size_t len = sizeof(num_physical_cores); int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0); From 81edec9776d1bd60e159af20a1e221295dac13b9 Mon Sep 17 00:00:00 2001 From: jon-chuang Date: Sun, 16 Apr 2023 00:12:04 +0800 Subject: [PATCH 08/16] done --- examples/common.cpp | 20 +++++++++++++------- examples/common.h | 4 +--- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/examples/common.cpp b/examples/common.cpp index 1b77fef4347f2..94b5ead6fdb28 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -1,13 +1,14 @@ #include "common.h" #include +#include #include #include #include #include #include -#elif defined(__APPLE__) && defined(__MACH__) +#if defined(__APPLE__) && defined(__MACH__) #include #include #endif @@ -42,7 +43,6 @@ int32_t get_num_physical_cores() { } } #elif defined(__APPLE__) && defined(__MACH__) - int32_t num_physical_cores; size_t len = sizeof(num_physical_cores); int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0); @@ -62,11 +62,6 @@ int32_t get_num_physical_cores() { } bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { - // Clip if not a valid number of threads - if (params.n_threads <= 0) { - params.n_threads = std::max(1, std::min(8, (int32_t) std::thread::hardware_concurrency())); - } - bool invalid_param = false; std::string arg; gpt_params default_params; @@ -229,6 +224,17 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { exit(1); } + // Clip if not a valid number of threads + if (params.n_threads <= 0) { + int32_t physical_cores = get_num_physical_cores(); + if (physical_cores > 4) { + std::cout << "\n\033[1;31mWARNING:\033[0m Defaulting to 4 threads. " + << "(detected " << physical_cores << " physical cores)" << std::endl + << "Adjust --threads based on your observed inference speed in ms/token." << std::endl << std::endl; + } + params.n_threads = std::max(1, std::min(4, physical_cores)); + } + return true; } diff --git a/examples/common.h b/examples/common.h index 61e49557fa14b..37a4d66abafcf 100644 --- a/examples/common.h +++ b/examples/common.h @@ -13,11 +13,9 @@ // CLI argument parsing // -int32_t get_num_physical_cores(); - struct gpt_params { int32_t seed = -1; // RNG seed - int32_t n_threads = get_num_physical_cores(); // (if <= 0, = clip(num_logical_cores, 1, 8)) + int32_t n_threads = 0; int32_t n_predict = 128; // new tokens to predict int32_t repeat_last_n = 64; // last n tokens to penalize int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions) From 1a6c8cf72c7d2159c9d03163f6a2b8010fe5bcd2 Mon Sep 17 00:00:00 2001 From: jon-chuang Date: Sun, 16 Apr 2023 02:47:32 +0800 Subject: [PATCH 09/16] remove color --- examples/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/common.cpp b/examples/common.cpp index 94b5ead6fdb28..eaa5605b8fff9 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -228,7 +228,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { if (params.n_threads <= 0) { int32_t physical_cores = get_num_physical_cores(); if (physical_cores > 4) { - std::cout << "\n\033[1;31mWARNING:\033[0m Defaulting to 4 threads. " + std::cerr << "\nWARNING: Defaulting to 4 threads. " << "(detected " << physical_cores << " physical cores)" << std::endl << "Adjust --threads based on your observed inference speed in ms/token." << std::endl << std::endl; } From 9ee4719ee9316a59532b24416a978e98fa193e33 Mon Sep 17 00:00:00 2001 From: jon-chuang Date: Sun, 16 Apr 2023 21:19:34 +0800 Subject: [PATCH 10/16] fix windows --- examples/common.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/common.cpp b/examples/common.cpp index eaa5605b8fff9..4701f9f1e3e18 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -16,7 +16,6 @@ #if defined (_WIN32) #include #include -#include #pragma comment(lib,"kernel32.lib") extern "C" __declspec(dllimport) void* __stdcall GetStdHandle(unsigned long nStdHandle); extern "C" __declspec(dllimport) int __stdcall GetConsoleMode(void* hConsoleHandle, unsigned long* lpMode); @@ -56,7 +55,7 @@ int32_t get_num_physical_cores() { #elif defined(_WIN32) SYSTEM_INFO sysinfo; GetNativeSystemInfo(&sysinfo); - return (in32_t) sysinfo.dwNumberOfProcessors; + return static_cast(sysinfo.dwNumberOfProcessors); #endif return -1; } From df2d350c009095e784e6c05524c61b286a167f53 Mon Sep 17 00:00:00 2001 From: jon-chuang Date: Sun, 16 Apr 2023 22:10:26 +0800 Subject: [PATCH 11/16] minor --- examples/common.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/common.cpp b/examples/common.cpp index 4701f9f1e3e18..7eb995a09043f 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -16,6 +16,7 @@ #if defined (_WIN32) #include #include +#include #pragma comment(lib,"kernel32.lib") extern "C" __declspec(dllimport) void* __stdcall GetStdHandle(unsigned long nStdHandle); extern "C" __declspec(dllimport) int __stdcall GetConsoleMode(void* hConsoleHandle, unsigned long* lpMode); From 4a98a0f21ad63d97a643ba6fb21f613cb596cb23 Mon Sep 17 00:00:00 2001 From: jon-chuang Date: Wed, 26 Apr 2023 22:37:52 +0800 Subject: [PATCH 12/16] fix --- examples/common.cpp | 14 +++++--------- examples/main/README.md | 32 +++++++++++++++++++++++--------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/examples/common.cpp b/examples/common.cpp index df26e7cf1102e..cd80b1b0563d9 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -54,9 +54,8 @@ int32_t get_num_physical_cores() { return num_physical_cores; } #elif defined(_WIN32) - SYSTEM_INFO sysinfo; - GetNativeSystemInfo(&sysinfo); - return static_cast(sysinfo.dwNumberOfProcessors); + std::cerr << "WARNING: automatic calibration not supported on Windows. Defaulting to 4 threads.\n" << std::endl; + return 4; #endif return -1; } @@ -237,13 +236,10 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { // Clip if not a valid number of threads if (params.n_threads <= 0) { + std::cerr << "\nWARNING: Using number of physical cores as the default number of threads.\n\ +If your chipset has efficient/performance cores, use the number of performance cores instead.\n" << std::endl; int32_t physical_cores = get_num_physical_cores(); - if (physical_cores > 4) { - std::cerr << "\nWARNING: Defaulting to 4 threads. " - << "(detected " << physical_cores << " physical cores)" << std::endl - << "Adjust --threads based on your observed inference speed in ms/token." << std::endl << std::endl; - } - params.n_threads = std::max(1, std::min(4, physical_cores)); + params.n_threads = std::max(1, physical_cores); } return true; diff --git a/examples/main/README.md b/examples/main/README.md index 234bf2eb56639..da4779cb285c5 100644 --- a/examples/main/README.md +++ b/examples/main/README.md @@ -4,14 +4,28 @@ This example program allows you to use various LLaMA language models in an easy ## Table of Contents -1. [Quick Start](#quick-start) -2. [Common Options](#common-options) -3. [Input Prompts](#input-prompts) -4. [Interaction](#interaction) -5. [Context Management](#context-management) -6. [Generation Flags](#generation-flags) -7. [Performance Tuning and Memory Options](#performance-tuning-and-memory-options) -8. [Additional Options](#additional-options) +- [llama.cpp/example/main](#llamacppexamplemain) + - [Table of Contents](#table-of-contents) + - [Quick Start](#quick-start) + - [Common Options](#common-options) + - [Input Prompts](#input-prompts) + - [Interaction](#interaction) + - [Interaction Options](#interaction-options) + - [Reverse Prompts](#reverse-prompts) + - [In-Prefix](#in-prefix) + - [Instruction Mode](#instruction-mode) + - [Context Management](#context-management) + - [Context Size](#context-size) + - [Keep Prompt](#keep-prompt) + - [Generation Flags](#generation-flags) + - [Number of Tokens to Predict](#number-of-tokens-to-predict) + - [RNG Seed](#rng-seed) + - [Temperature](#temperature) + - [Repeat Penalty](#repeat-penalty) + - [Top-K Sampling](#top-k-sampling) + - [Top-P Sampling](#top-p-sampling) + - [Performance Tuning and Memory Options](#performance-tuning-and-memory-options) + - [Additional Options](#additional-options) ## Quick Start @@ -170,7 +184,7 @@ By adjusting these options, you can control the diversity, quality, and creativi These options help improve the performance and memory usage of the LLaMA models: -- `-t N, --threads N`: Set the number of threads to use during computation. Using the correct number of threads can greatly improve performance. It is recommended to set this value to the number of CPU cores. +- `-t N, --threads N`: Set the number of threads to use during computation. Using the correct number of threads can greatly improve performance. It is recommended to set this value to the number of physical CPU cores, or the number of performance cores in a chipset with efficiency/performance (E/P) cores. - `--mlock`: Lock the model in memory, preventing it from being swapped out when mmaped. This can improve performance. - `--no-mmap`: Do not memory-map the model. This results in a slower load time but may reduce pageouts if you're not using `mlock`. - `--memory_f32`: Use 32 bit floats instead of 16 bit floats for memory key+value, allowing higher quality inference at the cost of memory. From 710c4bbdbfc7a7ada9954bb1bdb0ee466f7db4e3 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Sun, 30 Apr 2023 18:10:08 +0800 Subject: [PATCH 13/16] Apply suggestions from code review Co-authored-by: DannyDaemonic --- examples/common.cpp | 30 ++++++++++++++---------------- examples/common.h | 2 +- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/examples/common.cpp b/examples/common.cpp index cd80b1b0563d9..073097b582f69 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -35,11 +35,17 @@ int32_t get_num_physical_cores() { std::ifstream cpuinfo("/proc/cpuinfo"); std::string line; while (std::getline(cpuinfo, line)) { - if (line.find("cpu cores") != std::string::npos) { - line.erase(0, line.find(": ") + 2); - try { - return (int32_t) std::stoul(line); - } catch (std::invalid_argument& e) {} // Ignore if we could not parse + std::size_t pos = line.find("cpu cores"); + if (pos != std::string::npos) { + pos = line.find(": ", pos); + if (pos != std::string::npos) { + try { + // Extract the number and return it + return static_cast(std::stoul(line.substr(pos + 2))); + } catch (const std::invalid_argument &) { + // Ignore if we could not parse + } + } } } #elif defined(__APPLE__) && defined(__MACH__) @@ -54,10 +60,10 @@ int32_t get_num_physical_cores() { return num_physical_cores; } #elif defined(_WIN32) - std::cerr << "WARNING: automatic calibration not supported on Windows. Defaulting to 4 threads.\n" << std::endl; - return 4; + //TODO: Implement #endif - return -1; + unsigned int n_threads = std::thread::hardware_concurrency() + return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4; } bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { @@ -234,14 +240,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { exit(1); } - // Clip if not a valid number of threads - if (params.n_threads <= 0) { - std::cerr << "\nWARNING: Using number of physical cores as the default number of threads.\n\ -If your chipset has efficient/performance cores, use the number of performance cores instead.\n" << std::endl; - int32_t physical_cores = get_num_physical_cores(); - params.n_threads = std::max(1, physical_cores); - } - return true; } diff --git a/examples/common.h b/examples/common.h index b3d2c50334bbe..df939ed2837a3 100644 --- a/examples/common.h +++ b/examples/common.h @@ -15,7 +15,7 @@ struct gpt_params { int32_t seed = -1; // RNG seed - int32_t n_threads = 0; + int32_t n_threads = get_num_physical_cores(); int32_t n_predict = 128; // new tokens to predict int32_t repeat_last_n = 64; // last n tokens to penalize int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions) From f1c19d88847f5d295dadf8fbc41c01f4e3123b00 Mon Sep 17 00:00:00 2001 From: jon-chuang Date: Sun, 30 Apr 2023 18:11:52 +0800 Subject: [PATCH 14/16] remove --- examples/main/README.md | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/examples/main/README.md b/examples/main/README.md index da4779cb285c5..234bf2eb56639 100644 --- a/examples/main/README.md +++ b/examples/main/README.md @@ -4,28 +4,14 @@ This example program allows you to use various LLaMA language models in an easy ## Table of Contents -- [llama.cpp/example/main](#llamacppexamplemain) - - [Table of Contents](#table-of-contents) - - [Quick Start](#quick-start) - - [Common Options](#common-options) - - [Input Prompts](#input-prompts) - - [Interaction](#interaction) - - [Interaction Options](#interaction-options) - - [Reverse Prompts](#reverse-prompts) - - [In-Prefix](#in-prefix) - - [Instruction Mode](#instruction-mode) - - [Context Management](#context-management) - - [Context Size](#context-size) - - [Keep Prompt](#keep-prompt) - - [Generation Flags](#generation-flags) - - [Number of Tokens to Predict](#number-of-tokens-to-predict) - - [RNG Seed](#rng-seed) - - [Temperature](#temperature) - - [Repeat Penalty](#repeat-penalty) - - [Top-K Sampling](#top-k-sampling) - - [Top-P Sampling](#top-p-sampling) - - [Performance Tuning and Memory Options](#performance-tuning-and-memory-options) - - [Additional Options](#additional-options) +1. [Quick Start](#quick-start) +2. [Common Options](#common-options) +3. [Input Prompts](#input-prompts) +4. [Interaction](#interaction) +5. [Context Management](#context-management) +6. [Generation Flags](#generation-flags) +7. [Performance Tuning and Memory Options](#performance-tuning-and-memory-options) +8. [Additional Options](#additional-options) ## Quick Start @@ -184,7 +170,7 @@ By adjusting these options, you can control the diversity, quality, and creativi These options help improve the performance and memory usage of the LLaMA models: -- `-t N, --threads N`: Set the number of threads to use during computation. Using the correct number of threads can greatly improve performance. It is recommended to set this value to the number of physical CPU cores, or the number of performance cores in a chipset with efficiency/performance (E/P) cores. +- `-t N, --threads N`: Set the number of threads to use during computation. Using the correct number of threads can greatly improve performance. It is recommended to set this value to the number of CPU cores. - `--mlock`: Lock the model in memory, preventing it from being swapped out when mmaped. This can improve performance. - `--no-mmap`: Do not memory-map the model. This results in a slower load time but may reduce pageouts if you're not using `mlock`. - `--memory_f32`: Use 32 bit floats instead of 16 bit floats for memory key+value, allowing higher quality inference at the cost of memory. From 2fbc90f25eadfa9fc5aa45e77f52fe78af3b725b Mon Sep 17 00:00:00 2001 From: jon-chuang Date: Sun, 30 Apr 2023 18:14:41 +0800 Subject: [PATCH 15/16] minor --- examples/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/common.cpp b/examples/common.cpp index ea3a804da8101..95b066be241d0 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -64,7 +64,7 @@ int32_t get_num_physical_cores() { #elif defined(_WIN32) //TODO: Implement #endif - unsigned int n_threads = std::thread::hardware_concurrency() + unsigned int n_threads = std::thread::hardware_concurrency(); return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4; } From 78761b10b69366d28bf0008018fdd2e3140e9c91 Mon Sep 17 00:00:00 2001 From: jon-chuang Date: Sun, 30 Apr 2023 18:15:53 +0800 Subject: [PATCH 16/16] minor --- examples/common.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/common.cpp b/examples/common.cpp index 95b066be241d0..ad7b0bba32f1f 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #if defined(__APPLE__) && defined(__MACH__) #include @@ -18,7 +17,6 @@ #if defined (_WIN32) #include #include -#include #pragma comment(lib,"kernel32.lib") extern "C" __declspec(dllimport) void* __stdcall GetStdHandle(unsigned long nStdHandle); extern "C" __declspec(dllimport) int __stdcall GetConsoleMode(void* hConsoleHandle, unsigned long* lpMode);