Commit 79b304c
Initial commit

10 files changed: +736 -0 lines changed

.gitignore

+164
@@ -0,0 +1,164 @@
.envrc

models/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

LICENSE.md

+9
@@ -0,0 +1,9 @@
MIT License

Copyright (c) 2023 Andrei Betlen

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

README.md

+40
@@ -0,0 +1,40 @@
# `llama.cpp` Python Bindings

Simple Python bindings for @ggerganov's [`llama.cpp`](https://github.com/ggerganov/llama.cpp) library.

These bindings expose the low-level `llama.cpp` C API through a complete `ctypes` interface (a minimal low-level sketch appears at the end of this README).
This module also exposes a high-level Python API that is more convenient to use and follows a familiar, OpenAI-style completion format.

# Install

```bash
pip install llama_cpp
```

# Usage

```python
>>> from llama_cpp import Llama
>>> llm = Llama(model_path="models/7B/...")
>>> output = llm("Q: Name the planets in the solar system? A: ", max_tokens=32, stop=["Q:", "\n"], echo=True)
>>> print(output)
{
  "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
  "object": "text_completion",
  "created": 1679561337,
  "model": "models/7B/...",
  "choices": [
    {
      "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
      "index": 0,
      "logprobs": None,
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 14,
    "completion_tokens": 28,
    "total_tokens": 42
  }
}
```
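
For reference, here is a minimal sketch of driving the low-level `ctypes` bindings directly. It uses only the functions the high-level wrapper itself calls (`llama_init_from_file`, `llama_tokenize`, `llama_eval`, `llama_sample_top_p_top_k`, `llama_token_to_str`, `llama_free`); the model path and sampling values are illustrative, and this is a sketch rather than a supported recipe:

```python
# Illustrative sketch of the low-level API; the model path is a placeholder.
import ctypes
import multiprocessing

import llama_cpp

params = llama_cpp.llama_context_default_params()
ctx = llama_cpp.llama_init_from_file(b"models/7B/...", params)

# Tokenize the prompt into a buffer sized to the context window.
tokens = (llama_cpp.llama_token * params.n_ctx)()
n_tokens = llama_cpp.llama_tokenize(
    ctx, b"Q: Name the planets in the solar system? A: ", tokens, params.n_ctx, True
)

# Evaluate the prompt one token at a time; n_past is the number of
# tokens already in the context.
n_threads = multiprocessing.cpu_count()
for i in range(n_tokens):
    llama_cpp.llama_eval(ctx, (ctypes.c_int * 1)(tokens[i]), 1, i, n_threads)

# Sample one next token from the logits of the last eval and print it.
token = llama_cpp.llama_sample_top_p_top_k(
    ctx, tokens, n_tokens, top_k=40, top_p=0.95, temp=0.8, repeat_penalty=1.1
)
print(llama_cpp.llama_token_to_str(ctx, token).decode("utf-8"))

llama_cpp.llama_free(ctx)
```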

examples/basic.py

+8
@@ -0,0 +1,8 @@
import json
from llama_cpp import Llama

llm = Llama(model_path="models/...")

output = llm("Q: Name the planets in the solar system? A: ", max_tokens=32, stop=["Q:", "\n"], echo=True)

print(json.dumps(output, indent=2))

llama_cpp/__init__.py

+2
@@ -0,0 +1,2 @@
from .llama_cpp import *
from .llama import *

llama_cpp/llama.py

+131
@@ -0,0 +1,131 @@
import uuid
import time
import multiprocessing
from typing import List, Optional

from . import llama_cpp


class Llama:
    def __init__(
        self,
        model_path: str,
        n_ctx: int = 512,
        n_parts: int = -1,
        seed: int = 1337,
        f16_kv: bool = False,
        logits_all: bool = False,
        vocab_only: bool = False,
        n_threads: Optional[int] = None,
        model_name: Optional[str] = None,
    ):
        self.model_path = model_path
        self.model = model_name or model_path

        # Build the llama.cpp context parameters from the constructor arguments.
        self.params = llama_cpp.llama_context_default_params()
        self.params.n_ctx = n_ctx
        self.params.n_parts = n_parts
        self.params.seed = seed
        self.params.f16_kv = f16_kv
        self.params.logits_all = logits_all
        self.params.vocab_only = vocab_only

        self.n_threads = n_threads or multiprocessing.cpu_count()

        # Token buffer sized to the context window; holds prompt then completion tokens.
        self.tokens = (llama_cpp.llama_token * self.params.n_ctx)()

        self.ctx = llama_cpp.llama_init_from_file(
            self.model_path.encode("utf-8"), self.params
        )

    def __call__(
        self,
        prompt: str,
        suffix: Optional[str] = None,
        max_tokens: int = 16,
        temperature: float = 0.8,
        top_p: float = 0.95,
        echo: bool = False,
        stop: Optional[List[str]] = None,
        repeat_penalty: float = 1.1,
        top_k: int = 40,
    ):
        text = ""
        finish_reason = "length"
        completion_tokens = 0
        stop = stop or []  # avoid a mutable default argument

        prompt_tokens = llama_cpp.llama_tokenize(
            self.ctx, prompt.encode("utf-8"), self.tokens, self.params.n_ctx, True
        )

        if prompt_tokens + max_tokens > self.params.n_ctx:
            raise ValueError(
                f"Requested tokens exceed context window of {self.params.n_ctx}"
            )

        # Evaluate the prompt one token at a time; n_past is the number of
        # tokens already in the context.
        for i in range(prompt_tokens):
            llama_cpp.llama_eval(
                self.ctx, (llama_cpp.c_int * 1)(self.tokens[i]), 1, i, self.n_threads
            )

        for i in range(max_tokens):
            # Sample the next token from the logits of the most recent eval.
            token = llama_cpp.llama_sample_top_p_top_k(
                self.ctx,
                self.tokens,
                prompt_tokens + completion_tokens,
                top_k=top_k,
                top_p=top_p,
                temp=temperature,
                repeat_penalty=repeat_penalty,
            )
            if token == llama_cpp.llama_token_eos():
                finish_reason = "stop"
                break
            text += llama_cpp.llama_token_to_str(self.ctx, token).decode("utf-8")
            self.tokens[prompt_tokens + i] = token
            completion_tokens += 1

            # Truncate at the first stop sequence that appears in the text so far.
            any_stop = [s for s in stop if s in text]
            if len(any_stop) > 0:
                first_stop = any_stop[0]
                text = text[: text.index(first_stop)]
                finish_reason = "stop"
                break

            # Feed the sampled token back into the model. n_past must count only
            # the tokens already evaluated (prompt_tokens + i, consistent with
            # the prompt loop above), excluding the token being evaluated now.
            llama_cpp.llama_eval(
                self.ctx,
                (llama_cpp.c_int * 1)(self.tokens[prompt_tokens + i]),
                1,
                prompt_tokens + i,
                self.n_threads,
            )

        if echo:
            text = prompt + text

        if suffix is not None:
            text = text + suffix

        # OpenAI-style completion response.
        return {
            "id": f"cmpl-{uuid.uuid4()}",  # Likely to change
            "object": "text_completion",
            "created": int(time.time()),
            "model": self.model,  # Likely to change
            "choices": [
                {
                    "text": text,
                    "index": 0,
                    "logprobs": None,
                    "finish_reason": finish_reason,
                }
            ],
            "usage": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            },
        }

    def __del__(self):
        llama_cpp.llama_free(self.ctx)
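
As a usage note, `__call__` exposes the sampling controls defined above (`max_tokens`, `temperature`, `top_p`, `top_k`, `repeat_penalty`, `stop`, `echo`, `suffix`). A short sketch exercising them, with an illustrative model path:

```python
# Illustrative: exercises the keyword arguments of Llama.__call__.
from llama_cpp import Llama

llm = Llama(model_path="models/7B/...", n_ctx=512, seed=1337)
output = llm(
    "Q: Name the planets in the solar system? A: ",
    max_tokens=32,        # hard cap; finish_reason == "length" if reached
    temperature=0.8,      # sampling temperature
    top_p=0.95,           # nucleus sampling threshold
    top_k=40,             # sample only from the 40 most likely tokens
    repeat_penalty=1.1,   # penalize recently generated tokens
    stop=["Q:", "\n"],    # truncate at the first stop sequence
    echo=True,            # prepend the prompt to the returned text
)
print(output["choices"][0]["text"], output["choices"][0]["finish_reason"])
```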
