-
Notifications
You must be signed in to change notification settings - Fork 34
/
Copy pathvdb.py
101 lines (86 loc) · 3.57 KB
/
vdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import os
import sys
# OpenMP runtime configuration. This block has to run before `import faiss`
# further down, so the settings are in place when the native library loads.

# Tolerate a second copy of the OpenMP runtime being loaded into the process
# instead of aborting. Set once, ahead of the DLL preload below.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

if sys.platform.startswith("win"):
    # Best-effort preload of the Intel OpenMP runtime on Windows. A missing
    # DLL (or a Python build without ctypes) is reported but is not fatal.
    try:
        import ctypes

        ctypes.CDLL("libiomp5md.dll")
    except (ImportError, OSError) as e:
        print(f"Failed to preload DLL: {e}")

# Set OpenMP environment variables
os.environ["OMP_NUM_THREADS"] = "1"  # Limit OpenMP to single thread
import faiss
import numpy as np
import pickle
class VectorDatabase:
    """FAISS-backed vector store with one metadata entry per stored vector.

    Vectors are kept in a FAISS index and their metadata in a parallel Python
    list: the metadata for FAISS id ``i`` is ``self.metadata[i]``. Both parts
    are written to disk by ``save_index`` and restored on construction when
    the two files already exist.
    """

    def __init__(
        self,
        vector_dimension,
        index_file="db/vector_index.faiss",
        metadata_file="db/vector_metadata.pkl",
    ):
        """Open the store, loading persisted data if it is available.

        Args:
            vector_dimension: Dimensionality used when a new index is created.
            index_file: Path of the serialized FAISS index.
            metadata_file: Path of the pickled metadata list.
        """
        self.dimension = vector_dimension
        self.index_file = self.resolve_path(index_file)
        self.metadata_file = self.resolve_path(metadata_file)
        self.index = None
        self.metadata = []
        self.load_or_create_index()

    def resolve_path(self, file_path):
        """Ensure the parent directory of ``file_path`` exists.

        Args:
            file_path: Absolute or relative path of a file.

        Returns:
            ``file_path`` unchanged when it is absolute, otherwise its
            absolute form relative to the current working directory.
        """
        directory = os.path.dirname(file_path)
        if directory:
            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists() test.
            os.makedirs(directory, exist_ok=True)
        if os.path.isabs(file_path):
            return file_path
        return os.path.abspath(file_path)

    def load_or_create_index(self):
        """Load the index and metadata from disk, or start with an empty index.

        A new index is created when either file is missing or when loading
        fails for any reason (the error is printed, not raised).
        """
        files_present = os.path.exists(self.index_file) and os.path.exists(
            self.metadata_file
        )
        if not files_present:
            print("Index files not found. Creating new index")
            self.create_new_index()
            return

        try:
            self.index = faiss.read_index(self.index_file)
            # NOTE(security): pickle executes arbitrary code while loading;
            # the metadata file must come from a trusted location.
            with open(self.metadata_file, "rb") as f:
                self.metadata = pickle.load(f)
            print(f"Loaded existing index with {self.index.ntotal} vectors")
        except Exception as e:
            # Deliberately broad: any unreadable or corrupt file falls back
            # to a fresh, empty index instead of aborting start-up.
            print(f"Error loading existing index: {e}")
            print("Creating new index")
            self.create_new_index()

    def create_new_index(self):
        """Replace the current state with an empty L2 index and no metadata."""
        self.index = faiss.IndexFlatL2(self.dimension)
        self.metadata = []

    def add_vector(self, vector, metadata):
        """Append one vector and its metadata to the store (in memory only).

        Args:
            vector: Sequence of numbers; converted to a float32 row.
            metadata: Object stored at the same position as the vector.
        """
        row = np.array([vector], dtype=np.float32)
        self.index.add(row)
        self.metadata.append(metadata)

    def save_index(self):
        """Write the FAISS index and the pickled metadata list to disk."""
        faiss.write_index(self.index, self.index_file)
        with open(self.metadata_file, "wb") as f:
            pickle.dump(self.metadata, f)
        print(f"Saved index with {self.index.ntotal} vectors")

    def search_similar(self, query_vector, k=500):
        """Return the stored items closest to ``query_vector``.

        Args:
            query_vector: Sequence of numbers; converted to a float32 row.
            k: Maximum number of neighbours to return.

        Returns:
            A list of ``(metadata, distance)`` tuples in the order reported
            by the index. Hits without a matching metadata entry are skipped.
        """
        total = self.index.ntotal
        if total == 0:
            return []

        query = np.array([query_vector], dtype=np.float32)
        # Asking for more neighbours than there are vectors only yields
        # padding (-1) entries, so cap k at the index size.
        distances, indices = self.index.search(query, min(k, total))

        known = len(self.metadata)
        # The distance is returned at full precision; the previous
        # round(..., 500) never altered the value.
        return [
            (self.metadata[idx], float(dist))
            for idx, dist in zip(indices[0], distances[0])
            if idx != -1 and idx < known
        ]

    def list_all_items(self):
        """Return every stored item as a ``(metadata, distance)`` tuple.

        Items are listed in insertion order, straight from the metadata list.
        Searching the index for each vector's own nearest neighbour is
        avoided: it is quadratic in the index size and can pair a vector with
        the metadata of an identical duplicate. The distance slot is kept for
        backward compatibility and is always ``0.0`` (a vector's distance to
        itself).
        """
        # Only ids present in both the index and the metadata list are listed.
        count = min(self.index.ntotal, len(self.metadata))
        return [(item, 0.0) for item in self.metadata[:count]]