@@ -75,8 +75,12 @@ def search_all(
         search_one = functools.partial(self.__class__._search_one, top=top)
 
         # Convert queries to a list for potential reuse
-        queries_list = list(queries)
-
+        # Also, convert the query vectors to bytes beforehand, preparing them for sending to the client without affecting the search time measurements
+        queries_list = []
+        for query in queries:
+            query.vector = np.array(query.vector).astype(np.float32).tobytes()
+            queries_list.append(query)
+
         # Handle MAX_QUERIES environment variable
         if MAX_QUERIES > 0:
             queries_list = queries_list[:MAX_QUERIES]
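The loop above pays the NumPy conversion and tobytes() serialization cost once, before the benchmark clock starts. A minimal sketch of the pattern, assuming a hypothetical Query class and a placeholder search call (neither is the benchmark's real API):

import time

import numpy as np


class Query:
    """Hypothetical stand-in for the benchmark's query object."""

    def __init__(self, vector):
        self.vector = vector


queries = [Query(np.random.rand(128)) for _ in range(1_000)]

# Pay the serialization cost up front, outside the timed region:
# each vector becomes a float32 bytes payload ready to send.
for query in queries:
    query.vector = np.array(query.vector).astype(np.float32).tobytes()

# The timed loop now measures only the (placeholder) search calls.
start = time.perf_counter()
results = [len(query.vector) for query in queries]  # stand-in for search_one(query)
total_time = time.perf_counter() - start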
@@ -114,12 +118,12 @@ def cycling_query_generator(queries, total_count):
         total_query_count = len(used_queries)
 
         if parallel == 1:
-            # Single-threaded execution
-            start = time.perf_counter()
-
             # Create a progress bar with the correct total
             pbar = tqdm.tqdm(total=total_query_count, desc="Processing queries", unit="queries")
 
+            # Single-threaded execution
+            start = time.perf_counter()
+
             # Process queries with progress updates
             results = []
             for query in used_queries:
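Moving start = time.perf_counter() below the tqdm.tqdm(...) call keeps progress-bar construction out of the measured window. The same ordering rule, sketched with a placeholder workload:

import time

import tqdm

items = list(range(10_000))

# Setup work (progress bar construction) happens first...
pbar = tqdm.tqdm(total=len(items), desc="Processing queries", unit="queries")

# ...and the clock starts only once setup is complete, so the measured
# duration covers just the work inside the loop.
start = time.perf_counter()
for item in items:
    pbar.update(1)
pbar.close()
total_time = time.perf_counter() - start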
@@ -148,42 +152,32 @@ def cycling_query_generator(queries, total_count):
             # For lists, we can use the chunked_iterable function
             query_chunks = list(chunked_iterable(used_queries, chunk_size))
 
-            # Function to be executed by each worker process
-            def worker_function(chunk, result_queue):
-                self.__class__.init_client(
-                    self.host,
-                    distance,
-                    self.connection_params,
-                    self.search_params,
-                )
-                self.setup_search()
-                results = process_chunk(chunk, search_one)
-                result_queue.put(results)
-
             # Create a queue to collect results
             result_queue = Queue()
 
             # Create worker processes
             processes = []
             for chunk in query_chunks:
-                process = Process(target=worker_function, args=(chunk, result_queue))
+                process = Process(target=worker_function, args=(self, distance, search_one, chunk, result_queue))
                 processes.append(process)
 
-            # Start measuring time for the critical work
-            start = time.perf_counter()
-
             # Start worker processes
             for process in processes:
                 process.start()
 
             # Collect results from all worker processes
             results = []
+            min_start_time = time.perf_counter()
             for _ in processes:
-                chunk_results = result_queue.get()
+                proc_start_time, chunk_results = result_queue.get()
                 results.extend(chunk_results)
+
+                # Update min_start_time if necessary
+                if proc_start_time < min_start_time:
+                    min_start_time = proc_start_time
 
             # Stop measuring time for the critical work
-            total_time = time.perf_counter() - start
+            total_time = time.perf_counter() - min_start_time
 
             # Wait for all worker processes to finish
             for process in processes:
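Each worker now reports its own post-setup start timestamp alongside its results, and the parent takes the earliest of those as the start of the measured window, so process spawning and per-worker client setup are excluded from the total. A self-contained sketch of the pattern (the squaring workload is a placeholder; comparing perf_counter() values across processes assumes a system-wide monotonic clock, which holds on common platforms but is not guaranteed by the Python docs):

import time
from multiprocessing import Process, Queue


def worker(chunk, result_queue):
    # Per-worker setup (e.g. client initialization) would go here,
    # before the timestamp, so it stays outside the measured window.
    start_time = time.perf_counter()
    results = [x * x for x in chunk]  # placeholder for the real search work
    result_queue.put((start_time, results))


if __name__ == "__main__":
    chunks = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    result_queue = Queue()
    processes = [Process(target=worker, args=(chunk, result_queue)) for chunk in chunks]
    for process in processes:
        process.start()

    results = []
    min_start_time = time.perf_counter()
    for _ in processes:
        proc_start_time, chunk_results = result_queue.get()
        results.extend(chunk_results)
        min_start_time = min(min_start_time, proc_start_time)

    # Measure from the earliest worker start to now: spawn overhead and
    # anything done before the workers' timestamps is excluded.
    total_time = time.perf_counter() - min_start_time

    for process in processes:
        process.join()
    print(f"{len(results)} results in {total_time:.4f}s")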
@@ -226,13 +220,21 @@ def chunked_iterable(iterable, size):
     while chunk := list(islice(it, size)):
         yield chunk
 
+# Function to be executed by each worker process
+def worker_function(self, distance, search_one, chunk, result_queue):
+    self.init_client(
+        self.host,
+        distance,
+        self.connection_params,
+        self.search_params,
+    )
+    self.setup_search()
+
+    start_time = time.perf_counter()
+    results = process_chunk(chunk, search_one)
+    result_queue.put((start_time, results))
 
 
 def process_chunk(chunk, search_one):
     """Process a chunk of queries using the search_one function."""
     # No progress bar in worker processes to avoid cluttering the output
     return [search_one(query) for query in chunk]
-
-
-def process_chunk_wrapper(chunk, search_one):
-    """Wrapper to process a chunk of queries."""
-    return process_chunk(chunk, search_one)
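Hoisting worker_function to module level and passing self, distance, and search_one explicitly is what lets multiprocessing use it as a Process target: a function defined inside a method cannot be pickled under the "spawn" start method (the default on macOS and Windows). A small illustration, with a hypothetical worker:

import multiprocessing as mp


def module_level_worker(payload, result_queue):
    # Picklable: child processes can re-import this function by name.
    result_queue.put(payload * 2)


if __name__ == "__main__":
    ctx = mp.get_context("spawn")
    queue = ctx.Queue()

    process = ctx.Process(target=module_level_worker, args=(21, queue))
    process.start()
    print(queue.get())  # 42
    process.join()

    # By contrast, a function defined inside another function (or method)
    # is not picklable, so start() would raise under "spawn":
    #
    # def make_worker():
    #     def nested_worker(payload, result_queue):
    #         result_queue.put(payload * 2)
    #     return nested_worker
    #
    # ctx.Process(target=make_worker(), args=(21, queue)).start()  # raises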