-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2.create_index.py
88 lines (76 loc) · 3.43 KB
/
2.create_index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import marqo
import os
from dotenv import load_dotenv
# Load variables via python-dotenv before the environment is read below.
load_dotenv()

# Connection and index naming; each value can be overridden from the environment.
MARQO_API_URL = os.environ.get("MARQO_API_URL", "http://localhost:8882")
MARQO_API_KEY = os.environ.get("MARQO_API_KEY")  # None when no key is configured
INDEX_NAME = os.environ.get("INDEX_NAME", "amazon-example")

# Model identifier and vector numeric type passed into the index settings.
MODEL = "open_clip/ViT-B-16-quickgelu/metaclip_fullcc"
VECTOR_NUMERIC_TYPE = "bfloat16"
def _env_int(name, default):
    """Return environment variable *name* parsed as an int.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset or blank.

    Returns:
        The parsed integer, or *default* when the variable is unset or blank.

    Raises:
        ValueError: If the variable is set to something that is not an integer.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from err


def main():
    """Create the index named by INDEX_NAME and run one search to warm the model.

    If an index with that name already exists, the user is asked on stdin
    whether to delete it; answering "n" exits without changing anything.
    When MARQO_API_KEY is set, additional inference/storage/shard/replica
    settings are read from the environment and added to the index settings.

    Raises:
        ValueError: If NUMBER_OF_INFERENCES, NUMBER_OF_SHARDS or
            NUMBER_OF_REPLICAS is set to a non-integer value.
    """
    mq = marqo.Client(MARQO_API_URL, api_key=MARQO_API_KEY)

    # check if the index already exists
    existing = mq.get_indexes()["results"]
    if any(index["indexName"] == INDEX_NAME for index in existing):
        # Normalise the answer so "Y", " y " etc. are accepted as well.
        choice = input(
            f"Index {INDEX_NAME} already exists. Do you want to delete it? (y/n): "
        ).strip().lower()
        while choice not in ("y", "n"):
            choice = input("Please enter 'y' or 'n': ").strip().lower()

        if choice == "n":
            print("Exiting...")
            return

        mq.delete_index(INDEX_NAME)
        print("Index deleted successfully!")

    index_settings = {
        "type": "structured",
        "model": MODEL,
        "normalizeEmbeddings": True,
        "vectorNumericType": VECTOR_NUMERIC_TYPE,
        "annParameters": {
            "spaceType": "prenormalized-angular",
            "parameters": {"efConstruction": 512, "m": 16},
        },
        "allFields": [
            {"name": "main_category", "type": "text", "features": ["filter"]},
            {"name": "title", "type": "text", "features": ["lexical_search"]},
            {"name": "store", "type": "text", "features": ["lexical_search", "filter"]},
            {"name": "features", "type": "array<text>", "features": ["lexical_search"]},
            {"name": "description", "type": "text", "features": ["lexical_search"]},
            {"name": "categories", "type": "array<text>", "features": ["filter"]},
            {"name": "average_rating", "type": "float", "features": ["score_modifier"]},
            {"name": "rating_number", "type": "float", "features": ["score_modifier"]},
            {"name": "price", "type": "float", "features": ["score_modifier"]},
            {"name": "details", "type": "text", "features": ["lexical_search"]},
            {"name": "product_image", "type": "image_pointer"},
            {
                # Combined field: image weighted 0.9, title weighted 0.1.
                "name": "multimodal_image_title",
                "type": "multimodal_combination",
                "dependentFields": {"product_image": 0.9, "title": 0.1},
            },
            {"name": "sponsored", "type": "bool", "features": ["filter"]},
            {
                "name": "bid_amount",
                "type": "float",
                "features": ["filter", "score_modifier"],
            },
        ],
        # Only the combined image/title field is listed as a tensor field.
        "tensorFields": ["multimodal_image_title"],
    }

    # These settings are only added when an API key is configured
    # (presumably meaning Marqo Cloud -- confirm against deployment docs).
    if MARQO_API_KEY is not None:
        index_settings["inferenceType"] = os.getenv("INFERENCE_TYPE", "marqo.GPU")
        index_settings["storageClass"] = os.getenv("STORAGE_CLASS", "marqo.basic")
        # os.getenv returns a str whenever the variable is set, so the counts
        # are parsed explicitly to keep them integers in both cases.
        index_settings["numberOfInferences"] = _env_int("NUMBER_OF_INFERENCES", 1)
        index_settings["numberOfShards"] = _env_int("NUMBER_OF_SHARDS", 1)
        index_settings["numberOfReplicas"] = _env_int("NUMBER_OF_REPLICAS", 0)

    mq.create_index(INDEX_NAME, settings_dict=index_settings)
    print("Index created successfully!")

    # this triggers marqo to download the model
    # not needed with Marqo Cloud
    print("Warming the model...")
    mq.index(INDEX_NAME).search("")
    print("Model warmed up!")
# Run the index setup only when this file is executed as a script.
if __name__ == "__main__":
    main()