Skip to content

Commit 10cc956

Browse files
Release/v1.0.0 (#6)
* feat: adjustments to connect to open search * chore: deleting aws vars * fix: adjust the flow to make queries * feat: improve the server initialization * chore: delete logs * feat: create virtual network
1 parent a71e5b5 commit 10cc956

13 files changed

+262
-188
lines changed

.flaskenv

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
11
FLASK_APP = "main"
2-
FLASK_RUN_PORT = "8000"
2+
FLASK_RUN_PORT = "8080"
33
FLASK_DEBUG=true

Dockerfile

+2-2
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ USER appuser
4343
COPY . .
4444

4545
# Expose the port that the application listens on.
46-
EXPOSE 8000
46+
EXPOSE 8080
4747

4848
# Run the application.
49-
CMD python3 -m flask run --host=0.0.0.0
49+
CMD python3 -m flask run --host=0.0.0.0

compose.yaml

-59
This file was deleted.

config.py

+3-16
Original file line number | Diff line number | Diff line change
@@ -13,41 +13,28 @@ class Config:
1313
Configuration class
1414
"""
1515

16-
DEBUG = environ.get("DEBUG")
1716
LOG_LEVEL = environ.get("LOG_LEVEL")
18-
S3_BUCKET = environ.get("S3_BUCKET")
1917
OPENSEARCH_INDEX = environ.get("OPENSEARCH_INDEX")
2018
OPENSEARCH_HOST = environ.get("OPENSEARCH_HOST")
2119
OPENSEARCH_PORT = environ.get("OPENSEARCH_PORT")
22-
OPENSEARCH_USE_SSL = environ.get("OPENSEARCH_USE_SSL")
23-
OPENSEARCH_VERIFY_CERTS = environ.get("OPENSEARCH_VERIFY_CERTS")
24-
IS_LOCAL = environ.get("IS_LOCAL")
25-
S3_URL = None
26-
S3_INDEX_PATH = environ.get("S3_INDEX_PATH")
2720
OPENSEARCH_USER = environ.get("OPENSEARCH_USER")
2821
OPENSEARCH_PASS = environ.get("OPENSEARCH_PASS")
22+
OPENSEARCH_USE_SSL = environ.get("OPENSEARCH_USE_SSL")
23+
OPENSEARCH_VERIFY_CERTS = environ.get("OPENSEARCH_VERIFY_CERTS")
24+
S3_URL = environ.get("S3_URL")
2925

3026

3127
class DevelopmentConfig(Config):
3228
"""
3329
Development configuration
3430
"""
3531

36-
DEBUG = True
3732
LOG_LEVEL = "DEBUG"
38-
OPENSEARCH_CLUSTER_URL = "http://host.docker.internal:9200"
3933
OPENSEARCH_INDEX = "clone-vector-index"
40-
# OPENSEARCH_HOST = "localhost"
4134
OPENSEARCH_HOST = "host.docker.internal"
4235
OPENSEARCH_PORT = "9200"
4336
OPENSEARCH_USER = ""
4437
OPENSEARCH_PASS = ""
4538
OPENSEARCH_USE_SSL = False
4639
OPENSEARCH_VERIFY_CERTS = False
47-
S3_BUCKET = "clone-ingestion-messages"
48-
IS_LOCAL = True
4940
S3_URL = "http://host.docker.internal:4566"
50-
AWS_ACCESS_KEY_ID = "test"
51-
AWS_SECRET_ACCESS_KEY = "test"
52-
AWS_DEFAULT_REGION = "us-east-1"
53-
S3_INDEX_PATH = "/indexes"

core/abstracts/services.py

+17-10
Original file line number | Diff line number | Diff line change
@@ -29,14 +29,20 @@ class AbstractLlamaIndexService(ABC):
2929

3030
@abstractmethod
3131
def vector_store_index(
32-
self, twin_id: str, source_name: str, file_uuid: str, documents: list
32+
self,
33+
twin_id: str,
34+
source_name: str,
35+
channelId: str,
36+
file_uuid: str,
37+
documents: list,
3338
) -> str:
3439
"""
3540
Abstract method to indexing documents and store vectors in OpenSearch.
3641
3742
Args:
3843
twin_id (str): Identifier for the twin.
3944
source_name (str): Name of the data source.
45+
channelId (str): Channel identifier.
4046
file_uuid (str): UUID of the file containing the documents.
4147
documents (list): List of dictionaries representing documents.
4248
@@ -48,28 +54,29 @@ def vector_store_index(
4854
@abstractmethod
4955
def vectorize_string(self, text_input: str) -> list:
5056
"""
51-
Abstract method to indexing documents and store vectors in OpenSearch.
57+
Abstract method to indexing documents and store vectors in OpenSearch.
5258
53-
Args:
54-
text_input (str): A string to vectorize
59+
Args:
60+
text_input (str): A string to vectorize
5561
56-
Returns:
57-
list: a list of float values representing a vector
58-
"""
62+
Returns:
63+
list: a list of float values representing a vector
64+
"""
5965
pass
6066

6167

6268
class AbstractOpensearchService(ABC):
6369
"""
64-
Abstract class for Opensearch services
65-
"""
70+
Abstract class for Opensearch services
71+
"""
72+
6673
@abstractmethod
6774
def search(self, query: dict) -> list:
6875
"""
6976
Abstract method to query an opensearch index
7077
7178
Args:
72-
query (dict): Opensearch DSL query string
79+
query (dict): Opensearch query string
7380
7481
Returns:
7582
list: a list of results

core/controller/vector.py

+11-5
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from logging import Logger
33
from typing import Any, Dict, Tuple
44

5-
from flask import jsonify, Response
5+
from flask import Response, jsonify
66

77
from core.abstracts.controller import AbstractVectorController
88
from core.abstracts.usescases import AbstractVectorizeUsecase
@@ -42,18 +42,24 @@ def vectoring(self, request: Dict[str, Any]) -> Tuple[Response, int]:
4242

4343
try:
4444
self.usecase.vectorize_and_index(s3_bucket, s3_object_key)
45-
return jsonify({"message": "Object vectorization succeeded!"}), HTTPStatus.OK
45+
return (
46+
jsonify({"message": "Object vectorization succeeded!"}),
47+
HTTPStatus.OK,
48+
)
4649
except Exception as e:
4750
self.logger.error(f"Failed to vectorize object {s3_object_key}")
4851
return jsonify({"error": str(e)}), HTTPStatus.INTERNAL_SERVER_ERROR
4952

5053
def search(self, request: Dict[str, Any]) -> Tuple[Response, int]:
5154
query = request["q"]
5255
if query is None or query.strip() == "":
53-
return jsonify({'error': 'query param "q" is required'}), HTTPStatus.BAD_REQUEST
56+
return (
57+
jsonify({"error": 'query param "q" is required'}),
58+
HTTPStatus.BAD_REQUEST,
59+
)
5460

5561
try:
5662
result = self.usecase.search(query)
57-
return jsonify({'results': result}), HTTPStatus.OK
63+
return jsonify({"results": result}), HTTPStatus.OK
5864
except Exception as e:
59-
return jsonify({'error': str(e)}), HTTPStatus.INTERNAL_SERVER_ERROR
65+
return jsonify({"error": str(e)}), HTTPStatus.INTERNAL_SERVER_ERROR

core/service/llama_index_service.py

+14-7
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@ class LlamaIndexService(AbstractLlamaIndexService):
1919
"""
2020

2121
def __init__(
22-
self,
23-
vector_store: OpensearchVectorStore,
24-
logger: Logger,
22+
self,
23+
vector_store: OpensearchVectorStore,
24+
logger: Logger,
2525
):
2626
"""
2727
Initialize the LlamaIndexService.
@@ -37,14 +37,20 @@ def __init__(
3737
self.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
3838

3939
def vector_store_index(
40-
self, twin_id: str, source_name: str, file_uuid: str, documents: list
40+
self,
41+
twin_id: str,
42+
source_name: str,
43+
channelId: str,
44+
file_uuid: str,
45+
documents: list,
4146
) -> str:
4247
"""
4348
Index documents and store vectors in OpenSearch.
4449
4550
Args:
4651
twin_id (str): Identifier for the twin.
4752
source_name (str): Name of the data source.
53+
channelId (str): Identifier for the channel.
4854
file_uuid (str): UUID of the file containing the documents.
4955
documents (list): list of dictionary representing documents.
5056
@@ -64,28 +70,29 @@ def vector_store_index(
6470
metadata={
6571
"raw_text": message["text"],
6672
"user_name": message["user_name"],
73+
"user_id": message["user_id"],
6774
"processed_user": processed_user,
6875
"twin_id": twin_id,
6976
"source_name": source_name,
7077
"file_uuid": file_uuid,
78+
"channelId": channelId,
7179
},
7280
metadata_seperator=":",
7381
embedding=embed_value,
7482
)
7583
)
76-
self.logger.info(docs)
7784
try:
7885
index = VectorStoreIndex.from_documents(
7986
documents=docs,
8087
storage_context=self.storage_context,
8188
embed_model=self.embed_model,
8289
)
8390
self.logger.info(
84-
f"Indexing documents for {twin_id}/{source_name}/{file_uuid}: {index.summary}"
91+
f"Indexing documents for {twin_id}/{source_name}/{channelId}/{file_uuid}"
8592
)
8693
return index.summary
8794
except Exception as e:
88-
message_error = f"Error while indexing documents for {twin_id}/{source_name}/{file_uuid}"
95+
message_error = f"Error while indexing documents for {twin_id}/{source_name}/{channelId}/{file_uuid}"
8996
self.logger.error(e)
9097
raise ValueError(message_error)
9198

core/service/opensearch_service.py

+6-7
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,9 @@
1-
import json
21
from logging import Logger
32

4-
from core.abstracts.services import AbstractOpensearchService
5-
63
from opensearchpy import OpenSearch
74

5+
from core.abstracts.services import AbstractOpensearchService
6+
87

98
class OpensearchService(AbstractOpensearchService):
109
"""
@@ -24,19 +23,19 @@ def __init__(self, opensearch_client: OpenSearch, index: str, logger: Logger):
2423
self.index = index
2524
self.logger = logger
2625

27-
def search(self, query: str) -> list:
26+
def search(self, query: dict) -> list:
2827
"""
2928
Performs a query to the configured index
3029
3130
Args:
32-
query (str): Opensearch DSL query string
33-
31+
query (dict): the query to perform
3432
Returns:
3533
list: a list of dictionaries with the opensearch query document results
3634
"""
3735
try:
3836
response = self.client.search(index=self.index, body=query)
39-
return response['hits']['hits']
37+
return response["hits"]["hits"]
4038
except Exception as e:
4139
error_message = f"Error while searching in OpenSearch: {str(e)}"
40+
self.logger.error(error_message)
4241
raise Exception(error_message)

0 commit comments

Comments
 (0)