Skip to content

Commit

Permalink
Merge pull request #17 from sandbox-ai/revert-16-cleanup+logging
Browse files Browse the repository at this point in the history
Revert "Limpie api.py y agregue logging para preguntas individuales"
  • Loading branch information
pablodelucca authored Feb 11, 2024
2 parents 0adad7e + 05e88ce commit 0c2af82
Show file tree
Hide file tree
Showing 4 changed files with 167 additions and 140 deletions.
262 changes: 141 additions & 121 deletions backend/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from flask import Flask
from flask_cors import CORS
from flask import Blueprint, request, jsonify, current_app
from flask import Blueprint, request, jsonify
from flask import Response

import sys
Expand All @@ -22,21 +22,84 @@
# Set up logging level:
logging.basicConfig(level=logging.INFO)

# Set up Flask home Blueprint:
# Path to session configuration file:
config_filepath = r"./config.json"

# Set up RAG session:
#session = RAGSession(config_filepath)

# Blueprint that groups every HTTP route of this API module.
home = Blueprint('home_views', __name__)

try:
    # Set up session:
    #session.set_up()
    # NOTE(review): session setup is commented out above; this placeholder
    # keeps the try block syntactically valid, so the except branch below is
    # dead code unless the session lines are re-enabled.
    hello = "hello"

except Exception as e:
    # Extract error info:
    tb = traceback.extract_tb(e.__traceback__)
    filename, line, func, text = tb[-1]

    # Log the error
    # NOTE(review): `filename` is unpacked above but never interpolated into
    # the template below, which prints a literal "(unknown)" — presumably it
    # should read {filename}; confirm before changing the log format.
    error_message = textwrap.dedent(f"""\
        ========================================
        || ERROR ||
        ========================================
        File: (unknown)
        Function name: {func}
        Line {line}:
        {text}
        Error: {e}""")

    #logger.save_string(error_message, session.logging_filepath)
    raise

# Data-file locations used by the API.
file_path_dnu = "./data/LaLeyDeMilei-raw/decreto_flat.json"
file_path_dnu_unpreppended = (
    "./data/LaLeyDeMilei-raw/decreto_flat_unpreppended.json"
)
file_path_vectorstore = "./data/dnu_vectorstore.json"


# Load the decree corpus and its metadata once at import time
# (reuse the path constants instead of repeating the literals).
data_loader = DataLoader()
dnu = data_loader.load_json(file_path_dnu)
dnu_unpreppended = data_loader.load_json(file_path_dnu_unpreppended)

dnu_metadata = data_loader.load_json("./data/dnu_metadata.json")

# Load the embeddings model BEFORE the vectorstore check below: the original
# code defined `embedder` only after using it inside the "build vectorstore"
# branch, which raised NameError on the first run (when the cache file does
# not exist yet). This runs once at import time, not per request.
embedder = Embedder("dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn")

# Build the vectorstore on first run; reuse the cached copy afterwards.
if not os.path.exists(file_path_vectorstore):
    vectorstore = embedder.embed_text(dnu)
    VectorStoreManager.save_vectorstore(file_path_vectorstore, vectorstore)
else:
    vectorstore = VectorStoreManager.read_vectorstore(file_path_vectorstore)

# Single query engine shared by all requests.
query_engine = QueryEngine(
    vectorstore,
    embedder,
    legal_docs=dnu,
    legal_metadata=dnu_metadata,
    top_k=5,
)



class Handler(http.server.SimpleHTTPRequestHandler):
    """Static-file handler rooted at the compiled frontend bundle."""

    def __init__(self, *args, **kwargs):
        # Serve files from the frontend build output instead of the CWD.
        super().__init__(*args, directory="../frontend/dist/", **kwargs)


def flaskServer(ip='127.0.0.1', port=5000):
    """Create the Flask application and serve it, blocking until shutdown.

    Args:
        ip: Interface address to bind (default: localhost).
        port: TCP port to listen on.
    """
    flask_app = create_application()
    logging.info("Flask serving...")
    # Reloader stays off: this function is also launched inside a
    # multiprocessing child, where the reloader would misbehave.
    flask_app.run(port=port, debug=True, host=ip, use_reloader=False)


class Handler(http.server.SimpleHTTPRequestHandler):
    """Static-file handler rooted at the compiled frontend bundle."""

    # NOTE(review): this class is defined identically twice in this file;
    # the later definition harmlessly rebinds the name, but one copy could
    # be removed.
    def __init__(self, *args, **kwargs):
        # Serve files from the frontend build output instead of the CWD.
        super().__init__(*args, directory="../frontend/dist/", **kwargs)


def httpServer():
PORT = 4200
logging.info("HTTPD serving... http://127.0.0.1:4200")
Expand Down Expand Up @@ -65,75 +128,63 @@ def r_heartbeat():
################################################
@home.route("/api/question", methods=["POST"])
@home.route("/question", methods=["POST"])
def r_question():
    """Answer a user question about the DNU via retrieval + LLM generation.

    Reads a JSON body with a "question" field, retrieves the most similar
    decree chunks, generates an answer grounded in them, logs the exchange,
    and returns JSON with "answer", "sources" and "error" keys.
    """
    # Set up log file:
    # NOTE(review): `logging_dir` is defined elsewhere in this file only under
    # the __main__ guard — if this module is imported by a WSGI server instead
    # of run as a script, this line raises NameError. Confirm the entry point.
    logging_filepath = logger.create_log_file("La Ley de Milei", {}, os.path.join(logging_dir, "LaLeyDeMilei"))

    # Access query_engine from the Flask application context:
    query_engine = current_app.config['QUERY_ENGINE']

    try:
        # Get user query:
        user_query = request.get_json().get("question", "")
        logger.save_user_message(user_query, logging_filepath)

        # Use the query_similarity method to find chunks similar to the query:
        top_k_docs, matching_docs = query_engine.query_similarity(query=user_query)

        # WITHOUT STREAMING
        # Respond user query:
        text = query_engine.generate_llm_response(
            query=user_query,
            client=OpenAI(),
            model_name='gpt-3.5-turbo-0125',
            temperature=0,
            max_tokens=2000,
            streaming=False, #True, #False,
            top_k_docs=top_k_docs,
            matching_docs=matching_docs,
        )

        citations = get_stored_citations(top_k_docs, dnu_metadata)
        #citations = query_engine.generate_complete_citations_dict(matching_docs, top_k_docs)
        #citations = query_engine.get_stored_citations(top_k_docs, dnu_metadata)

        # Log bot response:
        logger.save_bot_message(text, logging_filepath, citations=citations)

        # Build one human-readable source string per citation for the client.
        sources = []
        for citation in citations.values():
            metadata = citation['metadata']
            source_text = citation['text'].strip('\n')
            if metadata['documento'].lower() == 'decreto':
                source = f'{metadata["documento"]}\n{metadata["titulo"]}\n{metadata["capitulo"] + " - " if "capitulo" in metadata else ""}{metadata["articulo"]}\n\n"{source_text}"'
            else:
                source = f'{metadata["documento"]}\n\n"{source_text}"'
            sources.append(source)
            logging.info(source)

        return jsonify(answer=text, sources=sources, error="OK")

    except Exception as e:
        # Extract error info:
        tb = traceback.extract_tb(e.__traceback__)
        filename, line, func, text = tb[-1]

        # Log the error
        # NOTE(review): `filename` is unpacked above but the template prints a
        # literal "(unknown)" — presumably it should interpolate {filename}.
        error_message = textwrap.dedent(f"""\
            ========================================
            || ERROR ||
            ========================================
            File: (unknown)
            Function name: {func}
            Line {line}:
            {text}
            Error: {e}""")

        logger.save_string(error_message, logging_filepath)
        raise
def r_question(embedder=embedder, query_engine=query_engine):
    """Answer a user question about the DNU via retrieval + LLM generation.

    Expects a JSON body with a "question" field. Retrieves the top-k most
    similar decree chunks, asks the LLM for an answer grounded in them, and
    returns JSON with "answer", "sources" and "error" keys.

    The default arguments bind the module-level `embedder` and `query_engine`
    at import time so the route uses the shared, preloaded instances.
    """
    json_result = request.get_json()
    user_query = json_result.get("question", "")

    # Lazy %-style args (and no placeholder-less f-strings) per logging docs.
    logging.info("Param received")
    logging.info("Question : %s", user_query)

    # Use the query_similarity method to find documents similar to the query.
    top_k_docs, matching_docs = query_engine.query_similarity(
        query=user_query
    )

    # Respond to the user query (non-streaming).
    text = query_engine.generate_llm_response(
        query=user_query,
        client=OpenAI(),
        model_name='gpt-3.5-turbo-0125',
        temperature=0,
        max_tokens=2000,
        streaming=False,
        top_k_docs=top_k_docs,
        matching_docs=matching_docs,
    )

    # Map the retrieved chunks to their stored citation metadata.
    citations = get_stored_citations(top_k_docs, dnu_metadata)

    # Build one human-readable source string per citation for the client.
    sources = []
    for citation in citations.values():
        metadata = citation['metadata']
        source_text = citation['text'].strip('\n')
        if metadata['documento'].lower() == 'decreto':
            source = f'{metadata["documento"]}\n{metadata["titulo"]}\n{metadata["capitulo"] + " - " if "capitulo" in metadata else ""}{metadata["articulo"]}\n\n"{source_text}"'
        else:
            source = f'{metadata["documento"]}\n\n"{source_text}"'
        sources.append(source)
        logging.info(source)

    return jsonify(answer=text, sources=sources, error="OK")



# WITH STREAMING

#def generate_stream():
#for chunk in query_engine.generate_llm_response2(
#query=user_query,
Expand All @@ -155,71 +206,40 @@ def r_question():
#return Response(generate_stream(), content_type='application/json')



if __name__ == '__main__':
# Create parser:
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--ip', action='store', default='127.0.0.1', help='IP address, just for vagrant')
parser.add_argument('-p', '--port', action='store', default=5000, help='Listen port')
parser.add_argument('-e', '--env', action='store', default='dev', help='Environment [dev, prod]')
parser.add_argument('-i', '--ip', action='store', default='127.0.0.1',
help='IP address, just for vagrant')
parser.add_argument('-p', '--port', action='store', default=5000,
help='Listen port')
parser.add_argument('-e', '--env', action='store', default='dev',
help='Environment [dev, prod]')

# Extract args from parser:
args = parser.parse_args()
ip = str(args.ip)
port = str(args.port)
env = str(args.env)

# Set up logging file:
logging_dir = "./logs/"

# Load processed DNU:
data_loader = DataLoader()
dnu = data_loader.load_json("./data/LaLeyDeMilei-raw/decreto_flat.json")
dnu_metadata = data_loader.load_json("./data/dnu_metadata.json")

# Load embeddings model:
embedder = Embedder("dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn")

# Load / Create vectorstore:
file_path_vectorstore = "./data/dnu_vectorstore.json"
if os.path.exists(file_path_vectorstore):
vectorstore = VectorStoreManager.read_vectorstore(file_path_vectorstore)
else:
vectorstore = embedder.embed_text(dnu)
VectorStoreManager.save_vectorstore(file_path_vectorstore, vectorstore)

# Initialize query engine:
query_engine = QueryEngine(
vectorstore,
embedder,
legal_docs=dnu,
legal_metadata=dnu_metadata,
top_k=5
)

# Create Flask app:
app = create_application()
# Load query engine into app:
app.config['QUERY_ENGINE'] = query_engine

# Check if environment is set to production:
if env == 'prod':
if (env == 'prod'):
sys.stdout.flush()
# Initialize a separate process for the Flask server:
flask_proc = multiprocessing.Process(name='flask', target=flaskServer, kwargs={"ip": ip, "port": port})
flask_proc.daemon = True # Automatically terminate when the main process ends
kwargs_flask = {"ip": ip, "port": port}
flask_proc = multiprocessing.Process(name='flask',
target=flaskServer,
kwargs=kwargs_flask)
flask_proc.daemon = True

sys.stdout.flush()
# Initialize a separate process for the HTTP server:
httpd_proc = multiprocessing.Process(name='httpd', target=httpServer)
httpd_proc.daemon = True # Automatically terminate when the main process ends
httpd_proc = multiprocessing.Process(name='httpd',
target=httpServer)
httpd_proc.daemon = True

# Start parallel processes:
flask_proc.start()
httpd_proc.start()
# Wait until manual termination or error:
flask_proc.join()
httpd_proc.join()
else:
app.run(port=int(port), debug=True, host=ip, use_reloader=False)

else:
app.run(port=port, debug=True, host=ip, use_reloader=False)

1 change: 1 addition & 0 deletions backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
logging_filepath = logger.create_log_file(bot_name, config, os.path.join(logging_dir, session_name))

try:

# Take user query:
user_query = input("Pregunta sobre el DNU impulsado por el presidente Javier Milei: ")

Expand Down
4 changes: 2 additions & 2 deletions backend/src/custom_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def save_user_message(user_message: str, filepath: str):
f.write(formatted_message)


def save_bot_message(bot_message: str, filepath: str, citations: dict = None, bot_name: str = "Bot"):
def save_bot_message(bot_message: str, bot_name: str, filepath: str, citations: dict = None):
citation_text = ""
if citations is not None:
citation_text = "Sources:"
Expand All @@ -55,7 +55,7 @@ def save_bot_message(bot_message: str, filepath: str, citations: dict = None, bo
========================================
{bot_name}: {bot_message}
----------------------------------
----------------------------------------
{citation_text}
"""
)
Expand Down
Loading

0 comments on commit 0c2af82

Please sign in to comment.