diff --git a/backend/api/app.py b/backend/api/app.py
index 435ad15..d93bf07 100644
--- a/backend/api/app.py
+++ b/backend/api/app.py
@@ -190,6 +190,7 @@ class GetPresignedUrlResponse(BaseModel):
     account_id: Optional[str] = None  # uuid really
 
 
+# TODO(P0, ux): Make sure this works with uploading files too; not just the recorded ones
 @app.get("/upload/voice", response_model=GetPresignedUrlResponse)
 async def get_presigned_url(request: Request, response: Response, current_user: Optional[UserFrontEnd] = Depends(
     maybe_get_current_user
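
The TODO above is about letting users upload existing audio files, not just in-browser recordings. From the client's side both cases reduce to the same two steps — fetch a presigned URL, then PUT the bytes — so a minimal sketch of that flow (the `presigned_url` response field and the host are assumptions; the diff only shows `account_id` on the response model):

    import requests

    # Hypothetical client flow for GET /upload/voice. The response field name
    # `presigned_url` is an assumption - the diff only shows `account_id`.
    api_base = "https://api.example.com"  # placeholder host
    resp = requests.get(f"{api_base}/upload/voice")
    resp.raise_for_status()
    upload_url = resp.json()["presigned_url"]  # assumed field name

    # An S3 presigned PUT accepts any local file the same way it accepts a
    # recording, which is what the TODO(P0, ux) asks to verify:
    with open("voice-memo.m4a", "rb") as fh:
        requests.put(upload_url, data=fh, headers={"Content-Type": "audio/mp4"})
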
diff --git a/backend/app/app.py b/backend/app/app.py
index e53e30e..8b4ae93 100644
--- a/backend/app/app.py
+++ b/backend/app/app.py
@@ -16,11 +16,11 @@ from app.contacts_dump import run_executive_assistant_to_get_drafts
 from app.datashare import PersonDataEntry
 from app.emails import (
-    send_result,
+    send_networking_per_person_result,
     send_result_no_people_found,
     send_result_rest_of_the_crowd,
     send_technical_failure_email,
-    wait_for_email_updated_on_data_entry,
+    wait_for_email_updated_on_data_entry, send_generic_result,
 )
 from app.food_dump import run_food_ingredient_extraction
 from app.form_library import FormName
@@ -34,7 +34,7 @@
     SKIP_SHARE_SPREADSHEET,
     POSTGRES_LOGIN_URL_FROM_ENV,
 )
 from gpt_form_filler.form import FormData
-from gpt_form_filler.openai_client import CHEAPEST_MODEL, OpenAiClient
+from gpt_form_filler.openai_client import CHEAPEST_MODEL, OpenAiClient, BEST_MODEL
 from common.gpt_client import open_ai_client_with_db_cache
 from common.twillio_client import TwilioClient
@@ -59,6 +59,7 @@
 APP_UPLOADS_BUCKET = "requests-from-api-voxana"
 EMAIL_BUCKET = "draft-requests-from-ai-mail-voxana"
 PHONE_RECORDINGS_BUCKET = "requests-from-twilio"
+GPTo_MODEL = "gpt-4o-2024-08-06"
 
 # RESPONSE_EMAILS_MAX_PER_DATA_ENTRY = 3
@@ -129,7 +130,7 @@ def sync_form_datas_to_gsheets(account_id: uuid.UUID, form_datas: List[FormData]
 
 
 # TODO(P0, dumpsheet migration): This is trying to be over-smart, we should just have the user to choose the sheet.
-def get_workflow_name(gpt_client: OpenAiClient, transcript: str) -> FormName:
+def get_workflow_name(gpt_client: OpenAiClient, transcript: str) -> Optional[FormName]:
     topics = "\n".join(
         f"* {name} -> {description}"
         for name, description in FORM_CLASSIFICATION.items()
@@ -149,7 +150,7 @@ def get_workflow_name(gpt_client: OpenAiClient, transcript: str) -> FormName:
         print(f"classified transcript as {raw_response}")
         return FormName.from_str(raw_response)
 
-    default_classification = FormName.CONTACTS
+    default_classification = None
     print(
         f"WARNING: classified transcript as unknown type: {raw_response}; defaulting to {default_classification}"
     )
@@ -223,7 +224,7 @@ def process_networking_transcript(
     # SEND EMAILS
     for person in legit_results:
         # if user.contact_method() == "email":
-        send_result(
+        send_networking_per_person_result(
             account_id=data_entry.account_id,
             idempotency_id_prefix=data_entry.idempotency_id,
             person=person,
@@ -358,6 +359,39 @@ def first_lambda_handler_wrapper(event, context) -> BaseDataEntry:
     return data_entry
 
 
+def process_generic_prompt(gpt_client, data_entry) -> str:
+    print("process_generic_prompt for data_entry", data_entry.id)
+
+    task = Task.create_task(workflow_name="generic_id", data_entry_id=data_entry.id)
+    # TODO(P1, devx): With the gpt-form-filler migration, we lost the task_id setting. Would be nice to have it back.
+    # gpt_client.set_task_id(task.id)
+
+    format_prompt = "Respond to this prompt as a human executive assistant in a plaintext email: "
+    result = gpt_client.run_prompt(format_prompt + data_entry.output_transcript, model=BEST_MODEL)
+
+    transcript_prompt = """
+    Just reformat this transcript: omit filler words, improve sentence structure, keep the wording,
+    and add paragraphs if needed. Especially make sure you keep all mentioned facts and details.
+    """
+    # CHEAPEST_MODEL leads to overly short answers.
+    transcription = gpt_client.run_prompt(transcript_prompt + data_entry.output_transcript, model=GPTo_MODEL)
+
+    if not wait_for_email_updated_on_data_entry(data_entry.id, max_wait_seconds=3 * 60):
+        print(
+            f"WARNING: email missing for data_entry {data_entry.id} - cannot send results email"
+        )
+
+    # sync_form_datas_to_gsheets(data_entry.account_id, form_datas=form_datas)
+    send_generic_result(
+        account_id=data_entry.account_id,
+        idempotency_id=data_entry.idempotency_id + "-generic-result",
+        email_subject=f"Re: {data_entry.display_name}",
+        email_body=result + "\n\n === Transcription === \n\n" + transcription,
+    )
+
+    return result
+
+
 def second_lambda_handler_wrapper(data_entry: BaseDataEntry):
     if not wait_for_email_updated_on_data_entry(data_entry.id, max_wait_seconds=5 * 60):
         print(
@@ -367,24 +401,18 @@ def second_lambda_handler_wrapper(data_entry: BaseDataEntry):
     acc: BaseAccount = BaseAccount.get_by_id(data_entry.account_id)
     print(f"gonna process transcript for account {acc.__dict__}")
 
-    if (
-        str(data_entry.account.id) == "c6b5882d-929a-41c5-8eb0-3740965b8e8e"
-        or ENV == ENV_LOCAL
-    ):
-        if (
-            get_workflow_name(gpt_client, data_entry.output_transcript)
-            == FormName.FOOD_LOG
-        ):
-            return process_food_log_transcript(
-                gpt_client=gpt_client, data_entry=data_entry
-            )
+    suggested_workflow_name = get_workflow_name(gpt_client, data_entry.output_transcript)
+    if suggested_workflow_name == FormName.CONTACTS:
+        process_networking_transcript(
+            gpt_client=gpt_client,
+            data_entry=data_entry,
+        )
+    if suggested_workflow_name == FormName.FOOD_LOG:
+        process_food_log_transcript(
+            gpt_client=gpt_client, data_entry=data_entry
+        )
 
-    # Our OG product
-    # NOTE: When we actually separate them - be careful about re-tries to clear the output.
-    return process_networking_transcript(
-        gpt_client=gpt_client,
-        data_entry=data_entry,
-    )
+    process_generic_prompt(gpt_client, data_entry)
 
 
 def _event_idempotency_id(event):
@@ -443,7 +471,7 @@ def lambda_handler(event, context):
     open_ai_client = open_ai_client_with_db_cache()
     # open_ai_client.run_prompt(f"test {time.time()}")
 
-    test_case = "app"  # FOR EASY TEST CASE SWITCHING
+    test_case = "email"  # FOR EASY TEST CASE SWITCHING
     orig_data_entry = None
     if test_case == "app":
         app_account = Account.get_or_onboard_for_email(
@@ -460,15 +488,14 @@
         test_parsing_too = parse_uuid_from_string(
             f"folder/{app_data_entry_id}.webm"
         )
+        # TODO: remember what all this setup shebang does
         orig_data_entry = process_app_upload(
             gpt_client=open_ai_client,
-            # audio_filepath="testdata/app-silent-audio.webm",
-            audio_filepath="testdata/sequioa-guy.webm",
+            audio_filepath="testdata/brainfarting-boomergpt-mail.m4a",
             data_entry_id=test_parsing_too,
         )
     if test_case == "email":
-        # with open("testdata/katka-new-draft-test", "rb") as handle:
-        with open("testdata/katka-middle-1", "rb") as handle:
+        with open("testdata/boomergpt-mail-email", "rb") as handle:
             file_contents = handle.read()
         orig_data_entry = process_email_input(
             gpt_client=open_ai_client,
@@ -515,4 +542,9 @@
             data_entry=orig_data_entry,
         )
 
+    process_generic_prompt(
+        gpt_client=open_ai_client,
+        data_entry=orig_data_entry
+    )
+
     EmailLog.save_last_email_log_to("result-app-app.html")
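
Two behavioral changes in app.py are easy to miss in the hunk noise: get_workflow_name now returns Optional[FormName] (None instead of silently defaulting to CONTACTS), and second_lambda_handler_wrapper has no early returns, so it finishes with process_generic_prompt for every entry — classified entries run both their specialized pipeline and the generic reply. A minimal standalone sketch of that contract (plain strings and lambdas stand in for FormName and the real processors):

    from typing import Callable, Dict, Optional

    def dispatch(workflow: Optional[str], handlers: Dict[str, Callable[[], None]]) -> None:
        # Mirrors the patched second_lambda_handler_wrapper: no early returns,
        # so the generic executive-assistant reply always runs last.
        if workflow == "contacts":
            handlers["networking"]()
        if workflow == "food_log":
            handlers["food_log"]()
        handlers["generic"]()

    calls = []
    handlers = {name: (lambda n=name: calls.append(n)) for name in ("networking", "food_log", "generic")}
    dispatch(None, handlers)        # unclassified -> generic only
    assert calls == ["generic"]
    dispatch("contacts", handlers)  # classified -> specialized pipeline, then generic
    assert calls == ["generic", "networking", "generic"]
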
diff --git a/backend/app/contacts_dump.py b/backend/app/contacts_dump.py
index 58e7690..72c3978 100644
--- a/backend/app/contacts_dump.py
+++ b/backend/app/contacts_dump.py
@@ -7,7 +7,6 @@
     DEFAULT_MODEL,
     OpenAiClient,
     gpt_response_to_json,
-    num_tokens_from_string,
 )
 
 # Min transcript size somewhat trims down on "hallucinations"
@@ -17,6 +16,13 @@
 MAX_TRANSCRIPT_TOKEN_COUNT = 2500  # words
 
 
+# https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
+def poor_mans_token_counter(text: str) -> int:
+    by_character = len(text) / 4
+    by_words = 4 * len(text.split()) // 3  # 1 token ~= 3/4 of a word, so tokens ~= 4/3 * words
+    return int(by_character + by_words) // 2
+
+
 # TODO(P1, devx): Historically, this query give me most of the headaches.
 # * GPT-4 suggests using Named Entity Recognition (NER) - with nodes and edges.
 # * If it remains a problem - maybe just do it one-by-one, screw token cost.
@@ -25,7 +31,7 @@ def extract_everyone_i_have_talked_to(
     gpt_client: OpenAiClient, full_transcript: str
 ) -> List:
     # NOTE: We shorten the string by words cause easier, but we better estimate the token count by OpenAI counter.
-    token_count = num_tokens_from_string(full_transcript)
+    token_count = poor_mans_token_counter(full_transcript)
     print(f"Transcript has {token_count} words and {len(full_transcript)} characters")
 
     # This can happen for either super-short, or silent uploads
@@ -47,7 +53,7 @@
     # https://openai.com/blog/function-calling-and-other-api-updates
     # TODO(P0, ux): Still often-times it treats "Katka" and "Katka Sabo" as different people.
     query_people = """
-    This is a voice note from a meeting or event where I talked to one or multiple people.
+    This is a transcribed voice note.
     List everybody I have directly talked to, omit mentions of other people in our conversation.
     Output a valid json list of strings of the people I have directly talked to - sometimes I don't recall their names so use a short description.
@@ -55,6 +61,7 @@ def extract_everyone_i_have_talked_to(
     """.format(
         full_transcript
     )
+    # TODO(ux): Maybe worth using GPT4-32k here; also I thought I had changed these?
     raw_response = gpt_client.run_prompt(query_people)
     if raw_response is None:
         print("WARNING: Likely no people found in the input transcript")
@@ -260,7 +267,7 @@ def run_executive_assistant_to_get_drafts(
             f"WARNING: full_transcript length too short {MIN_FULL_TRANSCRIPT_CHAR_LENGTH}"
         )
 
-    token_count = num_tokens_from_string(full_transcript)
+    token_count = poor_mans_token_counter(full_transcript)
     print(f"extract_context_per_person on raw_transcript of {token_count} token count")
 
     people = extract_everyone_i_have_talked_to(gpt_client, full_transcript)
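
Dropping the num_tokens_from_string import trades exact counts for the chars/4 and 4/3-words heuristic above, which only roughly tracks real tokenizers. If pulling in tiktoken is acceptable in a test, a quick sanity check might look like this (a sketch, run from the backend package root; cl100k_base is an assumption about which encoding the target models use):

    import tiktoken
    from app.contacts_dump import poor_mans_token_counter

    enc = tiktoken.get_encoding("cl100k_base")  # assumed encoding for the chat models
    sample = "Met Jane Doe at the fintech mixer; she wants the deck by Tuesday."
    exact = len(enc.encode(sample))
    approx = poor_mans_token_counter(sample)
    print(f"tiktoken={exact} heuristic={approx}")  # expect the same ballpark, not equality
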
diff --git a/backend/app/emails.py b/backend/app/emails.py
index b7cdff0..c3a4e2c 100644
--- a/backend/app/emails.py
+++ b/backend/app/emails.py
@@ -179,7 +179,7 @@ def create_raw_email_with_attachments(params: EmailLog):
         """
-        + params.body_text
+        + (params.body_text.replace("\n", "