diff --git a/backend/api/app.py b/backend/api/app.py
index 435ad15..d93bf07 100644
--- a/backend/api/app.py
+++ b/backend/api/app.py
@@ -190,6 +190,7 @@ class GetPresignedUrlResponse(BaseModel):
     account_id: Optional[str] = None  # uuid really
 
 
+# TODO(P0, ux): Make sure this works with uploading files too; not just the recorded ones
 @app.get("/upload/voice", response_model=GetPresignedUrlResponse)
 async def get_presigned_url(request: Request, response: Response, current_user: Optional[UserFrontEnd] = Depends(
     maybe_get_current_user
diff --git a/backend/app/app.py b/backend/app/app.py
index e53e30e..8b4ae93 100644
--- a/backend/app/app.py
+++ b/backend/app/app.py
@@ -16,11 +16,11 @@
 from app.contacts_dump import run_executive_assistant_to_get_drafts
 from app.datashare import PersonDataEntry
 from app.emails import (
-    send_result,
+    send_networking_per_person_result,
     send_result_no_people_found,
     send_result_rest_of_the_crowd,
     send_technical_failure_email,
-    wait_for_email_updated_on_data_entry,
+    wait_for_email_updated_on_data_entry, send_generic_result,
 )
 from app.food_dump import run_food_ingredient_extraction
 from app.form_library import FormName
@@ -34,7 +34,7 @@
     SKIP_SHARE_SPREADSHEET,
     POSTGRES_LOGIN_URL_FROM_ENV,
 )
 from gpt_form_filler.form import FormData
-from gpt_form_filler.openai_client import CHEAPEST_MODEL, OpenAiClient
+from gpt_form_filler.openai_client import CHEAPEST_MODEL, OpenAiClient, BEST_MODEL
 from common.gpt_client import open_ai_client_with_db_cache
 from common.twillio_client import TwilioClient
@@ -59,6 +59,7 @@
 APP_UPLOADS_BUCKET = "requests-from-api-voxana"
 EMAIL_BUCKET = "draft-requests-from-ai-mail-voxana"
 PHONE_RECORDINGS_BUCKET = "requests-from-twilio"
+GPTo_MODEL = "gpt-4o-2024-08-06"
 
 
 # RESPONSE_EMAILS_MAX_PER_DATA_ENTRY = 3
@@ -129,7 +130,7 @@ def sync_form_datas_to_gsheets(account_id: uuid.UUID, form_datas: List[FormData]
 
 
 # TODO(P0, dumpsheet migration): This is trying to be over-smart, we should just have the user choose the sheet.
-def get_workflow_name(gpt_client: OpenAiClient, transcript: str) -> FormName:
+def get_workflow_name(gpt_client: OpenAiClient, transcript: str) -> Optional[FormName]:
     topics = "\n".join(
         f"* {name} -> {description}"
         for name, description in FORM_CLASSIFICATION.items()
@@ -149,7 +150,7 @@ def get_workflow_name(gpt_client: OpenAiClient, transcript: str) -> FormName:
         print(f"classified transcript as {raw_response}")
         return FormName.from_str(raw_response)
 
-    default_classification = FormName.CONTACTS
+    default_classification = None
    print(
         f"WARNING: classified transcript as unknown type: {raw_response}; defaulting to {default_classification}"
     )
@@ -223,7 +224,7 @@ def process_networking_transcript(
     # SEND EMAILS
     for person in legit_results:
         # if user.contact_method() == "email":
-        send_result(
+        send_networking_per_person_result(
             account_id=data_entry.account_id,
             idempotency_id_prefix=data_entry.idempotency_id,
             person=person,
@@ -358,6 +359,39 @@ def first_lambda_handler_wrapper(event, context) -> BaseDataEntry:
     return data_entry
 
 
+def process_generic_prompt(gpt_client, data_entry) -> str:
+    print("process_generic_prompt for data_entry", data_entry.id)
+
+    task = Task.create_task(workflow_name="generic_id", data_entry_id=data_entry.id)
+    # TODO(P1, devx): With the gpt-form-filler migration we lost the task_id setting. Would be nice to have it back.
+    # gpt_client.set_task_id(task.id)
+
+    format_prompt = "Respond to this prompt as a human executive assistant in a plaintext email: "
+    result = gpt_client.run_prompt(format_prompt + data_entry.output_transcript, model=BEST_MODEL)
+
+    transcript_prompt = """
+    Just reformat this transcript, omit filler words, better sentence structure, keep the wording,
+    add paragraphs if needed. Especially make sure you keep all mentioned facts and details.
+    """
+    # CHEAPEST_MODEL leads to overly short answers.
+    transcription = gpt_client.run_prompt(transcript_prompt + data_entry.output_transcript, model=GPTo_MODEL)
+
+    if not wait_for_email_updated_on_data_entry(data_entry.id, max_wait_seconds=3 * 60):
+        print(
+            f"WARNING: email missing for data_entry {data_entry.id} - cannot send results email"
+        )
+
+    # sync_form_datas_to_gsheets(data_entry.account_id, form_datas=form_datas)
+    send_generic_result(
+        account_id=data_entry.account_id,
+        idempotency_id=data_entry.idempotency_id + "-generic-result",
+        email_subject=f"Re: {data_entry.display_name}",
+        email_body=result + "\n\n === Transcription === \n\n" + transcription,
+    )
+
+    return result
+
+
 def second_lambda_handler_wrapper(data_entry: BaseDataEntry):
     if not wait_for_email_updated_on_data_entry(data_entry.id, max_wait_seconds=5 * 60):
         print(
@@ -367,24 +401,18 @@ def second_lambda_handler_wrapper(data_entry: BaseDataEntry):
 
     acc: BaseAccount = BaseAccount.get_by_id(data_entry.account_id)
     print(f"gonna process transcript for account {acc.__dict__}")
 
-    if (
-        str(data_entry.account.id) == "c6b5882d-929a-41c5-8eb0-3740965b8e8e"
-        or ENV == ENV_LOCAL
-    ):
-        if (
-            get_workflow_name(gpt_client, data_entry.output_transcript)
-            == FormName.FOOD_LOG
-        ):
-            return process_food_log_transcript(
-                gpt_client=gpt_client, data_entry=data_entry
-            )
+    suggested_workflow_name = get_workflow_name(gpt_client, data_entry.output_transcript)
+    if suggested_workflow_name == FormName.CONTACTS:
+        process_networking_transcript(
+            gpt_client=gpt_client,
+            data_entry=data_entry,
+        )
+    if suggested_workflow_name == FormName.FOOD_LOG:
+        process_food_log_transcript(
+            gpt_client=gpt_client, data_entry=data_entry
+        )
 
-    # Our OG product
-    # NOTE: When we actually separate them - be careful about re-tries to clear the output.
-    return process_networking_transcript(
-        gpt_client=gpt_client,
-        data_entry=data_entry,
-    )
+    process_generic_prompt(gpt_client, data_entry)
 
 
 def _event_idempotency_id(event):
@@ -443,7 +471,7 @@ def lambda_handler(event, context):
     open_ai_client = open_ai_client_with_db_cache()
     # open_ai_client.run_prompt(f"test {time.time()}")
 
-    test_case = "app"  # FOR EASY TEST CASE SWITCHING
+    test_case = "email"  # FOR EASY TEST CASE SWITCHING
     orig_data_entry = None
     if test_case == "app":
         app_account = Account.get_or_onboard_for_email(
@@ -460,15 +488,14 @@
             test_parsing_too = parse_uuid_from_string(
                 f"folder/{app_data_entry_id}.webm"
             )
+            # TODO: remember what all this setup shebang does
             orig_data_entry = process_app_upload(
                 gpt_client=open_ai_client,
-                # audio_filepath="testdata/app-silent-audio.webm",
-                audio_filepath="testdata/sequioa-guy.webm",
+                audio_filepath="testdata/brainfarting-boomergpt-mail.m4a",
                 data_entry_id=test_parsing_too,
             )
     if test_case == "email":
-        # with open("testdata/katka-new-draft-test", "rb") as handle:
-        with open("testdata/katka-middle-1", "rb") as handle:
+        with open("testdata/boomergpt-mail-email", "rb") as handle:
             file_contents = handle.read()
         orig_data_entry = process_email_input(
             gpt_client=open_ai_client,
@@ -515,4 +542,9 @@
         data_entry=orig_data_entry,
     )
 
+    process_generic_prompt(
+        gpt_client=open_ai_client,
+        data_entry=orig_data_entry
+    )
+
     EmailLog.save_last_email_log_to("result-app-app.html")
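
A note on the `second_lambda_handler_wrapper` rewrite above: with the early `return`s removed, `process_generic_prompt` now runs for every data entry after any workflow-specific handler, and `get_workflow_name` can return `None`. A condensed sketch of the new dispatch (stub `FormName` and string stand-ins for the real handlers; the actual enum lives in `app.form_library`):

```python
from enum import Enum
from typing import List, Optional


class FormName(Enum):
    CONTACTS = "contacts"
    FOOD_LOG = "food_log"


def dispatch(workflow: Optional[FormName]) -> List[str]:
    calls = []
    if workflow == FormName.CONTACTS:
        calls.append("process_networking_transcript")
    if workflow == FormName.FOOD_LOG:  # plain `if`, not `elif`, as in the diff
        calls.append("process_food_log_transcript")
    calls.append("process_generic_prompt")  # no early return above, so this always runs
    return calls


assert dispatch(FormName.CONTACTS) == ["process_networking_transcript", "process_generic_prompt"]
assert dispatch(None) == ["process_generic_prompt"]  # unknown transcripts get the generic reply
```
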
diff --git a/backend/app/contacts_dump.py b/backend/app/contacts_dump.py
index 58e7690..72c3978 100644
--- a/backend/app/contacts_dump.py
+++ b/backend/app/contacts_dump.py
@@ -7,7 +7,6 @@
     DEFAULT_MODEL,
     OpenAiClient,
     gpt_response_to_json,
-    num_tokens_from_string,
 )
 
 # Min transcript size somewhat trims down on "hallucinations"
@@ -17,6 +16,13 @@
 MAX_TRANSCRIPT_TOKEN_COUNT = 2500  # words
 
 
+# https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
+def poor_mans_token_counter(text: str) -> int:
+    by_character = len(text) / 4
+    by_words = 3 * len(text.split()) // 4
+    return int(by_character + by_words) // 2
+
+
 # TODO(P1, devx): Historically, this query give me most of the headaches.
 # * GPT-4 suggests using Named Entity Recognition (NER) - with nodes and edges.
 # * If it remains a problem - maybe just do it one-by-one, screw token cost.
@@ -25,7 +31,7 @@ def extract_everyone_i_have_talked_to(
     gpt_client: OpenAiClient, full_transcript: str
 ) -> List:
     # NOTE: We shorten the string by words cause easier, but we better estimate the token count by OpenAI counter.
-    token_count = num_tokens_from_string(full_transcript)
+    token_count = poor_mans_token_counter(full_transcript)
     print(f"Transcript has {token_count} words and {len(full_transcript)} characters")
 
     # This can happen for either super-short, or silent uploads
@@ -47,7 +53,7 @@
     # https://openai.com/blog/function-calling-and-other-api-updates
     # TODO(P0, ux): Still often-times it treats "Katka" and "Katka Sabo" as different people.
     query_people = """
-    This is a voice note from a meeting or event where I talked to one or multiple people.
+    This is a transcribed voice note.
     List everybody I have directly talked to, omit mentions of other people in our conversation.
     Output a valid json list of strings of the people I have directly talked to
     - sometimes I don't recall their names so use a short description.
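
The new `poor_mans_token_counter` replaces the library's `num_tokens_from_string` with the average of two rules of thumb from the linked OpenAI article: roughly 4 characters per token, and roughly 3/4 of a token per word. A worked example (the function is copied from the hunk above so the arithmetic can be checked standalone):

```python
def poor_mans_token_counter(text: str) -> int:
    by_character = len(text) / 4
    by_words = 3 * len(text.split()) // 4
    return int(by_character + by_words) // 2


text = "Matt is a recruiter turned investor based in New York."
# by_character = 54 / 4 = 13.5     (54 characters)
# by_words     = 3 * 10 // 4 = 7   (10 words)
# result       = int(13.5 + 7) // 2 = 20 // 2 = 10
assert poor_mans_token_counter(text) == 10
```

It is deliberately rough; it only gates the MIN/MAX transcript size thresholds here, where being off by a few tokens against the real tokenizer is harmless.
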
diff --git a/backend/app/emails.py b/backend/app/emails.py
index b7cdff0..c3a4e2c 100644
--- a/backend/app/emails.py
+++ b/backend/app/emails.py
@@ -179,7 +179,7 @@ def create_raw_email_with_attachments(params: EmailLog):
             """
-            + params.body_text
+            + (params.body_text.replace("\n", "<br />") if params.body_text else "")
             + """
@@ -237,6 +237,7 @@ def send_email(params: EmailLog) -> bool:
     if not is_running_in_aws() or str(SKIP_SENDING_EMAILS) == "1":
         # TODO(P2, testing): Ideally we should also test the translation from params to raw email.
+        # TODO(P1, devx): How can this be printed as "an contents None"? If params is None, it should fail earlier.
         print(
             f"Skipping ses.send_raw_email cause NOT in AWS or SKIP_SENDING_EMAILS={SKIP_SENDING_EMAILS} "
             f"Dumping the email {params.idempotency_id} contents {params}"
         )
@@ -445,7 +446,7 @@ def _form_data_to_email_table_html(form_data: FormData) -> str:
     for field in form_data.form.fields:
         if field.ignore_in_display or field.ignore_in_email:
             print(
-                f"INFO: ignoring {field.name} for emails (ignore_id_display: {field.ignore_in_display}"
+                f"INFO: ignoring {field.name} for emails (ignore_in_display: {field.ignore_in_display} "
                 f"ignore_in_email: {field.ignore_in_email}"
             )
             continue
@@ -457,7 +458,7 @@
     return "\n".join(rows)
 
 
-def _craft_result_email_body(
+def _craft_networking_person_result_email_body(
     person: PersonDataEntry, shareable_link: Optional[str]
 ) -> (str, str):
     # TODO(P1, ux): Migrate to new email template
@@ -522,12 +523,25 @@
     return subject_prefix, res_content_html
 
 
-def send_result(
+def send_generic_result(
+    account_id: UUID, idempotency_id: str, email_subject: str, email_body: str
+) -> bool:
+    email_params = EmailLog.get_email_reply_params_for_account_id(
+        account_id=account_id,
+        idempotency_id=idempotency_id,
+        subject=email_subject,
+    )
+    email_params.body_text = email_body
+
+    return send_email(params=email_params)
+
+
+def send_networking_per_person_result(
     account_id: UUID, idempotency_id_prefix: str, person: PersonDataEntry
 ) -> bool:
     person_name_safe = re.sub(r"\W", "-", person.name).lower()
     acc: Account = Account.get_by_id(account_id)
-    subject_prefix, content_html = _craft_result_email_body(
+    subject_prefix, content_html = _craft_networking_person_result_email_body(
         person, shareable_link=acc.get_shareable_spreadsheet_link()
     )
 
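
Context for the `create_raw_email_with_attachments` change above: `params.body_text` is plain text spliced into an HTML template, where literal newlines do not render as line breaks, and `body_text` can legitimately be `None`. A small before/after of what the new expression does:

```python
# Newlines become <br /> so a plaintext body keeps its paragraphs in the HTML e-mail.
body_text = "Hi Katka,\n\nHere is the draft.\n- V"
assert body_text.replace("\n", "<br />") == "Hi Katka,<br /><br />Here is the draft.<br />- V"

# The None-guard matters too: the old `+ params.body_text` would raise
# TypeError (str + None); the conditional now degrades to an empty string.
body_text = None
assert (body_text.replace("\n", "<br />") if body_text else "") == ""
```
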
diff --git a/backend/app/gsheets.py b/backend/app/gsheets.py
index 4b9b652..0706850 100644
--- a/backend/app/gsheets.py
+++ b/backend/app/gsheets.py
@@ -228,6 +228,7 @@ def add_form_datas_to_spreadsheet(self, form_datas: List[FormData]):
 
         sheet_cache = {}
         for form_data in form_datas:
+            # TODO: AttributeError: 'str' object has no attribute 'value'
             form_name = form_data.form.form_name.value
             if form_name not in sheet_cache:
                 sheet_cache[form_name] = get_or_create_worksheet(
diff --git a/backend/database/email_log.py b/backend/database/email_log.py
index 9bd6847..150985c 100644
--- a/backend/database/email_log.py
+++ b/backend/database/email_log.py
@@ -51,7 +51,7 @@ def check_if_already_sent(self) -> bool:
     @staticmethod
     def get_email_reply_params_for_account_id(
         account_id: uuid, idempotency_id: str, subject: Optional[str]
-    ):
+    ) -> "EmailLog":
         account = Account.get_by_id(account_id)
         return EmailLog(
             sender=SENDER_EMAIL,
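
On the `email_log.py` annotation fix: the return type has to be the string `"EmailLog"` because the method is defined inside the class body, where the name `EmailLog` is not bound yet; a bare annotation is evaluated at definition time and would raise `NameError`. A toy reproduction (not the real model):

```python
class EmailLog:
    @staticmethod
    def example() -> "EmailLog":  # quoted forward reference: resolved lazily
        return EmailLog()

    # def broken() -> EmailLog:   # NameError during class creation
    #     ...
    # (a `from __future__ import annotations` at the top of the module
    # would also make the unquoted form legal)


assert isinstance(EmailLog.example(), EmailLog)
```
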
diff --git a/backend/deploy_lambda_container.sh b/backend/deploy_lambda_container.sh
index ce2ef58..a4649a7 100755
--- a/backend/deploy_lambda_container.sh
+++ b/backend/deploy_lambda_container.sh
@@ -39,6 +39,7 @@ retry_delay=15
 for i in $(seq 1 $max_retries); do
     echo "=== Building Image: Attempt $i ==="
+    # TODO(P1, devx): Move this into the /app directory
     docker build -t 831154875375.dkr.ecr.us-east-1.amazonaws.com/draft-your-follow-ups . && break
 
     if [ $i -eq $max_retries ]; then
         echo "Max retries reached. Exiting."
diff --git a/backend/input/app_upload.py b/backend/input/app_upload.py
index 3119a8f..7ae554d 100644
--- a/backend/input/app_upload.py
+++ b/backend/input/app_upload.py
@@ -5,6 +5,8 @@
     wait_for_email_updated_on_data_entry,
 )
 from gpt_form_filler.openai_client import OpenAiClient
+
+from common.aws_utils import is_running_in_aws
 from database.account import Account
 from database.data_entry import STATE_UPLOAD_DONE
 from database.email_log import EmailLog
@@ -28,7 +30,9 @@ def process_app_upload(
     converted_audio_filepath = ffmpeg_convert_audio_to_mp4(audio_filepath)
 
     data_entry.output_transcript = gpt_client.transcribe_audio(
-        audio_filepath=converted_audio_filepath
+        audio_filepath=converted_audio_filepath,
+        prompt_hint="voice memo",
+        use_cache_hit=not is_running_in_aws(),
     )
     data_entry.state = STATE_UPLOAD_DONE
     data_entry.save()
diff --git a/backend/input/call.py b/backend/input/call.py
index 5860a24..c41e89e 100644
--- a/backend/input/call.py
+++ b/backend/input/call.py
@@ -3,6 +3,7 @@
 import re
 from typing import Optional
 
+from common.aws_utils import is_running_in_aws
 from common.config import SUPPORT_EMAIL
 from gpt_form_filler.openai_client import OpenAiClient
 from common.twillio_client import TwilioClient
@@ -94,7 +95,9 @@ def process_voice_recording_input(
     audio_filepath = ffmpeg_convert_audio_to_mp4(file_path)
     if bool(audio_filepath):
         res.output_transcript = gpt_client.transcribe_audio(
-            audio_filepath=audio_filepath
+            audio_filepath=audio_filepath,
+            prompt_hint="phone call to my assistant",
+            use_cache_hit=not is_running_in_aws(),
         )
 
     res.save()
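
The `transcribe_audio` call sites in this diff all gain the same pair of kwargs: a `prompt_hint` to bias the transcription toward the expected audio style, and `use_cache_hit=not is_running_in_aws()` so that only local runs reuse cached transcripts while production always transcribes fresh. A toy illustration of that gating (gpt-form-filler's real cache is persistent and DB-backed; this in-memory dict and the function names are just for the demo):

```python
_cache = {}
calls = []


def fake_whisper(path: str) -> str:
    calls.append(path)
    return f"transcript of {path}"


def transcribe(path: str, use_cache_hit: bool) -> str:
    if use_cache_hit and path in _cache:
        return _cache[path]  # local rerun: skip the paid API call
    _cache[path] = fake_whisper(path)
    return _cache[path]


transcribe("testdata/x.mp4", use_cache_hit=True)   # miss -> calls the API
transcribe("testdata/x.mp4", use_cache_hit=True)   # hit  -> served from cache
assert len(calls) == 1
transcribe("testdata/x.mp4", use_cache_hit=False)  # prod path: always calls
assert len(calls) == 2
```
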
diff --git a/backend/input/common.py b/backend/input/common.py
index 9340df2..c07bc36 100644
--- a/backend/input/common.py
+++ b/backend/input/common.py
@@ -4,6 +4,7 @@
 
 # Here object_prefix is used for both local, response attachments and buckets.
 # NOTE: This requires the heavy ffmpeg to be installed on the machine.
+# TODO(P0, ux): Also support video formats; unsure why I picked mp4.
 def ffmpeg_convert_audio_to_mp4(file_path):
     audio_file = file_path + ".mp4"
     print(f"Running ffmpeg on {file_path} outputting to {audio_file}")
@@ -14,6 +15,8 @@ def ffmpeg_convert_audio_to_mp4(file_path):
             check=True,
         )
         print(f"Converted file saved as: {audio_file}")
+        # TODO(P0, devx): Make sure the output file is below 25MB OR do this in gpt-form-filler:
+        #   https://platform.openai.com/docs/guides/speech-to-text/longer-inputs
     except subprocess.CalledProcessError as e:
         print(f"ffmpeg error occurred: {e}")
         traceback.print_exc()
diff --git a/backend/input/email.py b/backend/input/email.py
index 579928b..92fd410 100644
--- a/backend/input/email.py
+++ b/backend/input/email.py
@@ -9,6 +9,8 @@
     store_and_get_attachments_from_email,
 )
 from gpt_form_filler.openai_client import OpenAiClient
+
+from common.aws_utils import is_running_in_aws
 from database.account import Account
 from database.data_entry import STATE_UPLOAD_DONE
 from database.email_log import EmailLog
@@ -79,6 +81,9 @@ def process_email_input(
         subject=f"Re: {base_email_params.subject}",
         # NOTE: We do NOT include the original attachments cause in the reply
     )
+    # TODO(P0, ux): Kinda annoying to get the confirmation every time,
+    #   instead we should just send an error email when something fails.
+    #   That is around the time Poor Man's Opsgenie runs.
     send_confirmation(
         params=confirmation_email_params,
         first_time_use=account.gsheet_id is None,
@@ -100,7 +105,11 @@
             audio_filepath = ffmpeg_convert_audio_to_mp4(attachment_file_path)
             if bool(audio_filepath):
                 input_transcripts.append(
-                    gpt_client.transcribe_audio(audio_filepath=audio_filepath)
+                    gpt_client.transcribe_audio(
+                        audio_filepath=audio_filepath,
+                        prompt_hint="voice memo",
+                        use_cache_hit=not is_running_in_aws(),
+                    )
                 )
     result.output_transcript = "\n\n".join(input_transcripts)
     result.save()
diff --git a/webapp/styles/app.css b/webapp/styles/app.css
index 1711386..1db20b7 100644
--- a/webapp/styles/app.css
+++ b/webapp/styles/app.css
@@ -613,6 +613,14 @@ video {
   inset: 0px;
 }
 
+.-left-3 {
+  left: -0.75rem;
+}
+
+.-top-2 {
+  top: -0.5rem;
+}
+
 .left-1\/2 {
   left: 50%;
 }
@@ -685,6 +693,11 @@ video {
   margin-bottom: 0.25rem;
 }
 
+.mx-4 {
+  margin-left: 1rem;
+  margin-right: 1rem;
+}
+
 .mb-16 {
   margin-bottom: 4rem;
 }
@@ -749,6 +762,10 @@ video {
   margin-top: 2rem;
 }
 
+.mb-6 {
+  margin-bottom: 1.5rem;
+}
+
 .block {
   display: block;
 }
@@ -841,6 +858,10 @@ video {
   height: 1px;
 }
 
+.h-16 {
+  height: 4rem;
+}
+
 .max-h-screen {
   max-height: 100vh;
 }
@@ -1096,6 +1117,12 @@ video {
   margin-bottom: calc(2rem * var(--tw-space-y-reverse));
 }
 
+.space-x-8 > :not([hidden]) ~ :not([hidden]) {
+  --tw-space-x-reverse: 0;
+  margin-right: calc(2rem * var(--tw-space-x-reverse));
+  margin-left: calc(2rem * calc(1 - var(--tw-space-x-reverse)));
+}
+
 .self-center {
   align-self: center;
 }
@@ -1248,6 +1275,11 @@ video {
   background-color: rgb(209 213 219 / var(--tw-bg-opacity));
 }
 
+.bg-gray-500 {
+  --tw-bg-opacity: 1;
+  background-color: rgb(107 114 128 / var(--tw-bg-opacity));
+}
+
 .bg-green-500 {
   --tw-bg-opacity: 1;
   background-color: rgb(34 197 94 / var(--tw-bg-opacity));
@@ -1422,10 +1454,6 @@ video {
   padding-right: 0.5rem;
 }
 
-.pr-4 {
-  padding-right: 1rem;
-}
-
 .pr-6 {
   padding-right: 1.5rem;
 }
@@ -1454,6 +1482,10 @@ video {
   padding-top: 1rem;
 }
 
+.pr-4 {
+  padding-right: 1rem;
+}
+
 .text-left {
   text-align: left;
 }