Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[backend][app] Always send generic email result as best GPT on the transcript, other workflows yolo #2

Merged
merged 3 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions backend/api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ class GetPresignedUrlResponse(BaseModel):
account_id: Optional[str] = None # uuid really


# TODO(P0, ux): Make sure this works with uploading files too; not just the recorded ones
@app.get("/upload/voice", response_model=GetPresignedUrlResponse)
async def get_presigned_url(request: Request, response: Response, current_user: Optional[UserFrontEnd] = Depends(
maybe_get_current_user
Expand Down
88 changes: 60 additions & 28 deletions backend/app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
from app.contacts_dump import run_executive_assistant_to_get_drafts
from app.datashare import PersonDataEntry
from app.emails import (
send_result,
send_networking_per_person_result,
send_result_no_people_found,
send_result_rest_of_the_crowd,
send_technical_failure_email,
wait_for_email_updated_on_data_entry,
wait_for_email_updated_on_data_entry, send_generic_result,
)
from app.food_dump import run_food_ingredient_extraction
from app.form_library import FormName
Expand All @@ -34,7 +34,7 @@
SKIP_SHARE_SPREADSHEET, POSTGRES_LOGIN_URL_FROM_ENV,
)
from gpt_form_filler.form import FormData
from gpt_form_filler.openai_client import CHEAPEST_MODEL, OpenAiClient
from gpt_form_filler.openai_client import CHEAPEST_MODEL, OpenAiClient, BEST_MODEL

from common.gpt_client import open_ai_client_with_db_cache
from common.twillio_client import TwilioClient
Expand All @@ -59,6 +59,7 @@
APP_UPLOADS_BUCKET = "requests-from-api-voxana"
EMAIL_BUCKET = "draft-requests-from-ai-mail-voxana"
PHONE_RECORDINGS_BUCKET = "requests-from-twilio"
GPTo_MODEL = "gpt-4o-2024-08-06"
# RESPONSE_EMAILS_MAX_PER_DATA_ENTRY = 3


Expand Down Expand Up @@ -129,7 +130,7 @@ def sync_form_datas_to_gsheets(account_id: uuid.UUID, form_datas: List[FormData]


# TODO(P0, dumpsheet migration): This is trying to be over-smart, we should just have the user to choose the sheet.
def get_workflow_name(gpt_client: OpenAiClient, transcript: str) -> FormName:
def get_workflow_name(gpt_client: OpenAiClient, transcript: str) -> Optional[FormName]:
topics = "\n".join(
f"* {name} -> {description}"
for name, description in FORM_CLASSIFICATION.items()
Expand All @@ -149,7 +150,7 @@ def get_workflow_name(gpt_client: OpenAiClient, transcript: str) -> FormName:
print(f"classified transcript as {raw_response}")
return FormName.from_str(raw_response)

default_classification = FormName.CONTACTS
default_classification = None
print(
f"WARNING: classified transcript as unknown type: {raw_response}; defaulting to {default_classification}"
)
Expand Down Expand Up @@ -223,7 +224,7 @@ def process_networking_transcript(
# SEND EMAILS
for person in legit_results:
# if user.contact_method() == "email":
send_result(
send_networking_per_person_result(
account_id=data_entry.account_id,
idempotency_id_prefix=data_entry.idempotency_id,
person=person,
Expand Down Expand Up @@ -358,6 +359,39 @@ def first_lambda_handler_wrapper(event, context) -> BaseDataEntry:
return data_entry


def process_generic_prompt(gpt_client, data_entry) -> str:
    """Catch-all "generic" workflow: answer the transcript as an assistant email.

    Produces two GPT outputs from ``data_entry.output_transcript`` — a drafted
    email reply (BEST_MODEL) and a cleaned-up transcription (GPTo_MODEL) — then
    emails both to the account via ``send_generic_result``.

    Returns the drafted reply text.
    """
    print("process_generic_prompt for data_entry", data_entry.id)

    # Created for bookkeeping; its id is not attached to the GPT calls anymore.
    task = Task.create_task(workflow_name="generic_id", data_entry_id=data_entry.id)
    # TODO(P1, devx): With gpt-form-filler migration, we lost the task_id setting. Would be nice to have it back.
    # gpt_client.set_task_id(task.id)

    reply_prefix = "Respond to this prompt as a human executive assistant in a plaintext email: "
    assistant_reply = gpt_client.run_prompt(reply_prefix + data_entry.output_transcript, model=BEST_MODEL)

    cleanup_prompt = """
    Just reformat this transcript, omit filler words, better sentence structure, keep the wording,
    add paragraphs if needed. Especially make sure you keep all mentioned facts and details.
    """
    # CHEAPEST_MODEL leads to overly short answers.
    cleaned_transcript = gpt_client.run_prompt(cleanup_prompt + data_entry.output_transcript, model=GPTo_MODEL)

    # Best-effort: warn but still attempt the send if the email never arrived.
    if not wait_for_email_updated_on_data_entry(data_entry.id, max_wait_seconds=3 * 60):
        print(
            f"WARNING: email missing for data_entry {data_entry.id} - cannot send results email"
        )

    # sync_form_datas_to_gsheets(data_entry.account_id, form_datas=form_datas)
    send_generic_result(
        account_id=data_entry.account_id,
        idempotency_id=data_entry.idempotency_id + "-generic-result",
        email_subject=f"Re: {data_entry.display_name}",
        email_body=assistant_reply + "\n\n === Transcription === \n\n" + cleaned_transcript,
    )

    return assistant_reply


def second_lambda_handler_wrapper(data_entry: BaseDataEntry):
if not wait_for_email_updated_on_data_entry(data_entry.id, max_wait_seconds=5 * 60):
print(
Expand All @@ -367,24 +401,18 @@ def second_lambda_handler_wrapper(data_entry: BaseDataEntry):
acc: BaseAccount = BaseAccount.get_by_id(data_entry.account_id)
print(f"gonna process transcript for account {acc.__dict__}")

if (
str(data_entry.account.id) == "c6b5882d-929a-41c5-8eb0-3740965b8e8e"
or ENV == ENV_LOCAL
):
if (
get_workflow_name(gpt_client, data_entry.output_transcript)
== FormName.FOOD_LOG
):
return process_food_log_transcript(
gpt_client=gpt_client, data_entry=data_entry
)
suggested_workflow_name = get_workflow_name(gpt_client, data_entry.output_transcript)
if suggested_workflow_name == FormName.CONTACTS:
process_networking_transcript(
gpt_client=gpt_client,
data_entry=data_entry,
)
if suggested_workflow_name == FormName.FOOD_LOG:
process_food_log_transcript(
gpt_client=gpt_client, data_entry=data_entry
)

# Our OG product
# NOTE: When we actually separate them - be careful about re-tries to clear the output.
return process_networking_transcript(
gpt_client=gpt_client,
data_entry=data_entry,
)
process_generic_prompt(gpt_client, data_entry)


def _event_idempotency_id(event):
Expand Down Expand Up @@ -443,7 +471,7 @@ def lambda_handler(event, context):
open_ai_client = open_ai_client_with_db_cache()
# open_ai_client.run_prompt(f"test {time.time()}")

test_case = "app" # FOR EASY TEST CASE SWITCHING
test_case = "email" # FOR EASY TEST CASE SWITCHING
orig_data_entry = None
if test_case == "app":
app_account = Account.get_or_onboard_for_email(
Expand All @@ -460,15 +488,14 @@ def lambda_handler(event, context):
test_parsing_too = parse_uuid_from_string(
f"folder/{app_data_entry_id}.webm"
)
# TODO: remember what all this setup shabang does
orig_data_entry = process_app_upload(
gpt_client=open_ai_client,
# audio_filepath="testdata/app-silent-audio.webm",
audio_filepath="testdata/sequioa-guy.webm",
audio_filepath="testdata/brainfarting-boomergpt-mail.m4a",
data_entry_id=test_parsing_too,
)
if test_case == "email":
# with open("testdata/katka-new-draft-test", "rb") as handle:
with open("testdata/katka-middle-1", "rb") as handle:
with open("testdata/boomergpt-mail-email", "rb") as handle:
file_contents = handle.read()
orig_data_entry = process_email_input(
gpt_client=open_ai_client,
Expand Down Expand Up @@ -515,4 +542,9 @@ def lambda_handler(event, context):
data_entry=orig_data_entry,
)

process_generic_prompt(
gpt_client=open_ai_client,
data_entry=orig_data_entry
)

EmailLog.save_last_email_log_to("result-app-app.html")
15 changes: 11 additions & 4 deletions backend/app/contacts_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
DEFAULT_MODEL,
OpenAiClient,
gpt_response_to_json,
num_tokens_from_string,
)

# Min transcript size somewhat trims down on "hallucinations"
Expand All @@ -17,6 +16,13 @@
MAX_TRANSCRIPT_TOKEN_COUNT = 2500 # words


# https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
def poor_mans_token_counter(text: str) -> int:
    """Rough token-count estimate without a tokenizer dependency.

    Per the OpenAI help article above, one token is roughly 4 characters and
    roughly 3/4 of a word (English text); we average the two heuristics.
    It is an estimate only — used for cheap transcript-size gating.
    """
    character_estimate = len(text) / 4
    word_estimate = 3 * len(text.split()) // 4
    return int(character_estimate + word_estimate) // 2


# TODO(P1, devx): Historically, this query give me most of the headaches.
# * GPT-4 suggests using Named Entity Recognition (NER) - with nodes and edges.
# * If it remains a problem - maybe just do it one-by-one, screw token cost.
Expand All @@ -25,7 +31,7 @@ def extract_everyone_i_have_talked_to(
gpt_client: OpenAiClient, full_transcript: str
) -> List:
# NOTE: We shorten the string by words cause easier, but we better estimate the token count by OpenAI counter.
token_count = num_tokens_from_string(full_transcript)
token_count = poor_mans_token_counter(full_transcript)
print(f"Transcript has {token_count} words and {len(full_transcript)} characters")

# This can happen for either super-short, or silent uploads
Expand All @@ -47,14 +53,15 @@ def extract_everyone_i_have_talked_to(
# https://openai.com/blog/function-calling-and-other-api-updates
# TODO(P0, ux): Still often-times it treats "Katka" and "Katka Sabo" as different people.
query_people = """
This is a voice note from a meeting or event where I talked to one or multiple people.
This is a transcribed voice note.
List everybody I have directly talked to, omit mentions of other people in our conversation.
Output a valid json list of strings of the people I have directly talked to
- sometimes I don't recall their names so use a short description.
Voice transcript of our meeting: {}
""".format(
full_transcript
)
# TODO(ux): Maybe worth using GPT4-32k here, also I though I have changed these?
raw_response = gpt_client.run_prompt(query_people)
if raw_response is None:
print("WARNING: Likely no people found in the input transcript")
Expand Down Expand Up @@ -260,7 +267,7 @@ def run_executive_assistant_to_get_drafts(
f"WARNING: full_transcript length too short {MIN_FULL_TRANSCRIPT_CHAR_LENGTH}"
)

token_count = num_tokens_from_string(full_transcript)
token_count = poor_mans_token_counter(full_transcript)
print(f"extract_context_per_person on raw_transcript of {token_count} token count")

people = extract_everyone_i_have_talked_to(gpt_client, full_transcript)
Expand Down
24 changes: 19 additions & 5 deletions backend/app/emails.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def create_raw_email_with_attachments(params: EmailLog):
<head></head>
<body>
"""
+ params.body_text
+ + (params.body_text.replace("\n", "<br />") if params.body_text else "")
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have accepted your suggestion to use the above instead just params.body_text BUT it is actually a bad syntax code leading to this production error (I have tested the version pre-this change accepted).

bad operand type for unary +: 'str'

Traceback (most recent call last):
File "/home/dumpsheet/app/app.py", line 453, in lambda_handler
second_lambda_handler_wrapper(data_entry)
File "/home/dumpsheet/app/app.py", line 415, in second_lambda_handler_wrapper
process_generic_prompt(gpt_client, data_entry)
File "/home/dumpsheet/app/app.py", line 385, in process_generic_prompt
send_generic_result(
File "/home/dumpsheet/app/emails.py", line 536, in send_generic_result
return send_email(params=email_params)
File "/home/dumpsheet/app/emails.py", line 236, in send_email
raw_email = create_raw_email_with_attachments(params)
File "/home/dumpsheet/app/emails.py", line 182, in create_raw_email_with_attachments
+ + (params.body_text.replace("\n", "
") if params.body_text else "")
TypeError: bad operand type for unary +: 'str'

+ """
</body>
</html>
Expand Down Expand Up @@ -237,6 +237,7 @@ def send_email(params: EmailLog) -> bool:

if not is_running_in_aws() or str(SKIP_SENDING_EMAILS) == "1":
# TODO(P2, testing): Ideally we should also test the translation from params to raw email.
# TODO(P1, devx): How this can be printed "an contents None"? If params is None, it should fail earlier.
print(
f"Skipping ses.send_raw_email cause NOT in AWS or SKIP_SENDING_EMAILS={SKIP_SENDING_EMAILS} "
f"Dumping the email {params.idempotency_id} contents {params}"
Expand Down Expand Up @@ -445,7 +446,7 @@ def _form_data_to_email_table_html(form_data: FormData) -> str:
for field in form_data.form.fields:
if field.ignore_in_display or field.ignore_in_email:
print(
f"INFO: ignoring {field.name} for emails (ignore_id_display: {field.ignore_in_display}"
f"INFO: ignoring {field.name} for emails (ignore_id_display: {field.ignore_in_display} "
f"ignore_in_email: {field.ignore_in_email}"
)
continue
Expand All @@ -457,7 +458,7 @@ def _form_data_to_email_table_html(form_data: FormData) -> str:
return "\n".join(rows)


def _craft_result_email_body(
def _craft_networking_person_result_email_body(
person: PersonDataEntry, shareable_link: Optional[str]
) -> (str, str):
# TODO(P1, ux): Migrate to new email template
Expand Down Expand Up @@ -522,12 +523,25 @@ def _craft_result_email_body(
return subject_prefix, res_content_html


def send_result(
def send_generic_result(
    account_id: UUID, idempotency_id: str, email_subject: str, email_body: str
) -> bool:
    """Send a plain-text result email as a reply for the given account.

    Builds reply email parameters from the account's email log, fills in the
    body, and dispatches it. Returns whatever ``send_email`` reports.
    """
    params = EmailLog.get_email_reply_params_for_account_id(
        account_id=account_id,
        idempotency_id=idempotency_id,
        subject=email_subject,
    )
    params.body_text = email_body
    return send_email(params=params)


def send_networking_per_person_result(
account_id: UUID, idempotency_id_prefix: str, person: PersonDataEntry
) -> bool:
person_name_safe = re.sub(r"\W", "-", person.name).lower()
acc: Account = Account.get_by_id(account_id)
subject_prefix, content_html = _craft_result_email_body(
subject_prefix, content_html = _craft_networking_person_result_email_body(
person, shareable_link=acc.get_shareable_spreadsheet_link()
)

Expand Down
1 change: 1 addition & 0 deletions backend/app/gsheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ def add_form_datas_to_spreadsheet(self, form_datas: List[FormData]):
sheet_cache = {}

for form_data in form_datas:
# TODO: AttributeError: 'str' object has no attribute 'value'

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🐛 Bug
The line form_name = form_data.form.form_name.value assumes that form_data.form.form_name is an object with a value attribute. However, the comment suggests that an AttributeError is occurring because form_data.form.form_name is a str object, which does not have a value attribute. This will lead to a runtime error. To fix this, ensure that form_data.form.form_name is correctly accessed as a string, without the .value attribute.

Suggested change
# TODO: AttributeError: 'str' object has no attribute 'value'
form_name = form_data.form.form_name

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct, done in a follow up diff

form_name = form_data.form.form_name.value
if form_name not in sheet_cache:
sheet_cache[form_name] = get_or_create_worksheet(
Expand Down
2 changes: 1 addition & 1 deletion backend/database/email_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def check_if_already_sent(self) -> bool:
@staticmethod
def get_email_reply_params_for_account_id(
account_id: uuid, idempotency_id: str, subject: Optional[str]
):
) -> "EmailLog":
account = Account.get_by_id(account_id)
return EmailLog(
sender=SENDER_EMAIL,
Expand Down
1 change: 1 addition & 0 deletions backend/deploy_lambda_container.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ retry_delay=15

for i in $(seq 1 $max_retries); do
echo "=== Building Image: Attempt $i ==="
# TODO(P), devx): Move this into the /app directory
docker build -t 831154875375.dkr.ecr.us-east-1.amazonaws.com/draft-your-follow-ups . && break
if [ $i -eq $max_retries ]; then
echo "Max retries reached. Exiting."
Expand Down
6 changes: 5 additions & 1 deletion backend/input/app_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
wait_for_email_updated_on_data_entry,
)
from gpt_form_filler.openai_client import OpenAiClient

from common.aws_utils import is_running_in_aws
from database.account import Account
from database.data_entry import STATE_UPLOAD_DONE
from database.email_log import EmailLog
Expand All @@ -28,7 +30,9 @@ def process_app_upload(
converted_audio_filepath = ffmpeg_convert_audio_to_mp4(audio_filepath)

data_entry.output_transcript = gpt_client.transcribe_audio(
audio_filepath=converted_audio_filepath
audio_filepath=converted_audio_filepath,
prompt_hint="voice memo",
use_cache_hit=not is_running_in_aws(),
)
data_entry.state = STATE_UPLOAD_DONE
data_entry.save()
Expand Down
5 changes: 4 additions & 1 deletion backend/input/call.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import re
from typing import Optional

from common.aws_utils import is_running_in_aws
from common.config import SUPPORT_EMAIL
from gpt_form_filler.openai_client import OpenAiClient
from common.twillio_client import TwilioClient
Expand Down Expand Up @@ -94,7 +95,9 @@ def process_voice_recording_input(
audio_filepath = ffmpeg_convert_audio_to_mp4(file_path)
if bool(audio_filepath):
res.output_transcript = gpt_client.transcribe_audio(
audio_filepath=audio_filepath
audio_filepath=audio_filepath,
prompt_hint="phone call to my assistant",
use_cache_hit=not is_running_in_aws(),
)

res.save()
Expand Down
3 changes: 3 additions & 0 deletions backend/input/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

# Here object_prefix is used for both local, response attachments and buckets.
# NOTE: This requires the heavy ffmpeg to be installed on the machine.
# TODO(P0, ux): Support also video formats, unsure why I have picked mp4.
def ffmpeg_convert_audio_to_mp4(file_path):
audio_file = file_path + ".mp4"
print(f"Running ffmpeg on {file_path} outputting to {audio_file}")
Expand All @@ -14,6 +15,8 @@ def ffmpeg_convert_audio_to_mp4(file_path):
check=True,
)
print(f"Converted file saved as: {audio_file}")
# TODO(P0, devx): Make sure the output files is below 25MB OR do this in gpt-form-filler:
# https://platform.openai.com/docs/guides/speech-to-text/longer-inputs
except subprocess.CalledProcessError as e:
print(f"ffmpeg error occurred: {e}")
traceback.print_exc()
Expand Down
Loading