Skip to content

Commit

Permalink
fix(AgentService): unescape unicode characters in conversation text (#45)
Browse files Browse the repository at this point in the history

* fix(AgentService): unescape Unicode characters in conversation text

* style(AgentService): add newline before function definitions
  • Loading branch information
ishandhanani authored Dec 2, 2024
1 parent ec35b56 commit 39a0487
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
12 changes: 12 additions & 0 deletions services/AgentService/monologue_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,12 @@ async def monologue_create_final_conversation(
json_schema=schema,
)

# Ensure all strings are unescaped
if "dialogues" in conversation_json:
for entry in conversation_json["dialogues"]:
if "text" in entry:
entry["text"] = unescape_unicode_string(entry["text"])

prompt_tracker.track(
"create_final_conversation",
prompt,
Expand All @@ -172,3 +178,9 @@ async def monologue_create_final_conversation(
)

return Conversation.model_validate(conversation_json)


def unescape_unicode_string(s: str) -> str:
    """Convert escaped Unicode sequences to actual Unicode characters.

    Only backslash escape sequences are rewritten; every other character,
    including non-ASCII text that is already decoded, is returned unchanged.

    Recognised escapes are the Python string-literal forms: ``\\uXXXX``,
    ``\\UXXXXXXXX``, ``\\xXX``, ``\\N{NAME}``, octal (``\\0`` .. ``\\777``),
    backslash-newline, and the single-character escapes
    ``\\\\ \\' \\" \\a \\b \\f \\n \\r \\t \\v``. An escaped UTF-16 surrogate
    pair (``\\uD83D\\uDE00``) is combined into the single character it encodes.

    Malformed or unknown escapes (a trailing backslash, a truncated ``\\u12``,
    an unknown ``\\N{...}`` name, an out-of-range ``\\U`` value) are left in
    the output verbatim instead of raising.

    Args:
        s: Text that may contain literal backslash escape sequences.

    Returns:
        The text with every recognised escape sequence replaced by the
        character it denotes.
    """
    # Function-scope import keeps this helper self-contained.
    import re

    # NOTE: the previous implementation ran the whole string through
    # ``s.encode("utf-8").decode("unicode-escape")``. That codec interprets
    # raw bytes as Latin-1, so already-decoded non-ASCII text was corrupted
    # (e.g. "é" became "Ã©"). Decoding only the matched escapes avoids this.
    escape_pattern = (
        r"\\(?:"
        # High + low surrogate pair written as two \u escapes.
        r"u(?P<hi>[dD][89abAB][0-9a-fA-F]{2})"
        r"\\u(?P<lo>[dD][c-fC-F][0-9a-fA-F]{2})"
        r"|U[0-9a-fA-F]{8}"
        r"|u[0-9a-fA-F]{4}"
        r"|x[0-9a-fA-F]{2}"
        r"|N\{[A-Za-z0-9 \-]+\}"
        r"|[0-7]{1,3}"
        r"|[\\'\"abfnrtv\n]"
        r")"
    )

    def _decode(match):
        high, low = match.group("hi"), match.group("lo")
        if high is not None:
            # Combine the surrogate halves into one supplementary code point.
            code_point = (
                0x10000
                + ((int(high, 16) - 0xD800) << 10)
                + (int(low, 16) - 0xDC00)
            )
            return chr(code_point)
        token = match.group(0)
        try:
            # The token is pure ASCII by construction, so the codec's
            # Latin-1 interpretation of bytes cannot corrupt anything here.
            return token.encode("ascii").decode("unicode-escape")
        except UnicodeDecodeError:
            # e.g. unknown \N{...} name or \U value above 0x10FFFF.
            return token

    return re.sub(escape_pattern, _decode, s)
12 changes: 12 additions & 0 deletions services/AgentService/podcast_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,12 @@ async def podcast_create_final_conversation(
json_schema=schema,
)

# Ensure all strings are unescaped
if "dialogues" in conversation_json:
for entry in conversation_json["dialogues"]:
if "text" in entry:
entry["text"] = unescape_unicode_string(entry["text"])

prompt_tracker.track(
"create_final_conversation",
prompt,
Expand All @@ -435,3 +441,9 @@ async def podcast_create_final_conversation(
)

return Conversation.model_validate(conversation_json)


def unescape_unicode_string(s: str) -> str:
    """Convert escaped Unicode sequences to actual Unicode characters.

    Only backslash escape sequences are rewritten; every other character,
    including non-ASCII text that is already decoded, is returned unchanged.

    Recognised escapes are the Python string-literal forms: ``\\uXXXX``,
    ``\\UXXXXXXXX``, ``\\xXX``, ``\\N{NAME}``, octal (``\\0`` .. ``\\777``),
    backslash-newline, and the single-character escapes
    ``\\\\ \\' \\" \\a \\b \\f \\n \\r \\t \\v``. An escaped UTF-16 surrogate
    pair (``\\uD83D\\uDE00``) is combined into the single character it encodes.

    Malformed or unknown escapes (a trailing backslash, a truncated ``\\u12``,
    an unknown ``\\N{...}`` name, an out-of-range ``\\U`` value) are left in
    the output verbatim instead of raising.

    Args:
        s: Text that may contain literal backslash escape sequences.

    Returns:
        The text with every recognised escape sequence replaced by the
        character it denotes.
    """
    # Function-scope import keeps this helper self-contained.
    import re

    # NOTE: the previous implementation ran the whole string through
    # ``s.encode("utf-8").decode("unicode-escape")``. That codec interprets
    # raw bytes as Latin-1, so already-decoded non-ASCII text was corrupted
    # (e.g. "é" became "Ã©"). Decoding only the matched escapes avoids this.
    escape_pattern = (
        r"\\(?:"
        # High + low surrogate pair written as two \u escapes.
        r"u(?P<hi>[dD][89abAB][0-9a-fA-F]{2})"
        r"\\u(?P<lo>[dD][c-fC-F][0-9a-fA-F]{2})"
        r"|U[0-9a-fA-F]{8}"
        r"|u[0-9a-fA-F]{4}"
        r"|x[0-9a-fA-F]{2}"
        r"|N\{[A-Za-z0-9 \-]+\}"
        r"|[0-7]{1,3}"
        r"|[\\'\"abfnrtv\n]"
        r")"
    )

    def _decode(match):
        high, low = match.group("hi"), match.group("lo")
        if high is not None:
            # Combine the surrogate halves into one supplementary code point.
            code_point = (
                0x10000
                + ((int(high, 16) - 0xD800) << 10)
                + (int(low, 16) - 0xDC00)
            )
            return chr(code_point)
        token = match.group(0)
        try:
            # The token is pure ASCII by construction, so the codec's
            # Latin-1 interpretation of bytes cannot corrupt anything here.
            return token.encode("ascii").decode("unicode-escape")
        except UnicodeDecodeError:
            # e.g. unknown \N{...} name or \U value above 0x10FFFF.
            return token

    return re.sub(escape_pattern, _decode, s)

0 comments on commit 39a0487

Please sign in to comment.