|
6 | 6 |
|
7 | 7 | from ddtrace.internal.utils.version import parse_version |
8 | 8 | from ddtrace.llmobs._integrations.utils import _est_tokens |
| 9 | +from ddtrace.llmobs._utils import _get_llmobs_data_metastruct |
9 | 10 | from ddtrace.llmobs._utils import safe_json |
10 | 11 | from tests.contrib.openai.utils import assert_prompt_tracking |
11 | 12 | from tests.contrib.openai.utils import chat_completion_custom_functions |
|
22 | 23 | from tests.llmobs._utils import DEEP_TOOL_SCHEMA |
23 | 24 | from tests.llmobs._utils import _expected_llmobs_llm_span_event |
24 | 25 | from tests.llmobs._utils import _expected_llmobs_non_llm_span_event |
| 26 | +from tests.llmobs._utils import assert_llmobs_span_data |
25 | 27 |
|
26 | 28 |
|
27 | 29 | EXPECTED_TOOL_DEFINITIONS = [ |
@@ -982,6 +984,161 @@ def test_chat_completion_tool_call_with_follow_up( |
982 | 984 | ] |
983 | 985 | ) |
984 | 986 |
|
@pytest.mark.skipif(
    parse_version(openai_module.version.VERSION) < (1, 1),
    reason="Tool calls available after v1.1.0",
)
@mock.patch("openai._base_client.SyncAPIClient.post")
def test_chat_completion_tool_call_preserves_assistant_content(
    self, mock_completions_post, openai, openai_llmobs, test_spans
):
    """Assistant prose sent alongside ``tool_calls`` must be kept in the LLMObs span (MLOS-605).

    OpenAI's native function-calling schema lets a single assistant message carry both
    ``content`` (the model's narration) and ``tool_calls`` (the structured invocation).
    Earlier logic cleared the content unconditionally, which dropped legitimate prose
    from replayed history.
    """
    mock_completions_post.return_value = mock_openai_chat_completions_response
    narration = "I'll look up the student's profile before answering."
    call_id = "call_get_user_context_0"
    follow_up = "Thanks, can you summarize?"

    # Request history: user -> assistant (prose + tool call) -> tool result -> user.
    assistant_turn = {
        "role": "assistant",
        "content": narration,
        "tool_calls": [
            {
                "id": call_id,
                "type": "function",
                "function": {"name": "extract_student_info", "arguments": "{}"},
            }
        ],
    }
    tool_turn = {"role": "tool", "tool_call_id": call_id, "content": '{"verified": true}'}
    request_messages = [
        {"role": "user", "content": chat_completion_input_description},
        assistant_turn,
        tool_turn,
        {"role": "user", "content": follow_up},
    ]
    openai.OpenAI().chat.completions.create(model="gpt-3.5-turbo", messages=request_messages)

    # Flatten every popped trace into one list of spans; exactly one span is expected.
    captured = []
    for trace in test_spans.pop_traces():
        captured.extend(trace)
    assert len(captured) == 1
    span_data = _get_llmobs_data_metastruct(captured[0])

    # Expected span input: the assistant entry keeps its prose next to the tool call.
    expected_assistant = {
        "role": "assistant",
        "content": narration,
        "tool_calls": [
            {"name": "extract_student_info", "arguments": {}, "tool_id": call_id, "type": "function"},
        ],
    }
    expected_tool = {
        "role": "tool",
        "content": "",
        "tool_results": [
            {"name": "", "result": '{"verified": true}', "tool_id": call_id, "type": "tool_result"},
        ],
    }
    assert_llmobs_span_data(
        span_data,
        span_kind="llm",
        input_messages=[
            {"role": "user", "content": chat_completion_input_description},
            expected_assistant,
            expected_tool,
            {"role": "user", "content": follow_up},
        ],
    )
| 1046 | + |
@pytest.mark.skipif(
    parse_version(openai_module.version.VERSION) < (1, 1),
    reason="Tool calls available after v1.1.0",
)
@mock.patch("openai._base_client.SyncAPIClient.post")
def test_chat_completion_tool_call_with_none_content_does_not_leak_string(
    self, mock_completions_post, openai, openai_llmobs, test_spans
):
    """A ``content=None`` assistant message with ``tool_calls`` must not surface as "None" (MLOS-605).

    OpenAI returns ``content=None`` alongside ``tool_calls`` when the model issues a pure
    function call with no narration. Earlier logic ran ``str(None)`` and relied on the
    unconditional content-clear to hide the resulting "None"; once the clear became
    conditional, that literal string leaked into the span. The content is expected to be
    normalized to an empty string instead.
    """
    mock_completions_post.return_value = mock_openai_chat_completions_response
    call_id = "call_get_user_context_0"
    follow_up = "Thanks, can you summarize?"

    # Request history: the assistant turn is a bare tool call with no prose at all.
    assistant_turn = {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {
                "id": call_id,
                "type": "function",
                "function": {"name": "extract_student_info", "arguments": "{}"},
            }
        ],
    }
    tool_turn = {"role": "tool", "tool_call_id": call_id, "content": '{"verified": true}'}
    request_messages = [
        {"role": "user", "content": chat_completion_input_description},
        assistant_turn,
        tool_turn,
        {"role": "user", "content": follow_up},
    ]
    openai.OpenAI().chat.completions.create(model="gpt-3.5-turbo", messages=request_messages)

    # Flatten every popped trace into one list of spans; exactly one span is expected.
    captured = []
    for trace in test_spans.pop_traces():
        captured.extend(trace)
    assert len(captured) == 1
    span_data = _get_llmobs_data_metastruct(captured[0])

    # Expected span input: the assistant content is "" rather than the string "None".
    expected_assistant = {
        "role": "assistant",
        "content": "",
        "tool_calls": [
            {"name": "extract_student_info", "arguments": {}, "tool_id": call_id, "type": "function"},
        ],
    }
    expected_tool = {
        "role": "tool",
        "content": "",
        "tool_results": [
            {"name": "", "result": '{"verified": true}', "tool_id": call_id, "type": "tool_result"},
        ],
    }
    assert_llmobs_span_data(
        span_data,
        span_kind="llm",
        input_messages=[
            {"role": "user", "content": chat_completion_input_description},
            expected_assistant,
            expected_tool,
            {"role": "user", "content": follow_up},
        ],
    )
| 1104 | + |
@pytest.mark.skipif(
    parse_version(openai_module.version.VERSION) < (1, 1),
    reason="Tool calls available after v1.1.0",
)
@mock.patch("openai._base_client.SyncAPIClient.post")
def test_chat_completion_react_style_content_still_deduplicates(
    self, mock_completions_post, openai, openai_llmobs, test_spans
):
    """Regression guard: ReAct-style assistant content is still cleared after extraction.

    When the content literally contains the ``Action:/Action Input:`` pattern and
    structured tool_calls are extracted from it, the content is cleared so the same
    call is not rendered twice in the LLMObs UI.
    """
    mock_completions_post.return_value = mock_openai_chat_completions_response
    react_text = "Action: extract_student_info\nAction Input: {}"
    request_messages = [
        {"role": "user", "content": chat_completion_input_description},
        {"role": "assistant", "content": react_text},
    ]
    openai.OpenAI().chat.completions.create(model="gpt-3.5-turbo", messages=request_messages)

    # Flatten every popped trace into one list of spans; exactly one span is expected.
    captured = []
    for trace in test_spans.pop_traces():
        captured.extend(trace)
    assert len(captured) == 1
    span_data = _get_llmobs_data_metastruct(captured[0])

    # Expected span input: empty content plus one tool call with an empty tool_id.
    expected_assistant = {
        "role": "assistant",
        "content": "",
        "tool_calls": [
            {"name": "extract_student_info", "arguments": {}, "tool_id": "", "type": "function"},
        ],
    }
    assert_llmobs_span_data(
        span_data,
        span_kind="llm",
        input_messages=[
            {"role": "user", "content": chat_completion_input_description},
            expected_assistant,
        ],
    )
| 1141 | + |
985 | 1142 | @pytest.mark.skipif( |
986 | 1143 | parse_version(openai_module.version.VERSION) < (1, 66), |
987 | 1144 | reason="Responses API with custom tools available after v1.66.0", |
|
0 commit comments