diff --git a/src/openai/lib/_parsing/_completions.py b/src/openai/lib/_parsing/_completions.py
index c160070b66..2f4dd47ba4 100644
--- a/src/openai/lib/_parsing/_completions.py
+++ b/src/openai/lib/_parsing/_completions.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import json
+import weakref
 from typing import TYPE_CHECKING, Any, Iterable, cast
 from typing_extensions import TypeVar, TypeGuard, assert_never
 
@@ -28,6 +29,9 @@
 from ...types.chat.completion_create_params import ResponseFormat as ResponseFormatParam
 from ...types.chat.chat_completion_message_tool_call import Function
 
+# Cache to store weak references to schema objects
+_schema_cache = weakref.WeakKeyDictionary()
+
 ResponseFormatT = TypeVar(
     "ResponseFormatT",
     # if it isn't given then we don't do any parsing
@@ -243,6 +247,10 @@ def type_to_response_format_param(
     # can only be a `type`
     response_format = cast(type, response_format)
 
+    # Check if we already have a schema for this type in the cache
+    if response_format in _schema_cache:
+        return _schema_cache[response_format]
+
     json_schema_type: type[pydantic.BaseModel] | pydantic.TypeAdapter[Any] | None = None
 
     if is_basemodel_type(response_format):
@@ -254,7 +262,7 @@
     else:
         raise TypeError(f"Unsupported response_format type - {response_format}")
 
-    return {
+    schema_param: ResponseFormatParam = {
         "type": "json_schema",
         "json_schema": {
             "schema": to_strict_json_schema(json_schema_type),
@@ -262,3 +270,8 @@
             "strict": True,
         },
     }
+
+    # Store a weak reference to the schema parameter
+    _schema_cache[response_format] = schema_param
+
+    return schema_param
diff --git a/tests/lib/_parsing/test_memory_leak.py b/tests/lib/_parsing/test_memory_leak.py
new file mode 100644
index 0000000000..4a83f4ec0b
--- /dev/null
+++ b/tests/lib/_parsing/test_memory_leak.py
@@ -0,0 +1,50 @@
+import gc
+from typing import List
+
+import pytest
+from pydantic import Field, create_model
+
+from openai.lib._parsing import type_to_response_format_param
+from openai.lib._parsing._completions import _schema_cache
+
+
+@pytest.mark.asyncio
+async def test_async_completions_parse_memory():
+    """Test if AsyncCompletions.parse() doesn't leak memory with dynamic models"""
+    StepModel = create_model(
+        "Step",
+        explanation=(str, Field()),
+        output=(str, Field()),
+    )
+
+    # Clear the cache and record initial state
+    _schema_cache.clear()
+    initial_cache_size = len(_schema_cache)
+
+    # Simulate the issue by creating multiple models and making calls
+    models = []
+    for i in range(10):
+        # Create a new dynamic model each time
+        new_model = create_model(
+            f"MathResponse{i}",
+            steps=(List[StepModel], Field()),
+            final_answer=(str, Field()),
+        )
+        models.append(new_model)
+
+        # Convert to response format and check if it's in the cache
+        type_to_response_format_param(new_model)
+        assert new_model in _schema_cache
+
+    # Record cache size with all models referenced
+    cache_size_with_references = len(_schema_cache)
+
+    # Let the models go out of scope and trigger garbage collection
+    models = None
+    gc.collect()
+
+    # After garbage collection, the cache should be significantly reduced
+    cache_size_after_gc = len(_schema_cache)
+    assert cache_size_after_gc < cache_size_with_references
+    # The cache size should be close to the initial size (with some tolerance)
+    assert cache_size_after_gc < cache_size_with_references / 2
\ No newline at end of file
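
Reviewer note (not part of the patch): below is a minimal, self-contained sketch of the `weakref.WeakKeyDictionary` pattern this change relies on. The `schema_for` helper and the `Dynamic` class are hypothetical stand-ins for `type_to_response_format_param` and a pydantic model created via `create_model()`; only the standard library is required.

# Standalone sketch of the caching pattern added by this patch.
# `schema_for` is a hypothetical stand-in for type_to_response_format_param,
# and a plain class built with type() stands in for a dynamically created
# pydantic model.
import gc
import weakref
from typing import Any, Dict

_cache: "weakref.WeakKeyDictionary[type, Dict[str, Any]]" = weakref.WeakKeyDictionary()


def schema_for(model_type: type) -> Dict[str, Any]:
    # Reuse the cached schema if this exact class has been seen before.
    if model_type in _cache:
        return _cache[model_type]
    schema: Dict[str, Any] = {"type": "json_schema", "name": model_type.__name__}
    # The key is held weakly: the entry disappears once the class itself
    # becomes unreachable, so the cache cannot grow without bound.
    _cache[model_type] = schema
    return schema


def demo() -> None:
    Dynamic = type("Dynamic", (), {})  # simulate a per-request dynamic model
    assert schema_for(Dynamic) is schema_for(Dynamic)  # second call hits the cache
    assert len(_cache) == 1

    del Dynamic
    gc.collect()  # classes sit in reference cycles, so force a collection
    assert len(_cache) == 0  # the entry vanished along with the class


demo()

The choice of `WeakKeyDictionary` is what makes this a fix rather than a new leak: a plain `dict` keyed on the class would pin every dynamically created model in memory forever, whereas weak keys work here because classes are both hashable and weak-referenceable, and each cached schema dict lives only as long as its model class does.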