Skip to content

Commit a95efe2

Browse files
authored
Fix incorrect additional-properties in gemini pydantic validation (#354)
* Fix incorrect additional-properties in gemini pydantic validation * 2.8.1 bump
1 parent 9e7326b commit a95efe2

File tree

4 files changed

+87
-9
lines changed

4 files changed

+87
-9
lines changed

deepfabric/llm/client.py

Lines changed: 81 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import outlines
1111

1212
from google import genai
13+
from pydantic import BaseModel
1314

1415
from ..exceptions import DataSetGeneratorError
1516
from .errors import handle_provider_error
@@ -33,6 +34,71 @@ def _raise_generation_error(max_retries: int, error: Exception) -> None:
3334
raise DataSetGeneratorError(msg) from error
3435

3536

37+
def _strip_additional_properties(schema_dict: dict) -> dict:
38+
"""
39+
Recursively remove additionalProperties from JSON schema.
40+
41+
Gemini doesn't support additionalProperties field in JSON schemas.
42+
This function strips it out from the schema and all nested definitions.
43+
44+
Args:
45+
schema_dict: JSON schema dictionary
46+
47+
Returns:
48+
Modified schema dict without additionalProperties
49+
"""
50+
if not isinstance(schema_dict, dict):
51+
return schema_dict
52+
53+
# Remove additionalProperties from current level
54+
schema_dict.pop("additionalProperties", None)
55+
56+
# Recursively process nested structures
57+
if "$defs" in schema_dict:
58+
for def_name, def_schema in schema_dict["$defs"].items():
59+
schema_dict["$defs"][def_name] = _strip_additional_properties(def_schema)
60+
61+
# Process properties
62+
if "properties" in schema_dict:
63+
for prop_name, prop_schema in schema_dict["properties"].items():
64+
schema_dict["properties"][prop_name] = _strip_additional_properties(prop_schema)
65+
66+
# Process items (for arrays)
67+
if "items" in schema_dict:
68+
schema_dict["items"] = _strip_additional_properties(schema_dict["items"])
69+
70+
return schema_dict
71+
72+
73+
def _create_gemini_compatible_schema(schema: type[BaseModel]) -> type[BaseModel]:
    """
    Build a subclass of a Pydantic model whose JSON schema suits Gemini.

    Gemini doesn't support additionalProperties. The returned subclass
    overrides ``model_json_schema`` so the schema produced by the parent
    model is passed through ``_strip_additional_properties`` before being
    handed back to the caller.

    Args:
        schema: Original Pydantic model

    Returns:
        Subclass of ``schema`` named ``<OriginalName>GeminiCompat`` that
        carries the original model's docstring
    """

    class GeminiCompatModel(schema):  # type: ignore[misc,valid-type]
        @classmethod
        def model_json_schema(cls, **kwargs):
            # Let the wrapped model generate its schema, then scrub the
            # keyword Gemini rejects.
            return _strip_additional_properties(super().model_json_schema(**kwargs))

    # Copy the wrapped model's docstring and derive a recognisable name.
    GeminiCompatModel.__doc__ = schema.__doc__
    GeminiCompatModel.__name__ = f"{schema.__name__}GeminiCompat"
    return GeminiCompatModel
100+
101+
36102
def make_outlines_model(provider: str, model_name: str, **kwargs) -> Any:
37103
"""Create an Outlines model for the specified provider and model.
38104
@@ -170,12 +236,18 @@ def generate(self, prompt: str, schema: Any, max_retries: int = 3, **kwargs) ->
170236
# Convert provider-specific parameters
171237
kwargs = self._convert_generation_params(**kwargs)
172238

239+
# For Gemini, use compatible schema without additionalProperties
240+
generation_schema = schema
241+
if self.provider == "gemini" and isinstance(schema, type) and issubclass(schema, BaseModel):
242+
generation_schema = _create_gemini_compatible_schema(schema)
243+
173244
for attempt in range(max_retries):
174245
try:
175246
# Generate JSON string with Outlines using the schema as output type
176-
json_output = self.model(prompt, schema, **kwargs)
247+
json_output = self.model(prompt, generation_schema, **kwargs)
177248

178-
# Parse and validate the JSON response with Pydantic
249+
# Parse and validate the JSON response with the ORIGINAL schema
250+
# This ensures we still get proper validation
179251
return schema.model_validate_json(json_output)
180252

181253
except Exception as e:
@@ -198,9 +270,15 @@ async def generate_async(self, prompt: str, schema: Any, max_retries: int = 3, *
198270
last_error: Exception | None = None
199271
kwargs = self._convert_generation_params(**kwargs)
200272

273+
# For Gemini, use compatible schema without additionalProperties
274+
generation_schema = schema
275+
if self.provider == "gemini" and isinstance(schema, type) and issubclass(schema, BaseModel):
276+
generation_schema = _create_gemini_compatible_schema(schema)
277+
201278
for attempt in range(max_retries):
202279
try:
203-
json_output = await self.async_model(prompt, schema, **kwargs)
280+
json_output = await self.async_model(prompt, generation_schema, **kwargs)
281+
# Validate with original schema to ensure proper validation
204282
return schema.model_validate_json(json_output)
205283
except Exception as e:
206284
last_error = e

examples/xlam_v2_config.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ topic_tree:
1919
Focus on scenarios where users naturally provide information incrementally
2020
and agents need to call multiple functions across conversation turns.
2121
22-
provider: "openai"
23-
model: "gpt-4o-mini"
22+
provider: "gemini"
23+
model: "gemini-2.5-flash-lite"
2424
temperature: 0.8
2525
depth: 3
2626
degree: 4
@@ -47,8 +47,8 @@ data_engine:
4747
Provide accurate, helpful responses and call functions appropriately
4848
to assist users with their requests.
4949
50-
provider: "openai"
51-
model: "gpt-4o"
50+
provider: "gemini"
51+
model: "gemini-2.5-flash-lite"
5252
temperature: 0.7
5353

5454
# Tool rotation for diversity

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "deepfabric"
3-
version = "2.8.0"
3+
version = "2.8.1"
44
description = "Large Scale Topic based Synthetic Data Generation"
55
authors = [{name = "Luke Hinds", email = "[email protected]"}]
66
readme = "README.md"

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)