7 changes: 7 additions & 0 deletions Makefile
@@ -1,6 +1,7 @@
.PHONY: clean install format lint test-unit test-integration test-integration-verbose security build all
.PHONY: test-integration-openai test-integration-gemini test-integration-llm
.PHONY: test-integration-hubs test-integration-spin test-integration-quick
.PHONY: test-integration-graph test-integration-generator

# Base command for integration tests
PYTEST_INTEGRATION = uv run pytest tests/integration --tb=short -v
@@ -49,6 +50,12 @@ test-integration-spin:
test-integration-quick:
$(PYTEST_INTEGRATION) -m "not huggingface"

test-integration-graph:
$(PYTEST_INTEGRATION) tests/integration/test_graph_integration.py

test-integration-generator:
$(PYTEST_INTEGRATION) tests/integration/test_generator_integration.py

security:
uv run bandit -r deepfabric/

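The new targets reuse the PYTEST_INTEGRATION base command, so they inherit --tb=short -v. For reference, a rough programmatic equivalent of `make test-integration-graph` is sketched below; this is an illustrative approximation (it assumes pytest and the project dependencies are installed in the current environment), not part of the change itself.

# Rough equivalent of `make test-integration-graph` (sketch only; assumes pytest
# and the project's dependencies are importable in the current environment).
import sys

import pytest

exit_code = pytest.main(
    [
        "tests/integration/test_graph_integration.py",  # same path the Makefile target passes
        "--tb=short",  # short tracebacks, matching PYTEST_INTEGRATION
        "-v",  # verbose output, matching PYTEST_INTEGRATION
    ]
)
sys.exit(exit_code)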
160 changes: 160 additions & 0 deletions tests/integration/test_generator_integration.py
@@ -0,0 +1,160 @@
"""Integration tests for DataSetGenerator with real API calls."""

import asyncio

import pytest

from deepfabric import DataSetGenerator, Graph

from .conftest import requires_gemini, requires_openai


@pytest.fixture
def openai_generator(openai_config):
"""Create a DataSetGenerator configured for OpenAI."""
return DataSetGenerator(
provider=openai_config["provider"],
model_name=openai_config["model_name"],
generation_system_prompt="You are a helpful assistant that generates training data.",
temperature=openai_config["temperature"],
)


@pytest.fixture
def gemini_generator(gemini_config):
"""Create a DataSetGenerator configured for Gemini."""
return DataSetGenerator(
provider=gemini_config["provider"],
model_name=gemini_config["model_name"],
generation_system_prompt="You are a helpful assistant that generates training data.",
temperature=gemini_config["temperature"],
)


@pytest.fixture
def small_topic_graph(openai_config):
"""Create a small topic graph for generator tests."""
graph = Graph(
topic_prompt="Python Programming",
provider=openai_config["provider"],
model_name=openai_config["model_name"],
temperature=openai_config["temperature"],
degree=2,
depth=1,
)

async def build_graph():
async for _ in graph.build_async():
pass

asyncio.run(build_graph())
return graph


class TestDataSetGeneratorOpenAI:
"""Integration tests for DataSetGenerator with OpenAI provider."""

@requires_openai
@pytest.mark.openai
def test_basic_generation(self, openai_generator):
"""Test basic dataset generation without topic model."""
result = openai_generator.create_data(
num_steps=1,
batch_size=2,
)

# Result should be a HuggingFace Dataset
assert result is not None
assert len(result) >= 1

@requires_openai
@pytest.mark.openai
def test_generation_with_topic_model(self, openai_generator, small_topic_graph):
"""Test dataset generation with a topic graph."""
result = openai_generator.create_data(
num_steps=1,
batch_size=2,
topic_model=small_topic_graph,
)

assert result is not None
assert len(result) >= 1

@requires_openai
@pytest.mark.openai
def test_async_generation(self, openai_generator):
"""Test async dataset generation."""

async def run_async():
return await openai_generator.create_data_async(
num_steps=1,
batch_size=2,
)

result = asyncio.run(run_async())

assert result is not None
assert len(result) >= 1

@requires_openai
@pytest.mark.openai
def test_generation_with_chain_of_thought(self, openai_config):
"""Test generation with chain_of_thought conversation type."""
generator = DataSetGenerator(
provider=openai_config["provider"],
model_name=openai_config["model_name"],
generation_system_prompt="You are a reasoning assistant.",
temperature=openai_config["temperature"],
conversation_type="chain_of_thought",
reasoning_style="freetext",
)

result = generator.create_data(
num_steps=1,
batch_size=1,
)

assert result is not None
assert len(result) >= 1


class TestDataSetGeneratorGemini:
"""Integration tests for DataSetGenerator with Gemini provider."""

@requires_gemini
@pytest.mark.gemini
def test_basic_generation(self, gemini_generator):
"""Test basic dataset generation with Gemini."""

async def run_async():
return await gemini_generator.create_data_async(
num_steps=1,
batch_size=2,
)

result = asyncio.run(run_async())

assert result is not None
assert len(result) >= 1

@requires_gemini
@pytest.mark.gemini
def test_generation_saves_to_file(self, tmp_path, gemini_generator):
"""Test that generated data can be saved."""

async def run_async():
return await gemini_generator.create_data_async(
num_steps=1,
batch_size=2,
)

asyncio.run(run_async())

# Save dataset
out_path = tmp_path / "dataset.jsonl"
gemini_generator.save_dataset(str(out_path))

# Verify file was created
assert out_path.exists()
lines = out_path.read_text().strip().split("\n")
assert len(lines) >= 1
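These tests import requires_openai, requires_gemini, and the provider config fixtures from tests/integration/conftest.py, which is not part of this diff. A minimal sketch of what such helpers commonly look like follows; the environment variable names, model name, and temperature are illustrative assumptions, not the project's actual values.

# Hypothetical sketch of the conftest.py helpers referenced above.
# The real fixtures and markers live in tests/integration/conftest.py;
# the env var names and model settings below are assumptions only.
import os

import pytest

requires_openai = pytest.mark.skipif(
    not os.getenv("OPENAI_API_KEY"),  # skip when no OpenAI credentials are present
    reason="OPENAI_API_KEY not set",
)

requires_gemini = pytest.mark.skipif(
    not os.getenv("GEMINI_API_KEY"),  # skip when no Gemini credentials are present
    reason="GEMINI_API_KEY not set",
)


@pytest.fixture
def openai_config():
    # Provider settings consumed by the generator and graph fixtures.
    return {
        "provider": "openai",
        "model_name": "gpt-4o-mini",  # assumed default; the project may pin a different model
        "temperature": 0.7,
    }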
150 changes: 150 additions & 0 deletions tests/integration/test_graph_integration.py
@@ -0,0 +1,150 @@
"""Integration tests for Graph with real API calls."""

import asyncio
import json

import pytest # pyright: ignore[reportMissingImports]

from deepfabric import Graph

from .conftest import requires_gemini, requires_openai


class TestGraphOpenAI:
"""Integration tests for Graph with OpenAI provider."""

@requires_openai
@pytest.mark.openai
def test_graph_builds_basic(self, openai_config):
"""Test basic graph building with OpenAI."""
degree = 2
depth = 1
topic = "Machine Learning"

graph = Graph(
topic_prompt=topic,
provider=openai_config["provider"],
model_name=openai_config["model_name"],
temperature=openai_config["temperature"],
degree=degree,
depth=depth,
)

async def run_build():
return [event async for event in graph.build_async()]

events = asyncio.run(run_build())

# Verify build completion
completes = [e for e in events if e.get("event") == "build_complete"]
assert len(completes) == 1

# Verify paths were built
paths = graph.get_all_paths()
assert len(paths) >= 1
assert all(p[0] == topic for p in paths)

@requires_openai
@pytest.mark.openai
def test_graph_save_and_load_roundtrip(self, tmp_path, openai_config):
"""Test saving and loading a graph with OpenAI."""
degree = 2
depth = 1
topic = "Data Science"

graph = Graph(
topic_prompt=topic,
provider=openai_config["provider"],
model_name=openai_config["model_name"],
temperature=openai_config["temperature"],
degree=degree,
depth=depth,
)

async def build_graph():
async for _ in graph.build_async():
pass

asyncio.run(build_graph())

# Save to file
out_path = tmp_path / "graph.json"
graph.save(str(out_path))

# Verify file structure
data = json.loads(out_path.read_text())
assert "nodes" in data
assert "root_id" in data
assert len(data["nodes"]) >= 1

# Load into new graph and verify
graph_params = {
"topic_prompt": topic,
"provider": openai_config["provider"],
"model_name": openai_config["model_name"],
"degree": degree,
"depth": depth,
}
new_graph = Graph.from_json(str(out_path), graph_params)

assert new_graph.get_all_paths() == graph.get_all_paths()


class TestGraphGemini:
"""Integration tests for Graph with Gemini provider."""

@requires_gemini
@pytest.mark.gemini
def test_graph_builds_basic(self, gemini_config):
"""Test basic graph building with Gemini."""
degree = 2
depth = 1
topic = "Cloud Computing"

graph = Graph(
topic_prompt=topic,
provider=gemini_config["provider"],
model_name=gemini_config["model_name"],
temperature=gemini_config["temperature"],
degree=degree,
depth=depth,
)

async def run_build():
return [event async for event in graph.build_async()]

events = asyncio.run(run_build())

# Verify build completion
completes = [e for e in events if e.get("event") == "build_complete"]
assert len(completes) == 1

# Verify paths were built
paths = graph.get_all_paths()
assert len(paths) >= 1
assert all(p[0] == topic for p in paths)

@requires_gemini
@pytest.mark.gemini
def test_graph_no_cycles(self, gemini_config):
"""Test that generated graphs have no cycles."""
graph = Graph(
topic_prompt="Software Engineering",
provider=gemini_config["provider"],
model_name=gemini_config["model_name"],
temperature=gemini_config["temperature"],
degree=2,
depth=2,
)

async def build_graph():
async for _ in graph.build_async():
pass

asyncio.run(build_graph())

# Verify no cycles
assert not graph.has_cycle()
# Verify we got paths
paths = graph.get_all_paths()
assert len(paths) >= 1
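Outside pytest, the same build/save/inspect flow these tests cover can be driven directly. The sketch below uses only the Graph calls exercised above (build_async, get_all_paths, save); the credentials and model name are assumptions.

# Standalone sketch of the Graph workflow exercised by the tests above.
# Assumes OpenAI credentials are configured; the model name is illustrative.
import asyncio

from deepfabric import Graph

graph = Graph(
    topic_prompt="Machine Learning",
    provider="openai",
    model_name="gpt-4o-mini",  # assumed model; substitute your own
    temperature=0.7,
    degree=2,  # branching factor per node
    depth=1,  # levels below the root topic
)


async def build():
    # build_async() yields progress events; the tests look for "build_complete".
    async for event in graph.build_async():
        if event.get("event") == "build_complete":
            print("build finished")


asyncio.run(build())

graph.save("graph.json")  # persists nodes and root_id, as asserted in the round-trip test
print(graph.get_all_paths())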
6 changes: 2 additions & 4 deletions tests/integration/test_llm_client_integration.py
@@ -2,7 +2,7 @@

import asyncio

import pytest
import pytest # pyright: ignore[reportMissingImports]

from deepfabric.llm.client import LLMClient
from deepfabric.schemas import ChatMessage, ChatTranscript, TopicList
@@ -68,9 +68,7 @@ async def run_stream():
final_result = None

# generate_async_stream yields tuples: (chunk, None) or (None, result)
async for chunk, result in openai_client.generate_async_stream(
prompt, ChatTranscript
):
async for chunk, result in openai_client.generate_async_stream(prompt, ChatTranscript):
if chunk is not None:
chunks.append(chunk)
if result is not None:
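The reformatted call above follows the streaming contract noted in the comment: generate_async_stream yields (chunk, None) while text is streaming and (None, result) once the parsed object is ready. A minimal consumer of that contract is sketched below, assuming an LLMClient has already been constructed.

# Minimal consumer of the (chunk, result) streaming contract shown above.
# Assumes `client` is an already-constructed deepfabric.llm.client.LLMClient.
from deepfabric.schemas import ChatTranscript


async def stream_transcript(client, prompt: str) -> ChatTranscript:
    chunks = []
    final = None
    async for chunk, result in client.generate_async_stream(prompt, ChatTranscript):
        if chunk is not None:
            chunks.append(chunk)  # incremental text as it arrives
        if result is not None:
            final = result  # fully parsed ChatTranscript at the end
    return final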
2 changes: 1 addition & 1 deletion tests/integration/test_tree_integration.py
@@ -1,7 +1,7 @@
import asyncio
import json

import pytest
import pytest # pyright: ignore[reportMissingImports]

from deepfabric import Tree, topic_manager
from deepfabric.utils import read_topic_tree_from_jsonl