Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Every target listed here is a command, not a file to be produced.
.PHONY: clean install format lint test-unit test-integration test-integration-verbose security build all
.PHONY: test-integration-openai test-integration-gemini test-integration-llm
.PHONY: test-integration-hubs test-integration-spin test-integration-quick
.PHONY: test-integration-graph test-integration-generator

# Base command for integration tests
# NOTE: this already passes the whole tests/integration directory to pytest,
# so appending a single test file to it does not narrow the run by itself;
# narrow with -m/-k, or invoke pytest directly on the file.
PYTEST_INTEGRATION = uv run pytest tests/integration --tb=short -v
Expand Down Expand Up @@ -49,6 +50,12 @@ test-integration-spin:
# Integration run that deselects every test marked "huggingface".
test-integration-quick:
$(PYTEST_INTEGRATION) -m "not huggingface"

# Run only the Graph integration tests.
# pytest is invoked directly instead of through $(PYTEST_INTEGRATION): that
# variable already passes the whole tests/integration directory, so appending
# a file path to it would still collect every integration test.
test-integration-graph:
	uv run pytest tests/integration/test_graph_integration.py --tb=short -v

# Run only the DataSetGenerator integration tests.
# pytest is invoked directly instead of through $(PYTEST_INTEGRATION): that
# variable already passes the whole tests/integration directory, so appending
# a file path to it would still collect every integration test.
test-integration-generator:
	uv run pytest tests/integration/test_generator_integration.py --tb=short -v

# Scan the deepfabric/ package recursively (-r) with bandit.
security:
uv run bandit -r deepfabric/

Expand Down
159 changes: 159 additions & 0 deletions tests/integration/test_generator_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
"""Integration tests for DataSetGenerator with real API calls."""

import asyncio

import pytest

from deepfabric import DataSetGenerator, Graph

from .conftest import requires_gemini, requires_openai


@pytest.fixture
def openai_generator(openai_config):
    """Build a DataSetGenerator from the OpenAI test configuration.

    Provider, model name and temperature are read from ``openai_config``;
    the system prompt is fixed for these tests.
    """
    generator_kwargs = {
        "provider": openai_config["provider"],
        "model_name": openai_config["model_name"],
        "generation_system_prompt": "You are a helpful assistant that generates training data.",
        "temperature": openai_config["temperature"],
    }
    return DataSetGenerator(**generator_kwargs)


@pytest.fixture
def gemini_generator(gemini_config):
    """Build a DataSetGenerator from the Gemini test configuration.

    Provider, model name and temperature are read from ``gemini_config``;
    the system prompt is fixed for these tests.
    """
    generator_kwargs = {
        "provider": gemini_config["provider"],
        "model_name": gemini_config["model_name"],
        "generation_system_prompt": "You are a helpful assistant that generates training data.",
        "temperature": gemini_config["temperature"],
    }
    return DataSetGenerator(**generator_kwargs)


@pytest.fixture
def small_topic_graph(openai_config):
    """Return a small Graph on "Python Programming" after running its build.

    The graph uses the OpenAI settings from ``openai_config`` with degree 2
    and depth 1, and its async build stream is consumed to the end before
    the graph is handed to the test.
    """
    topic_graph = Graph(
        topic_prompt="Python Programming",
        provider=openai_config["provider"],
        model_name=openai_config["model_name"],
        temperature=openai_config["temperature"],
        degree=2,
        depth=1,
    )

    async def _drain_build():
        # The progress events are not needed here; just exhaust the stream.
        async for _event in topic_graph.build_async():
            pass

    asyncio.run(_drain_build())
    return topic_graph


class TestDataSetGeneratorOpenAI:
    """OpenAI-backed integration checks for DataSetGenerator."""

    @requires_openai
    @pytest.mark.openai
    def test_basic_generation(self, openai_generator):
        """Generate one step of two samples with no topic model attached."""
        dataset = openai_generator.create_data(num_steps=1, batch_size=2)

        # Expected to be a HuggingFace Dataset; only non-emptiness is checked.
        assert dataset is not None
        assert len(dataset) >= 1

    @requires_openai
    @pytest.mark.openai
    def test_generation_with_topic_model(self, openai_generator, small_topic_graph):
        """Generate one step of two samples driven by a prebuilt topic graph."""
        dataset = openai_generator.create_data(
            num_steps=1,
            batch_size=2,
            topic_model=small_topic_graph,
        )

        assert dataset is not None
        assert len(dataset) >= 1

    @requires_openai
    @pytest.mark.openai
    def test_async_generation(self, openai_generator):
        """Exercise the async generation entry point via ``asyncio.run``."""

        async def _generate():
            return await openai_generator.create_data_async(num_steps=1, batch_size=2)

        dataset = asyncio.run(_generate())

        assert dataset is not None
        assert len(dataset) >= 1

    @requires_openai
    @pytest.mark.openai
    def test_generation_with_chain_of_thought(self, openai_config):
        """Generate a single sample with the chain_of_thought conversation type."""
        cot_generator = DataSetGenerator(
            provider=openai_config["provider"],
            model_name=openai_config["model_name"],
            generation_system_prompt="You are a reasoning assistant.",
            temperature=openai_config["temperature"],
            conversation_type="chain_of_thought",
            reasoning_style="freetext",
        )

        dataset = cot_generator.create_data(num_steps=1, batch_size=1)

        assert dataset is not None
        assert len(dataset) >= 1


class TestDataSetGeneratorGemini:
    """Integration tests for DataSetGenerator with Gemini provider."""

    @requires_gemini
    @pytest.mark.gemini
    def test_basic_generation(self, gemini_generator):
        """Test basic dataset generation with Gemini (async entry point)."""

        async def run_async():
            return await gemini_generator.create_data_async(
                num_steps=1,
                batch_size=2,
            )

        result = asyncio.run(run_async())

        assert result is not None
        assert len(result) >= 1

    @requires_gemini
    @pytest.mark.gemini
    def test_generation_saves_to_file(self, tmp_path, gemini_generator):
        """Test that generated data can be saved to a non-empty file.

        Generates one step of two samples, saves through ``save_dataset`` and
        checks that the output file exists and holds at least one non-blank
        line.
        """

        async def run_async():
            return await gemini_generator.create_data_async(
                num_steps=1,
                batch_size=2,
            )

        asyncio.run(run_async())

        # Save dataset
        out_path = tmp_path / "dataset.jsonl"
        gemini_generator.save_dataset(str(out_path))

        # Verify file was created
        assert out_path.exists()

        # Count only non-blank lines: "".split("\n") == [""], so checking the
        # length of the raw split result would pass even for an empty file.
        records = [line for line in out_path.read_text().split("\n") if line.strip()]
        assert records, "saved dataset file contains no records"
148 changes: 148 additions & 0 deletions tests/integration/test_graph_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
"""Integration tests for Graph with real API calls."""

import asyncio
import json

import pytest # pyright: ignore[reportMissingImports]

from deepfabric import Graph

from .conftest import requires_gemini, requires_openai


class TestGraphOpenAI:
    """OpenAI-backed integration checks for Graph."""

    @requires_openai
    @pytest.mark.openai
    def test_graph_builds_basic(self, openai_config):
        """Build a small graph and check completion event and root of each path."""
        root_topic = "Machine Learning"

        graph = Graph(
            topic_prompt=root_topic,
            provider=openai_config["provider"],
            model_name=openai_config["model_name"],
            temperature=openai_config["temperature"],
            degree=2,
            depth=1,
        )

        async def _collect_events():
            collected = []
            async for event in graph.build_async():
                collected.append(event)
            return collected

        events = asyncio.run(_collect_events())

        # Exactly one "build_complete" event must be emitted.
        completion_count = sum(1 for e in events if e.get("event") == "build_complete")
        assert completion_count == 1

        # At least one path exists and every path starts at the root topic.
        all_paths = graph.get_all_paths()
        assert len(all_paths) >= 1
        assert all(path[0] == root_topic for path in all_paths)

    @requires_openai
    @pytest.mark.openai
    def test_graph_save_and_load_roundtrip(self, tmp_path, openai_config):
        """Save a built graph to JSON, reload it, and compare the paths."""
        degree = 2
        depth = 1

        graph = Graph(
            topic_prompt="Data Science",
            provider=openai_config["provider"],
            model_name=openai_config["model_name"],
            temperature=openai_config["temperature"],
            degree=degree,
            depth=depth,
        )

        async def _drain_build():
            # Events are irrelevant for this test; just run the build to the end.
            return [event async for event in graph.build_async()]

        asyncio.run(_drain_build())

        # Persist the graph.
        out_path = tmp_path / "graph.json"
        graph.save(str(out_path))

        # The saved document must expose the node map and the root id.
        saved = json.loads(out_path.read_text())
        assert "nodes" in saved
        assert "root_id" in saved
        assert len(saved["nodes"]) >= 1

        # Rebuild a Graph from the file; the topic prompt here is a placeholder.
        reload_params = {
            "topic_prompt": "placeholder",
            "provider": openai_config["provider"],
            "model_name": openai_config["model_name"],
            "degree": degree,
            "depth": depth,
        }
        reloaded = Graph.from_json(str(out_path), reload_params)

        assert reloaded.get_all_paths() == graph.get_all_paths()


class TestGraphGemini:
    """Gemini-backed integration checks for Graph."""

    @requires_gemini
    @pytest.mark.gemini
    def test_graph_builds_basic(self, gemini_config):
        """Build a small graph and check completion event and root of each path."""
        root_topic = "Cloud Computing"

        graph = Graph(
            topic_prompt=root_topic,
            provider=gemini_config["provider"],
            model_name=gemini_config["model_name"],
            temperature=gemini_config["temperature"],
            degree=2,
            depth=1,
        )

        async def _collect_events():
            collected = []
            async for event in graph.build_async():
                collected.append(event)
            return collected

        events = asyncio.run(_collect_events())

        # Exactly one "build_complete" event must be emitted.
        completion_count = sum(1 for e in events if e.get("event") == "build_complete")
        assert completion_count == 1

        # At least one path exists and every path starts at the root topic.
        all_paths = graph.get_all_paths()
        assert len(all_paths) >= 1
        assert all(path[0] == root_topic for path in all_paths)

    @requires_gemini
    @pytest.mark.gemini
    def test_graph_no_cycles(self, gemini_config):
        """Build a depth-2 graph and check it reports no cycle and has paths."""
        graph = Graph(
            topic_prompt="Software Engineering",
            provider=gemini_config["provider"],
            model_name=gemini_config["model_name"],
            temperature=gemini_config["temperature"],
            degree=2,
            depth=2,
        )

        async def _drain_build():
            # Events are irrelevant for this test; just run the build to the end.
            return [event async for event in graph.build_async()]

        asyncio.run(_drain_build())

        # The built graph must not report a cycle.
        assert not graph.has_cycle()

        # And it must expose at least one path.
        all_paths = graph.get_all_paths()
        assert len(all_paths) >= 1
6 changes: 2 additions & 4 deletions tests/integration/test_llm_client_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import asyncio

import pytest
import pytest # pyright: ignore[reportMissingImports]

from deepfabric.llm.client import LLMClient
from deepfabric.schemas import ChatMessage, ChatTranscript, TopicList
Expand Down Expand Up @@ -68,9 +68,7 @@ async def run_stream():
final_result = None

# generate_async_stream yields tuples: (chunk, None) or (None, result)
async for chunk, result in openai_client.generate_async_stream(
prompt, ChatTranscript
):
async for chunk, result in openai_client.generate_async_stream(prompt, ChatTranscript):
if chunk is not None:
chunks.append(chunk)
if result is not None:
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/test_tree_integration.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import asyncio
import json

import pytest
import pytest # pyright: ignore[reportMissingImports]

from deepfabric import Tree, topic_manager
from deepfabric.utils import read_topic_tree_from_jsonl
Expand Down
Loading