feat: Change package name

drmingler · Feb 14, 2025 · 5230690 · 5230690
1 parent 70af57c
commit 5230690
Show file tree

Hide file tree

Showing 14 changed files with 80 additions and 80 deletions.
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
-# LLMLoader
+# SmartLLMLoader
 
-llm-loader is a lightweight yet powerful Python package that transforms any document into LLM-ready chunks. It handles the entire document processing pipeline:
+smart-llm-loader is a lightweight yet powerful Python package that transforms any document into LLM-ready chunks. It handles the entire document processing pipeline:
 
 - 📄 Converts documents to clean markdown
 - 🔍 Built-in OCR for scanned documents and images
@@ -9,9 +9,9 @@ llm-loader is a lightweight yet powerful Python package that transforms any docu
 - 📦 Ready for vector stores and LLM ingestion
 
 Spend less time on preprocessing headaches and more time building what matters. From RAG systems to chatbots to document Q&A, 
-LLMLoader handles the heavy lifting so you can focus on creating exceptional AI applications. 
+SmartLLMLoader handles the heavy lifting so you can focus on creating exceptional AI applications. 
 
-LLMLoader's chunking approach has been benchmarked against traditional methods, showing superior performance particularly when paired with Google's Gemini Flash model. This combination offers an efficient and cost-effective solution for document chunking in RAG systems. View the detailed performance comparison [here](https://www.sergey.fyi/articles/gemini-flash-2).
+SmartLLMLoader's chunking approach has been benchmarked against traditional methods, showing superior performance particularly when paired with Google's Gemini Flash model. This combination offers an efficient and cost-effective solution for document chunking in RAG systems. View the detailed performance comparison [here](https://www.sergey.fyi/articles/gemini-flash-2).
 
 
 ## Features
@@ -45,24 +45,24 @@ brew install poppler
 
 ### Package Installation
 
-You can install LLMLoader using pip:
+You can install SmartLLMLoader using pip:
 
 ```bash
-pip install llm-loader
+pip install smart-llm-loader
 ```
 
 Or using Poetry:
 
 ```bash
-poetry add llm-loader
+poetry add smart-llm-loader
 ```
 
 ## Quick Start
-llm-loader package uses litellm to call the LLM so any arguments supported by litellm can be used. You can find the litellm documentation [here](https://docs.litellm.ai/docs/providers).
+The smart-llm-loader package uses litellm to call the LLM, so any arguments supported by litellm can be used. You can find the litellm documentation [here](https://docs.litellm.ai/docs/providers).
 You can use any multi-modal model supported by litellm.
 
 ```python
-from llm_loader import LLMLoader
+from smart_llm_loader import SmartLLMLoader
 
 
 # Using Gemini Flash model
@@ -79,7 +79,7 @@ model = "anthropic/claude-3-5-sonnet"
 
 
 # Initialize the document loader
-loader = LLMLoader(
+loader = SmartLLMLoader(
     file_path="your_document.pdf",
     chunk_strategy="contextual",
     model=model,
@@ -91,7 +91,7 @@ documents = loader.load_and_split()
 ## Parameters
 
 ```python
-class LLMLoader(BaseLoader):
+class SmartLLMLoader(BaseLoader):
     """A flexible document loader that supports multiple input types."""
 
     def __init__(
@@ -110,7 +110,7 @@ class LLMLoader(BaseLoader):
 
 ## Comparison with Traditional Methods
 
-Let's see LLMLoader in action! We'll compare it with PyMuPDF (a popular traditional document loader) to demonstrate why LLMLoader's intelligent chunking makes such a difference in real-world applications.
+Let's see SmartLLMLoader in action! We'll compare it with PyMuPDF (a popular traditional document loader) to demonstrate why SmartLLMLoader's intelligent chunking makes such a difference in real-world applications.
 
 ### The Challenge: Processing an Invoice
 We'll process this sample invoice that includes headers, tables, and complex formatting:
@@ -119,8 +119,8 @@ We'll process this sample invoice that includes headers, tables, and complex for
 
 ### Head-to-Head Comparison
 
-#### 1. LLMLoader Output
-LLMLoader intelligently breaks down the document into semantic chunks, preserving structure and meaning (note that the json output below has been formatted for readability):
+#### 1. SmartLLMLoader Output
+SmartLLMLoader intelligently breaks down the document into semantic chunks, preserving structure and meaning (note that the JSON output below has been formatted for readability):
 
 ```json
 [
@@ -232,7 +232,7 @@ Let's see how this difference affects a real Question-Answering system:
 ```python
 question = "What is the total gross worth for item 1 and item 7?"
 
-# LLMLoader Result ✅
+# SmartLLMLoader Result ✅
 "The total gross worth for item 1 (Lilly Pulitzer dress) is $247.50 and for item 7 
 (J.Crew Collection sweater dress) is $33.00. 
 Total: $280.50"
@@ -242,7 +242,7 @@ Total: $280.50"
 Total: $78.00"
 ```
 
-**Why LLMLoader Won:**
+**Why SmartLLMLoader Won:**
 - 🎯 Maintained table structure
 - 💡 Preserved relationships between data
 - 📊 Accurate calculations

diff --git a/examples/data/test_ocr_doc.png b/examples/data/test_ocr_doc.png
diff --git a/examples/ocr_example.py b/examples/ocr_example.py
@@ -1,10 +1,10 @@
 """
-Example usage of different document loaders (llm_loader and PyMuPDF) for RAG applications.
+Example usage of different document loaders (smart-llm-loader and PyMuPDF) for RAG applications.
 """
 import os
 from dotenv import load_dotenv
 
-from llm_loader.document_loader import LLMLoader
+from smart_llm_loader import SmartLLMLoader
 
 # Load environment variables
 load_dotenv()
@@ -13,14 +13,14 @@
 os.environ["OPENAI_API_KEY"] = "YOUR_OPENAI_API_KEY"
 
 # Gemini API key since we are using the gemini flash model
-os.environ["GEMINI_API_KEY"] = "YOUR_GEMINI"
+os.environ["GEMINI_API_KEY"] = "YOUR_GEMINI_API_KEY"
 
 
 def process_with_llmloader():
-    """Process documents using LLMLoader with Gemini Flash."""
+    """Process documents using SmartLLMLoader with Gemini Flash."""
 
-    # Initialize the loader from the llm_loader package
-    loader = LLMLoader(
+    # Initialize the loader from the smart-llm-loader package
+    loader = SmartLLMLoader(
         file_path="./data/test_ocr_doc.pdf",
         chunk_strategy="contextual",
         model="gemini/gemini-1.5-flash",

diff --git a/examples/rag_example.py b/examples/rag_example.py
@@ -1,17 +1,17 @@
 """
-Example usage of different document loaders (llm_loader and PyMuPDF) for RAG applications.
+Example usage of different document loaders (smart-llm-loader and PyMuPDF) for RAG applications.
 """
 import os
 from dotenv import load_dotenv
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.chat_models import ChatOpenAI
 from langchain_community.document_loaders import PyMuPDFLoader
-from langchain_community.embeddings import OpenAIEmbeddings
 from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import PromptTemplate
 from langchain_core.runnables import RunnablePassthrough
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from langchain_community.vectorstores import FAISS
-from llm_loader.document_loader import LLMLoader
-from langchain_core.prompts import PromptTemplate
+
+from smart_llm_loader import SmartLLMLoader
 
 # Load environment variables
 load_dotenv()
@@ -20,7 +20,7 @@
 os.environ["OPENAI_API_KEY"] = "YOUR_OPENAI_API_KEY"
 
 # Gemini API key since we are using the gemini flash model
-os.environ["GEMINI_API_KEY"] = "YOUR_GEMINI"
+os.environ["GEMINI_API_KEY"] = "YOUR_GEMINI_API_KEY"
 
 
 def create_rag_chain(retriever, llm):
@@ -48,11 +48,11 @@ def format_docs(docs):
 
 
 def process_with_llmloader():
-    """Process documents using LLMLoader with Gemini Flash."""
+    """Process documents using SmartLLMLoader with Gemini Flash."""
     llm = ChatOpenAI(model="gpt-4o-mini")
 
-    # Initialize the loader from the llm_loader package
-    loader = LLMLoader(
+    # Initialize the loader from the smart-llm-loader package
+    loader = SmartLLMLoader(
         file_path="./data/test_ocr_doc.pdf",
         chunk_strategy="contextual",
         model="gemini/gemini-1.5-flash",

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,12 +1,12 @@
 [tool.poetry]
-name = "llm-loader"
+name = "smart-llm-loader"
 version = "0.1.0"
 description = "A powerful PDF processing toolkit that seamlessly integrates with LLMs for intelligent document chunking and RAG applications. Features smart context-aware segmentation, multi-LLM support, and optimized content extraction for enhanced RAG performance."
 authors = ["drmingler <[email protected]>"]
 readme = "README.md"
-packages = [{include = "llm_loader"}]
+packages = [{include = "smart_llm_loader"}]
 license = "MIT"
-repository = "https://github.com/drmingler/llm-loader"
+repository = "https://github.com/drmingler/smart-llm-loader"
 keywords = ["pdf", "llm", "rag", "document-processing", "ai"]
 classifiers = [
     "Development Status :: 4 - Beta",
@@ -44,5 +44,5 @@ build-backend = "poetry.core.masonry.api"
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 python_files = ["test_*.py"]
-addopts = "-v --cov=llm_loader --cov-report=term-missing"
+addopts = "-v --cov=smart_llm_loader --cov-report=term-missing"
 asyncio_mode = "auto"
diff --git a/llm_loader/__init__.py b/smart_llm_loader/__init__.py
@@ -38,6 +38,6 @@ def _check_poppler_installation():
 _check_poppler_installation()
 
 # Import main package components
-from .document_loader import LLMLoader
+from .document_loader import SmartLLMLoader
 
 __version__ = "0.1.0"
diff --git a/llm_loader/document_loader.py b/smart_llm_loader/document_loader.py
@@ -9,11 +9,11 @@
 from langchain_core.documents import Document
 import requests
 
-from llm_loader.llm import ImageProcessor, LLMProcessing
-from llm_loader.utils import copy_file, save_output_file, is_pdf
+from smart_llm_loader.llm import ImageProcessor, LLMProcessing
+from smart_llm_loader.utils import copy_file, save_output_file, is_pdf
 
 
-class LLMLoader(BaseLoader):
+class SmartLLMLoader(BaseLoader):
     """A flexible document loader that supports multiple input types."""
 
     def __init__(

diff --git a/llm_loader/llm.py b/smart_llm_loader/llm.py
@@ -10,9 +10,9 @@
 from pdf2image import convert_from_path
 from litellm import completion, validate_environment, supports_vision, check_valid_key, acompletion
 
-from llm_loader.prompts import DEFAULT_PAGE_CHUNK_PROMPT, DEFAULT_CHUNK_PROMPT
-from llm_loader.schema import OCRResponse
-from llm_loader.utils import save_output_file
+from smart_llm_loader.prompts import DEFAULT_PAGE_CHUNK_PROMPT, DEFAULT_CHUNK_PROMPT
+from smart_llm_loader.schema import OCRResponse
+from smart_llm_loader.utils import save_output_file
 
 
 class ImageProcessor:

diff --git a/llm_loader/prompts.py b/smart_llm_loader/prompts.py
diff --git a/llm_loader/schema.py b/smart_llm_loader/schema.py
diff --git a/llm_loader/utils.py b/smart_llm_loader/utils.py
diff --git a/tests/test_document_loader.py b/tests/test_document_loader.py
@@ -4,15 +4,15 @@
 from unittest.mock import Mock
 from langchain_core.documents import Document
 
-from llm_loader.document_loader import LLMLoader
+from smart_llm_loader.document_loader import SmartLLMLoader
 
 
 @pytest.fixture(autouse=True)
 def mock_llm_validation(mocker):
     """Mock LLM validation for all tests."""
-    mocker.patch('llm_loader.llm.validate_environment', return_value={"keys_in_environment": True})
-    mocker.patch('llm_loader.llm.supports_vision', return_value=True)
-    mocker.patch('llm_loader.llm.check_valid_key', return_value=True)
+    mocker.patch('smart_llm_loader.llm.validate_environment', return_value={"keys_in_environment": True})
+    mocker.patch('smart_llm_loader.llm.supports_vision', return_value=True)
+    mocker.patch('smart_llm_loader.llm.check_valid_key', return_value=True)
 
 
 @pytest.fixture
@@ -31,7 +31,7 @@ def mock_response():
 
 
 def test_init_with_file_path(sample_pdf_path):
-    loader = LLMLoader(file_path=sample_pdf_path)
+    loader = SmartLLMLoader(file_path=sample_pdf_path)
     assert str(loader.file_path) == str(sample_pdf_path)
     assert loader.output_dir is None
 
@@ -42,23 +42,23 @@ def test_init_with_url(mocker, mock_response):
 
     with tempfile.NamedTemporaryFile(suffix='.pdf') as temp_file:
         mocker.patch('tempfile.NamedTemporaryFile', return_value=temp_file)
-        loader = LLMLoader(url=url)
+        loader = SmartLLMLoader(url=url)
         assert isinstance(loader.file_path, Path)
 
 
 def test_init_with_both_file_and_url(sample_pdf_path):
     with pytest.raises(ValueError, match=r"Only one of file_path or url should be provided\."):
-        LLMLoader(file_path=sample_pdf_path, url="http://example.com/test.pdf")
+        SmartLLMLoader(file_path=sample_pdf_path, url="http://example.com/test.pdf")
 
 
 def test_init_with_neither_file_nor_url():
     with pytest.raises(ValueError, match=r"Either file_path or url must be provided\."):
-        LLMLoader()
+        SmartLLMLoader()
 
 
 def test_load_from_path_with_output_dir(sample_pdf_path, tmp_path):
     output_dir = tmp_path / "output"
-    loader = LLMLoader(file_path=sample_pdf_path, save_output=True, output_dir=output_dir)
+    loader = SmartLLMLoader(file_path=sample_pdf_path, save_output=True, output_dir=output_dir)
 
     assert loader.output_dir == output_dir
     assert (output_dir / sample_pdf_path.name).exists()
@@ -72,14 +72,14 @@ def test_load_from_url_invalid_content(mocker):
     mocker.patch('requests.get', return_value=mock_resp)
 
     with pytest.raises(ValueError, match=r"The URL does not point to a PDF file\."):
-        LLMLoader(url=url)
+        SmartLLMLoader(url=url)
 
 
 def test_load_method(mocker, sample_pdf_path):
     mock_documents = [Document(page_content="Test content")]
-    mocker.patch('llm_loader.llm.LLMProcessing.process_document_with_llm', return_value=mock_documents)
+    mocker.patch('smart_llm_loader.llm.LLMProcessing.process_document_with_llm', return_value=mock_documents)
 
-    loader = LLMLoader(file_path=sample_pdf_path)
+    loader = SmartLLMLoader(file_path=sample_pdf_path)
     documents = loader.load()
 
     assert len(documents) == 1
@@ -89,9 +89,9 @@ def test_load_method(mocker, sample_pdf_path):
 @pytest.mark.asyncio
 async def test_aload_method(mocker, sample_pdf_path):
     mock_documents = [Document(page_content="Test content")]
-    mocker.patch('llm_loader.llm.LLMProcessing.async_process_document_with_llm', return_value=mock_documents)
+    mocker.patch('smart_llm_loader.llm.LLMProcessing.async_process_document_with_llm', return_value=mock_documents)
 
-    loader = LLMLoader(file_path=sample_pdf_path)
+    loader = SmartLLMLoader(file_path=sample_pdf_path)
     documents = await loader.aload()
 
     assert len(documents) == 1
@@ -100,17 +100,17 @@ async def test_aload_method(mocker, sample_pdf_path):
 
 def test_load_and_split_method(mocker, sample_pdf_path):
     mock_documents = [Document(page_content="Test content")]
-    mocker.patch('llm_loader.llm.LLMProcessing.process_document_with_llm', return_value=mock_documents)
+    mocker.patch('smart_llm_loader.llm.LLMProcessing.process_document_with_llm', return_value=mock_documents)
 
-    loader = LLMLoader(file_path=sample_pdf_path, chunk_strategy="contextual")
+    loader = SmartLLMLoader(file_path=sample_pdf_path, chunk_strategy="contextual")
     documents = loader.load_and_split()
 
     assert len(documents) == 1
     assert documents[0].page_content == "Test content"
 
 
 def test_create_document(sample_pdf_path):
-    loader = LLMLoader(file_path=sample_pdf_path)
+    loader = SmartLLMLoader(file_path=sample_pdf_path)
     chunk = {"content": "Test content", "theme": "Test theme"}
     page_num = 1
 
@@ -128,10 +128,10 @@ def test_lazy_load(mocker, sample_pdf_path):
     mock_images = [Mock()]
     mock_result = {"markdown_chunks": [{"content": "Test content", "theme": "Test theme"}]}
 
-    mocker.patch('llm_loader.llm.ImageProcessor.pdf_to_images', return_value=mock_images)
-    mocker.patch('llm_loader.llm.LLMProcessing.process_image_with_llm', return_value=mock_result)
+    mocker.patch('smart_llm_loader.llm.ImageProcessor.pdf_to_images', return_value=mock_images)
+    mocker.patch('smart_llm_loader.llm.LLMProcessing.process_image_with_llm', return_value=mock_result)
 
-    loader = LLMLoader(file_path=sample_pdf_path)
+    loader = SmartLLMLoader(file_path=sample_pdf_path)
     documents = list(loader.lazy_load())
 
     assert len(documents) == 1
@@ -145,10 +145,10 @@ async def test_alazy_load(mocker, sample_pdf_path):
     mock_images = [Mock()]
     mock_result = {"markdown_chunks": [{"content": "Test content", "theme": "Test theme"}]}
 
-    mocker.patch('llm_loader.llm.ImageProcessor.pdf_to_images', return_value=mock_images)
-    mocker.patch('llm_loader.llm.LLMProcessing.async_process_image_with_llm', return_value=mock_result)
+    mocker.patch('smart_llm_loader.llm.ImageProcessor.pdf_to_images', return_value=mock_images)
+    mocker.patch('smart_llm_loader.llm.LLMProcessing.async_process_image_with_llm', return_value=mock_result)
 
-    loader = LLMLoader(file_path=sample_pdf_path)
+    loader = SmartLLMLoader(file_path=sample_pdf_path)
     documents = [doc async for doc in loader.alazy_load()]
 
     assert len(documents) == 1

diff --git a/tests/test_image_processor.py b/tests/test_image_processor.py
@@ -3,7 +3,7 @@
 import io
 import base64
 
-from llm_loader.llm import ImageProcessor
+from smart_llm_loader.llm import ImageProcessor
 
 
 @pytest.fixture
@@ -24,7 +24,7 @@ def sample_image():
 def test_pdf_to_images(sample_pdf_path, mocker):
     # Mock pdf2image.convert_from_path
     mock_images = [Image.new('RGB', (100, 100)) for _ in range(2)]
-    mocker.patch('llm_loader.llm.convert_from_path', return_value=mock_images)
+    mocker.patch('smart_llm_loader.llm.convert_from_path', return_value=mock_images)
 
     images = ImageProcessor.pdf_to_images(sample_pdf_path)