# obsidian_chat.py

#!/usr/bin/env python3
"""
Obsidian Chat - Chat with your Obsidian vault using Bhumi LLM client

Usage:
    python obsidian_chat.py [--vault-path /path/to/vault] [--model MODEL_NAME]
"""

import os
import sys
import argparse
import glob
import platform
from pathlib import Path
from typing import List, Optional
import dotenv

from bhumi.base_client import BaseLLMClient, LLMConfig

# Load environment variables at import time, before any client is created.
# NOTE(review): nothing in this file reads os.environ directly; presumably the
# variables (e.g. from a .env file) are consumed by the bhumi client - confirm.
dotenv.load_dotenv()

def _is_vault_dir(path: Path) -> bool:
    """Return True if *path* is a directory that contains an ``.obsidian`` entry."""
    try:
        return path.is_dir() and (path / ".obsidian").exists()
    except OSError:
        # Unreadable entries (e.g. permission denied) are simply not vaults.
        return False


def find_obsidian_vault() -> List[str]:
    """Find Obsidian vault directories in common per-OS locations.

    A directory counts as a vault when it contains an ``.obsidian`` entry.
    Each search location is checked itself, together with its immediate
    subdirectories (the search is not recursive).

    Returns:
        Vault paths as strings, without duplicates, in discovery order.
        Subdirectories of a search location are visited in sorted order so
        the result is stable between runs.
    """
    system = platform.system()
    home = Path.home()

    if system == "Darwin":  # macOS
        search_paths = [
            home / "Documents" / "Obsidian",
            home / "Documents",
            home / "Library" / "Application Support" / "obsidian",
        ]
    elif system == "Windows":
        search_paths = [
            home / "Documents" / "Obsidian",
            home / "AppData" / "Local" / "Obsidian",
            home / "AppData" / "Roaming" / "Obsidian",
        ]
    else:  # Linux and others
        search_paths = [
            home / "Documents" / "Obsidian",
            home / ".obsidian",
            home / ".config" / "obsidian",
        ]

    possible_vaults: List[str] = []
    seen = set()

    for path in search_paths:
        candidates = [path]
        try:
            candidates.extend(sorted(path.iterdir()))
        except OSError:
            # The location is missing, is a regular file, or cannot be
            # listed; there are no subdirectories to inspect in that case.
            pass

        for candidate in candidates:
            if not _is_vault_dir(candidate):
                continue
            vault = str(candidate)
            # Overlapping search locations (e.g. ~/Documents and
            # ~/Documents/Obsidian) can reach the same vault twice.
            if vault not in seen:
                seen.add(vault)
                possible_vaults.append(vault)

    return possible_vaults

def get_vault_files(vault_path: str) -> List[str]:
    """Get all markdown files (``*.md`` and ``*.markdown``) from the vault.

    The vault is searched recursively. Hidden files and directories are
    skipped, which is the default behaviour of ``glob``.

    Args:
        vault_path: Root directory of the vault. It is treated literally,
            so glob metacharacters such as ``[`` or ``*`` in the path are
            safe.

    Returns:
        Paths of the matching regular files: all ``.md`` files first, then
        all ``.markdown`` files, each group sorted.
    """
    # Escape the root so a vault called e.g. "notes[2024]" is not
    # interpreted as a character class and silently matches nothing.
    root = glob.escape(str(vault_path))

    markdown_files: List[str] = []
    for extension in ("md", "markdown"):
        pattern = os.path.join(root, "**", f"*.{extension}")
        matches = glob.glob(pattern, recursive=True)
        # A directory may be named like a note ("archive.md"); keep files only.
        markdown_files.extend(sorted(p for p in matches if os.path.isfile(p)))

    return markdown_files

def read_file_content(file_path: str) -> str:
    """Return the text of *file_path*, decoded as UTF-8.

    Failures are not raised to the caller: any ``Exception`` while opening
    or reading is turned into the returned string
    ``"Error reading file: <reason>"``.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            text = handle.read()
    except Exception as err:
        return "Error reading file: " + str(err)
    else:
        return text

# Generic question/filler words that say nothing about the topic of a message.
_STOP_WORDS = frozenset({
    "what", "where", "when", "which", "this", "that",
    "these", "those", "want", "have", "about", "from",
})

# Messages starting with a command word are not used for topic matching.
_COMMAND_PREFIXES = ('search', 'read', 'help', 'exit', 'quit', 'clear')

# Budget for topic-matched excerpts: stop adding once this many characters
# have been collected, and take at most this many characters per file.
_MAX_TOPIC_CONTEXT_CHARS = 5000
_TOPIC_EXCERPT_CHARS = 800


def _gather_file_context(user_input: str, files: List[str], vault_path: str) -> tuple:
    """Collect vault content relevant to *user_input*.

    First, every file whose name (with or without extension) occurs in the
    message is included in full. Only if none is found, files whose content
    contains one of the message's significant words (longer than three
    characters and not a stop word) contribute a short excerpt each.

    Returns:
        ``(mentioned_files, file_content)``: the vault-relative paths that
        were used and the text block to attach to the user message.
    """
    lowered_input = user_input.lower()
    mentioned_files: List[str] = []
    sections: List[str] = []

    # Pass 1: files referenced by name.
    for file in files:
        file_basename = os.path.basename(file).lower()
        file_stem = os.path.splitext(file_basename)[0]
        if file_stem in lowered_input or file_basename in lowered_input:
            content = read_file_content(file)
            relative_path = os.path.relpath(file, vault_path)
            sections.append(f"\n--- Content of {relative_path} ---\n{content}\n")
            mentioned_files.append(relative_path)

    if mentioned_files or lowered_input.startswith(_COMMAND_PREFIXES):
        return mentioned_files, "".join(sections)

    # Pass 2: topic match on file content. The word list does not depend on
    # the file, so it is computed once; with no usable words nothing can
    # match and the vault does not need to be read at all.
    relevant_words = [
        word for word in lowered_input.split()
        if len(word) > 3 and word not in _STOP_WORDS
    ]
    if not relevant_words:
        return mentioned_files, ""

    collected = 0
    for file in files:
        if collected >= _MAX_TOPIC_CONTEXT_CHARS:
            break  # budget used up; further files would be ignored anyway
        content = read_file_content(file)
        lowered_content = content.lower()
        if any(word in lowered_content for word in relevant_words):
            relative_path = os.path.relpath(file, vault_path)
            section = (
                f"\n--- Content of {relative_path} that might be relevant ---\n"
                f"{content[:_TOPIC_EXCERPT_CHARS]}\n"
            )
            sections.append(section)
            collected += len(section)
            mentioned_files.append(relative_path)

    return mentioned_files, "".join(sections)


async def chat_with_vault(vault_path: str, model: str = "jan/qwen2.5-coder-7b-instruct"):
    """Interactive chat session with the Obsidian vault content.

    Reads commands from stdin in a loop. ``help``, ``search``, ``read`` and
    ``clear`` are handled locally; anything else is sent to the model
    together with the conversation history and any vault content that looks
    relevant. The reply is streamed to stdout. The session ends on
    ``exit``/``quit``, Ctrl-C, or end of input (Ctrl-D / closed stdin).

    Args:
        vault_path: Root directory of the vault to chat about.
        model: Model identifier passed to the Bhumi client configuration.
    """
    print(f"📚 Loading Obsidian vault from: {vault_path}")

    # Get all markdown files
    files = get_vault_files(vault_path)
    if not files:
        print("❌ No markdown files found in the vault.")
        return

    print(f"🔍 Found {len(files)} markdown files in your vault.")

    # Configure Bhumi client
    # NOTE(review): the API key is a placeholder; presumably the default
    # model is served by an endpoint that needs no key - confirm.
    config = LLMConfig(
        api_key="not-needed",
        model=model,
        max_tokens=1500,
    )

    # Create client
    client = BaseLLMClient(config)

    # Short vault overview that becomes part of the system prompt.
    vault_info = f"This Obsidian vault contains {len(files)} markdown files.\n"
    vault_info += "Here are some of the files:\n"
    vault_info += "".join(
        f"- {os.path.relpath(file, vault_path)}\n" for file in files[:10]
    )
    if len(files) > 10:
        vault_info += f"... and {len(files) - 10} more files.\n"

    print("\n🤖 Welcome to Obsidian Chat! Type 'exit' to quit or 'help' for commands.")

    context = []

    while True:
        try:
            user_input = input("\n💬 You: ")
            command = user_input.strip().lower()

            if not command:
                # Nothing typed; do not send an empty message to the model.
                continue

            if command in ('exit', 'quit'):
                print("👋 Goodbye!")
                break

            if command == 'help':
                print("\nCommands:")
                print("  help - Show this help message")
                print("  exit/quit - Exit the chat")
                print("  search [query] - Search for files in your vault")
                print("  read [filename] - Read a specific file")
                print("  clear - Clear the conversation history")
                continue

            if user_input.lower().startswith('search '):
                search_term = user_input[7:].strip().lower()
                results = [
                    os.path.relpath(file, vault_path)
                    for file in files
                    if search_term in file.lower()
                ]

                if results:
                    print(f"\nFound {len(results)} matching files:")
                    for i, result in enumerate(results[:10]):
                        print(f"  {i+1}. {result}")
                    if len(results) > 10:
                        print(f"  ... and {len(results) - 10} more")
                else:
                    print("No matching files found.")
                continue

            if user_input.lower().startswith('read '):
                file_name = user_input[5:].strip()
                wanted = file_name.lower()
                found = False

                for file in files:
                    # Show the first file whose path contains the given name.
                    if wanted in file.lower() or wanted in os.path.basename(file).lower():
                        content = read_file_content(file)
                        print(f"\nContent of {os.path.relpath(file, vault_path)}:")
                        print(f"------ BEGIN ------\n{content[:500]}")
                        if len(content) > 500:
                            print("...(content truncated)...")
                        print("------- END -------")
                        found = True
                        break

                if not found:
                    print(f"File '{file_name}' not found. Use 'search' to find files.")
                continue

            if command == 'clear':
                context = []
                print("🧹 Conversation history cleared.")
                continue

            # Process regular chat input
            if not context:
                # First message, set up the system context
                context = [
                    {"role": "system", "content": f"You are a helpful assistant that can chat about the user's Obsidian vault. When the user asks about a specific file, automatically access and summarize its contents. {vault_info}"}
                ]

            mentioned_files, file_content = _gather_file_context(user_input, files, vault_path)

            # Add files content to the message if any were found
            if mentioned_files:
                enhanced_input = f"{user_input}\n\nRelevant file content:\n{file_content}"
                context.append({"role": "user", "content": enhanced_input})
                print(f"📄 Including content from: {', '.join(mentioned_files)}")
            else:
                # Add regular user message
                context.append({"role": "user", "content": user_input})

            # Stream response from Bhumi
            print("\n🤖 Assistant: ", end="", flush=True)
            chunks = []
            try:
                stream = await client.completion(context, stream=True)
                async for chunk in stream:
                    chunks.append(chunk)
                    print(chunk, end="", flush=True)
            except Exception:
                # Drop the unanswered user message so the history does not
                # end up with a user turn that never got a reply.
                context.pop()
                print()
                raise

            # Add a newline after streaming completes
            print()

            # Add assistant message to context
            context.append({"role": "assistant", "content": "".join(chunks)})

        except (KeyboardInterrupt, EOFError):
            # EOFError must end the session: once stdin is exhausted every
            # further input() call fails again, which would loop forever.
            print("\n👋 Goodbye!")
            break
        except Exception as e:
            print(f"❌ Error: {str(e)}")

def main():
    """Command-line entry point: resolve the vault and start the chat.

    The vault comes from ``--vault-path`` when given (a leading ``~`` is
    expanded). Otherwise the usual locations are searched and, if several
    vaults are found, the user is asked to pick one by number. Exits with
    status 1 when no usable vault can be determined.
    """
    parser = argparse.ArgumentParser(description="Chat with your Obsidian vault using Bhumi")
    parser.add_argument("--vault-path", type=str, help="Path to your Obsidian vault")
    parser.add_argument("--model", type=str, default="jan/qwen2.5-coder-7b-instruct",
                        help="Model to use with Bhumi")
    args = parser.parse_args()

    vault_path = args.vault_path

    if vault_path:
        # The shell does not expand "~" in forms like --vault-path=~/vault.
        vault_path = os.path.expanduser(vault_path)
    else:
        # Try to find vaults automatically
        possible_vaults = find_obsidian_vault()

        if not possible_vaults:
            print("❌ No Obsidian vaults found. Please specify the path with --vault-path.")
            sys.exit(1)

        if len(possible_vaults) == 1:
            vault_path = possible_vaults[0]
            print(f"🔍 Found Obsidian vault at: {vault_path}")
        else:
            print("🔍 Found multiple Obsidian vaults:")
            for i, path in enumerate(possible_vaults):
                print(f"{i+1}. {path}")

            try:
                choice = input("Please select a vault (number): ")
                index = int(choice) - 1
            except (ValueError, EOFError):
                # Non-numeric answer, or stdin closed before an answer came.
                print("❌ Invalid input. Exiting.")
                sys.exit(1)

            if not 0 <= index < len(possible_vaults):
                print("❌ Invalid selection. Exiting.")
                sys.exit(1)
            vault_path = possible_vaults[index]

    # Ensure the vault path exists
    if not os.path.isdir(vault_path):
        print(f"❌ Vault path does not exist: {vault_path}")
        sys.exit(1)

    # Run the chat interface
    import asyncio
    asyncio.run(chat_with_vault(vault_path, args.model))

# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()