From cc1e79b27f383347dd2974390b34f56a83bea4fc Mon Sep 17 00:00:00 2001
From: godver3
Date: Mon, 3 Feb 2025 10:33:22 -0700
Subject: [PATCH] 0.5.69 - enabled manual scraper Plex refresher (adds to
 checking queue), fixed scraper error re: genres, adjusted watch history to
 include both server and account watch history, added debug template
 /database/watch_history for reviewing as needed, removed first-aired filter
 from the web scraper, implemented a potential fix for the performance
 dashboard memory chart lagging a few hours behind current, and limited
 Overseerr fetching to approved requests (previously all requests were taken
 without filtering)

---
 content_checkers/overseerr.py             |   5 +-
 queues/scraping_queue.py                  |   2 +-
 routes/database_routes.py                 | 109 ++-
 routes/performance_routes.py              |  61 ++
 routes/scraper_routes.py                  |   2 +-
 templates/performance/dashboard.html      | 141 ++--
 templates/watch_history.html              | 108 +++
 utilities/plex_watch_history_functions.py | 823 ++++++++++++++--------
 version.txt                               |   2 +-
 web_scraper.py                            |  18 +-
 10 files changed, 885 insertions(+), 386 deletions(-)
 create mode 100644 templates/watch_history.html

diff --git a/content_checkers/overseerr.py b/content_checkers/overseerr.py
index 2080f014..3e1fcc64 100755
--- a/content_checkers/overseerr.py
+++ b/content_checkers/overseerr.py
@@ -62,9 +62,10 @@ def fetch_overseerr_wanted_content(overseerr_url: str, overseerr_api_key: str, t
     while True:
         try:
-            #logging.debug(f"Fetching Overseerr requests page {page}")
+            request_url = get_url(overseerr_url, f"/api/v1/request?take={take}&skip={skip}&filter=approved")
+            logging.debug(f"Fetching Overseerr requests with URL: {request_url}")
             response = api.get(
-                get_url(overseerr_url, f"/api/v1/request?take={take}&skip={skip}"),
+                request_url,
                 headers=headers,
                 timeout=REQUEST_TIMEOUT
             )
diff --git a/queues/scraping_queue.py b/queues/scraping_queue.py
index de72ba7c..cee01529 100755
--- a/queues/scraping_queue.py
+++ b/queues/scraping_queue.py
@@ -196,7 +196,7 @@ def scrape_with_fallback(self, item, is_multi_pack, queue_manager, skip_filter=F
                 logging.info(f"Filtered out {len(results) - len(filtered_results)} results due to not wanted magnets/URLs")
                 results = filtered_results
 
-        is_anime = True if 'anime' in item['genres'] else False
+        is_anime = 'anime' in (item.get('genres') or [])
 
         # For episodes, filter by exact season/episode match
         if not is_anime:
diff --git a/routes/database_routes.py b/routes/database_routes.py
index 45ed77ef..54f81a44 100755
--- a/routes/database_routes.py
+++ b/routes/database_routes.py
@@ -12,6 +12,9 @@
 from .models import admin_required
 from utilities.plex_functions import remove_file_from_plex
 from database.database_reading import get_media_item_by_id
+import os
+import sqlite3
+from datetime import datetime
 
 database_bp = Blueprint('database', __name__)
 
@@ -341,4 +343,115 @@ def apply_parsed_versions():
         })
     except Exception as e:
         logging.error(f"Error applying parsed versions: {str(e)}")
-        return jsonify({'success': False, 'error': str(e)}), 500
\ No newline at end of file
+        return jsonify({'success': False, 'error': str(e)}), 500
+
+@database_bp.route('/watch_history', methods=['GET'])
+@admin_required
+def watch_history():
+    try:
+        # Get database connection
+        db_dir = os.environ.get('USER_DB_CONTENT', '/user/db_content')
+        db_path = os.path.join(db_dir, 'watch_history.db')
+
+        if not os.path.exists(db_path):
+            flash("Watch history database not found. Please sync Plex watch history first.", "warning")
+            return render_template('watch_history.html', items=[])
+
+        conn = sqlite3.connect(db_path)
+        cursor = conn.cursor()
+
+        # Get filter parameters
+        content_type = request.args.get('type', 'all')  # 'movie', 'episode', or 'all'
+        sort_by = request.args.get('sort', 'watched_at')  # 'title' or 'watched_at'
+        sort_order = request.args.get('order', 'desc')  # 'asc' or 'desc'
+
+        # Whitelist the sort parameters before interpolating them into the query
+        if sort_by not in ('title', 'watched_at'):
+            sort_by = 'watched_at'
+        if sort_order not in ('asc', 'desc'):
+            sort_order = 'desc'
+
+        # Build query
+        query = """
+            SELECT title, type, watched_at, season, episode, show_title, source
+            FROM watch_history
+            WHERE 1=1
+        """
+        params = []
+
+        if content_type != 'all':
+            query += " AND type = ?"
+            params.append(content_type)
+
+        query += f" ORDER BY {sort_by} {sort_order}"
+
+        # Execute query
+        cursor.execute(query, params)
+        items = cursor.fetchall()
+
+        # Convert to list of dicts for easier template handling
+        formatted_items = []
+        for item in items:
+            title, type_, watched_at, season, episode, show_title, source = item
+
+            # Format the watched_at date
+            try:
+                watched_at = datetime.strptime(watched_at, '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d %H:%M')
+            except (TypeError, ValueError):
+                watched_at = 'Unknown'
+
+            # Format the display title
+            if type_ == 'episode' and show_title:
+                display_title = f"{show_title} - S{season:02d}E{episode:02d} - {title}"
+            else:
+                display_title = title
+
+            formatted_items.append({
+                'title': display_title,
+                'type': type_,
+                'watched_at': watched_at,
+                'source': source
+            })
+
+        conn.close()
+
+        return render_template('watch_history.html',
+                               items=formatted_items,
+                               content_type=content_type,
+                               sort_by=sort_by,
+                               sort_order=sort_order)
+
+    except Exception as e:
+        logging.error(f"Error in watch history route: {str(e)}")
+        flash(f"Error retrieving watch history: {str(e)}", "error")
+        return render_template('watch_history.html', items=[])
+
+@database_bp.route('/watch_history/clear', methods=['POST'])
+@admin_required
+def clear_watch_history():
+    try:
+        # Get database connection
+        db_dir = os.environ.get('USER_DB_CONTENT', '/user/db_content')
+        db_path = os.path.join(db_dir, 'watch_history.db')
+
+        if not os.path.exists(db_path):
+            return jsonify({'success': False, 'error': 'Watch history database not found'})
+
+        conn = sqlite3.connect(db_path)
+        cursor = conn.cursor()
+
+        # Clear the watch history table
+        cursor.execute('DELETE FROM watch_history')
+
+        # Reset the auto-increment counter
+        cursor.execute('DELETE FROM sqlite_sequence WHERE name = "watch_history"')
+
+        conn.commit()
+        conn.close()
+
+        logging.info("Watch history cleared successfully")
+        return jsonify({'success': True})
+
+    except Exception as e:
+        logging.error(f"Error clearing watch history: {str(e)}")
+        return jsonify({'success': False, 'error': str(e)})
\ No newline at end of file
diff --git a/routes/performance_routes.py b/routes/performance_routes.py
index e7891e2f..0eff9393 100644
--- a/routes/performance_routes.py
+++ b/routes/performance_routes.py
@@ -125,3 +125,64 @@ def get_cpu_metrics():
 
     except Exception as e:
         return jsonify({'error': str(e)}), 500
+
+@performance_bp.route('/api/performance/memory')
+def get_memory_metrics():
+    """Get memory performance metrics from the log file."""
+    log_dir = os.environ.get('USER_LOGS', '/user/logs')
+    log_file = os.path.join(log_dir, 'performance_log.json')
+
+    # Get optional time range parameters
+    hours = request.args.get('hours', type=int, default=1)  # Default to last hour
+    limit = request.args.get('limit', type=int, default=60)  # Default to 60 entries
+    cutoff_time = datetime.now() - timedelta(hours=hours)
+
+    try:
+        entries = []
+        if os.path.exists(log_file):
+            with open(log_file, 'r') as f:
+                for line in f:
+                    try:
+                        entry = json.loads(line.strip())
+                        # Only process memory metric entries
+                        if entry.get('type') not in ['basic_metrics', 'detailed_memory']:
+                            continue
+
+                        entry_time = datetime.fromisoformat(entry['timestamp'])
+                        if entry_time >= cutoff_time:
+                            entries.append(entry)
+                    except (json.JSONDecodeError, KeyError, ValueError):
+                        continue
+
+        # Sort entries by timestamp, then keep only the most recent `limit`
+        # samples; breaking out of the read loop early would return the oldest
+        # samples in the window and leave the chart hours behind current
+        entries.sort(key=lambda x: x.get('timestamp', ''))
+        entries = entries[-limit:]
+
+        # Calculate summary statistics
+        summary = {}
+        if entries:
+            memory_metrics = [e['metrics'] for e in entries if 'metrics' in e]
+            if memory_metrics:
+                rss_values = [m.get('memory_rss', 0) for m in memory_metrics]
+                vms_values = [m.get('memory_vms', 0) for m in memory_metrics]
+                system_memory_used = [m.get('system_memory_used', 0) for m in memory_metrics]
+
+                summary = {
+                    'avg_rss_mb': sum(rss_values) / len(rss_values),
+                    'max_rss_mb': max(rss_values),
+                    'min_rss_mb': min(rss_values),
+                    'avg_vms_mb': sum(vms_values) / len(vms_values),
+                    'max_vms_mb': max(vms_values),
+                    'min_vms_mb': min(vms_values),
+                    'avg_system_memory_used': sum(system_memory_used) / len(system_memory_used),
+                    'samples': len(memory_metrics)
+                }
+
+        return jsonify({
+            'summary': summary,
+            'entries': entries
+        })
+
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
diff --git a/routes/scraper_routes.py b/routes/scraper_routes.py
index 0f8d04bd..e1fce6ff 100755
--- a/routes/scraper_routes.py
+++ b/routes/scraper_routes.py
@@ -236,7 +236,7 @@ def add_torrent_to_debrid():
             return jsonify({'error': message}), 400
 
         # Check if symlinking is enabled
-        if get_setting('File Management', 'file_collection_management') == 'Symlinked/Local':
+        if get_setting('File Management', 'file_collection_management') == 'Symlinked/Local' or 1==1:  # forced on so manual scraper additions always go through the checking queue
             try:
                 # Convert media type to movie_or_episode format
                 movie_or_episode = 'episode' if media_type == 'tv' or media_type == 'show' else 'movie'
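Reviewer note: the new memory-metrics route can be smoke-tested once the app is running. A minimal sketch, assuming the app listens on http://localhost:5000 (the URL path matches the dashboard's fetch call below) and that the requests package is available:

    import requests

    resp = requests.get(
        "http://localhost:5000/performance/api/performance/memory",
        params={"hours": 1, "limit": 60},  # same defaults the route uses
        timeout=10,
    )
    resp.raise_for_status()
    data = resp.json()
    print("samples:", data["summary"].get("samples", 0))
    print("avg RSS (MB):", data["summary"].get("avg_rss_mb"))
    print("entries returned:", len(data["entries"]))

An empty summary ({}) simply means no matching log entries fell inside the requested window.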
diff --git a/templates/performance/dashboard.html b/templates/performance/dashboard.html
index fa6db1a3..e408e30b 100644
--- a/templates/performance/dashboard.html
+++ b/templates/performance/dashboard.html
@@ -168,7 +168,6 @@
             const basicMetrics = entries.findLast(entry => entry.type === 'basic_metrics');
             const detailedMemory = entries.findLast(entry => entry.memory);
-            const memoryGrowthEntries = entries.filter(entry => entry.type === 'basic_metrics').slice(-10);
             const resourceHandles = entries.findLast(entry => entry.type === 'file_descriptors');
 
             if (basicMetrics) {
@@ -184,105 +183,13 @@
                     `System Time: ${metrics.cpu_system_time.toFixed(2)}s ` +
                     `User Time: ${metrics.cpu_user_time.toFixed(2)}s`;
-
-                // Update Memory Usage
-                const memoryRssMB = metrics.memory_rss || 0;
-                const memoryVmsMB = metrics.memory_vms || 0;
-                const memoryPercent = metrics.system_memory_used || 0;
-                const memoryBar = document.getElementById('memory-progress');
-                const memoryText = document.getElementById('memory-text');
-
-                memoryBar.style.width = `${Math.min(memoryPercent, 100)}%`;
-                memoryText.textContent = `${memoryPercent.toFixed(1)}%`;
-
-                document.getElementById('memory-details').innerHTML =
-                    `RSS Memory: ${memoryRssMB.toFixed(2)} MB ` +
-                    `Virtual Memory: ${memoryVmsMB.toFixed(2)} MB ` +
-                    `Swap Used: ${metrics.swap_used.toFixed(2)} MB`;
-            }
-
-            // Memory Analysis
-            if (detailedMemory) {
-                const memoryData = detailedMemory.memory;
-
-                // Memory by Type
-                const anonymousHtml = `
-                    <div>
-                        Anonymous Memory
-                        Size: ${memoryData.anonymous.formatted_size}
-                        Count: ${memoryData.anonymous.count.toLocaleString()} mappings
-                    </div>
-                `;
-
-                // Enhanced file-backed memory section
-                const fileBackedHtml = `
-                    <div>
-                        File-backed Memory
-                        Total Size: ${memoryData.file_backed.formatted_size}
-                        Mappings: ${memoryData.file_backed.count.toLocaleString()}
-                        File Details
-                        ${memoryData.open_files.files.map(file => `
-                            <div>
-                                ${file.path}
-                                ${formatBytes(file.size)}
-                            </div>
-                        `).join('')}
-                    </div>
-                `;
-
-                // Network connections section
-                const networkHtml = `
-                    <div>
-                        Network Connections
-                        Total: ${memoryData.network.total_connections}
-                        ${Object.entries(memoryData.network.states).map(([state, count]) =>
-                            `${state}: ${count}`
-                        ).join('')}
-                    </div>
-                `;
-
-                document.getElementById('memory-analysis').innerHTML = anonymousHtml + fileBackedHtml + networkHtml;
-            }
-
-            // Memory Growth over time
-            if (memoryGrowthEntries.length > 0) {
-                const growthHtml = memoryGrowthEntries
-                    .slice() // Create a copy of the array
-                    .reverse() // Reverse to get most recent first
-                    .slice(0, 5) // Limit to 5 entries
-                    .map(entry => {
-                        const metrics = entry.metrics;
-                        const timestamp = new Date(entry.timestamp).toLocaleString();
-                        return `
-                            <div>
-                                ${timestamp}
-                                RSS: ${metrics.memory_rss.toFixed(2)} MB
-                                VMS: ${metrics.memory_vms.toFixed(2)} MB
-                                Swap: ${metrics.swap_used.toFixed(2)} MB
-                            </div>
-                        `;
-                    }).join('');
-
-                document.getElementById('memory-growth-text').innerHTML = growthHtml;
             }
 
-            // Update Memory Growth Chart
-            updateMemoryChart(memoryGrowthEntries);
-
             // Resource Handles
             if (resourceHandles) {
                 const metrics = resourceHandles.metrics;
                 document.getElementById('open-files-count').textContent = metrics.open_files_count || 0;
 
-                // Format file types as "extension: count" pairs
                 const fileTypesText = metrics.file_types ?
                     Object.entries(metrics.file_types)
                         .map(([ext, count]) => `${ext}: ${count}`)
@@ -341,6 +248,54 @@
             }
         })
         .catch(error => console.error('Error fetching CPU data:', error));
+
+        // Fetch memory data separately
+        fetch('/performance/api/performance/memory?hours=1')
+            .then(response => response.json())
+            .then(data => {
+                if (data.summary) {
+                    // Update memory summary statistics
+                    const memoryBar = document.getElementById('memory-progress');
+                    const memoryText = document.getElementById('memory-text');
+                    const systemMemoryUsed = data.summary.avg_system_memory_used || 0;
+
+                    memoryBar.style.width = `${Math.min(systemMemoryUsed, 100)}%`;
+                    memoryText.textContent = `${systemMemoryUsed.toFixed(1)}%`;
+
+                    // Get the latest entry
+                    const latestEntry = data.entries[data.entries.length - 1];
+                    if (latestEntry && latestEntry.metrics) {
+                        const metrics = latestEntry.metrics;
+                        document.getElementById('memory-details').innerHTML =
+                            `RSS Memory: ${metrics.memory_rss.toFixed(2)} MB ` +
+                            `Virtual Memory: ${metrics.memory_vms.toFixed(2)} MB ` +
+                            `Swap Used: ${metrics.swap_used.toFixed(2)} MB`;
+                    }
+
+                    // Update memory growth text with recent entries
+                    const recentEntries = data.entries.slice(-5);
+                    const growthHtml = recentEntries
+                        .map(entry => {
+                            const metrics = entry.metrics;
+                            const timestamp = new Date(entry.timestamp).toLocaleString();
+                            return `
+                                <div>
+                                    ${timestamp}
+                                    RSS: ${metrics.memory_rss.toFixed(2)} MB
+                                    VMS: ${metrics.memory_vms.toFixed(2)} MB
+                                    Swap: ${metrics.swap_used.toFixed(2)} MB
+                                </div>
+                            `;
+                        }).join('');
+
+                    document.getElementById('memory-growth-text').innerHTML = growthHtml;
+
+                    // Update memory history chart
+                    updateMemoryChart(data.entries);
+                }
+            })
+            .catch(error => console.error('Error fetching memory data:', error));
     }
 
     function updateCpuChart(entries) {
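Both get_memory_metrics and the dashboard script above treat performance_log.json as a JSON-lines file. A sketch of the entry shape the parser accepts, inferred from the fields the code reads; the names come from the diff, the values are illustrative, not real measurements:

    import json
    from datetime import datetime

    entry = {
        "type": "basic_metrics",                  # or "detailed_memory"
        "timestamp": datetime.now().isoformat(),  # read back via datetime.fromisoformat()
        "metrics": {
            "memory_rss": 512.3,         # MB; plotted as RSS
            "memory_vms": 1024.7,        # MB; plotted as VMS
            "system_memory_used": 41.2,  # percent; drives the progress bar
            "swap_used": 12.5,           # MB
        },
    }
    # one JSON object per line in performance_log.json
    print(json.dumps(entry))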
diff --git a/templates/watch_history.html b/templates/watch_history.html
new file mode 100644
index 00000000..bc39030c
--- /dev/null
+++ b/templates/watch_history.html
@@ -0,0 +1,108 @@
+{% extends "base.html" %}
+
+{% block title %}Watch History{% endblock %}
+
+{% block content %}
+<div class="watch-history-container">
+    <div class="watch-history-header">
+        <h2>Plex Watch History</h2>
+        <button type="button" onclick="clearHistory()">Clear History</button>
+    </div>
+
+    <form method="get" class="watch-history-filters">
+        <label for="type">Type:</label>
+        <select name="type" id="type">
+            <option value="all" {% if content_type == 'all' %}selected{% endif %}>All</option>
+            <option value="movie" {% if content_type == 'movie' %}selected{% endif %}>Movies</option>
+            <option value="episode" {% if content_type == 'episode' %}selected{% endif %}>Episodes</option>
+        </select>
+
+        <label for="sort">Sort by:</label>
+        <select name="sort" id="sort">
+            <option value="watched_at" {% if sort_by == 'watched_at' %}selected{% endif %}>Watched At</option>
+            <option value="title" {% if sort_by == 'title' %}selected{% endif %}>Title</option>
+        </select>
+
+        <label for="order">Order:</label>
+        <select name="order" id="order">
+            <option value="desc" {% if sort_order == 'desc' %}selected{% endif %}>Descending</option>
+            <option value="asc" {% if sort_order == 'asc' %}selected{% endif %}>Ascending</option>
+        </select>
+
+        <button type="submit">Apply</button>
+    </form>
+
+    {% if items %}
+    <table class="watch-history-table">
+        <thead>
+            <tr>
+                <th>Title</th>
+                <th>Type</th>
+                <th>Watched At</th>
+                <th>Source</th>
+            </tr>
+        </thead>
+        <tbody>
+            {% for item in items %}
+            <tr>
+                <td>{{ item.title }}</td>
+                <td>{{ item.type }}</td>
+                <td>{{ item.watched_at }}</td>
+                <td>{{ item.source }}</td>
+            </tr>
+            {% endfor %}
+        </tbody>
+    </table>
+    {% else %}
+    <div class="watch-history-empty">
+        No watch history found. Please sync your Plex watch history first.
+    </div>
+    {% endif %}
+</div>
+
+<script>
+function clearHistory() {
+    if (!confirm('Are you sure you want to clear the watch history?')) {
+        return;
+    }
+    fetch('/database/watch_history/clear', { method: 'POST' })
+        .then(response => response.json())
+        .then(data => {
+            if (data.success) {
+                window.location.reload();
+            } else {
+                alert('Error clearing watch history: ' + data.error);
+            }
+        })
+        .catch(error => alert('Error clearing watch history: ' + error));
+}
+</script>
+{% endblock %}
\ No newline at end of file
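The sync code below dedupes watch events per calendar day: one row per (title, day) for movies and per (show, season, episode, day) for episodes. A minimal sketch of that behaviour in isolation, using an in-memory database and a simplified schema:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("""
        CREATE TABLE watch_history (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            title TEXT, type TEXT, watched_at TIMESTAMP, source TEXT
        )
    """)
    # SQLite only allows expressions such as date(watched_at) in an index,
    # not in a table-level UNIQUE constraint
    conn.execute("""
        CREATE UNIQUE INDEX idx_movie_day
        ON watch_history (title, date(watched_at)) WHERE type = 'movie'
    """)

    insert = ("INSERT OR REPLACE INTO watch_history "
              "(title, type, watched_at, source) VALUES (?, ?, ?, ?)")
    conn.execute(insert, ("Heat", "movie", "2025-02-01 20:00:00", "account"))
    # same movie, same calendar day: replaces the earlier row instead of duplicating it
    conn.execute(insert, ("Heat", "movie", "2025-02-01 23:30:00", "server"))

    print(conn.execute("SELECT COUNT(*) FROM watch_history").fetchone()[0])  # -> 1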
diff --git a/utilities/plex_watch_history_functions.py b/utilities/plex_watch_history_functions.py
index c05f12ce..d1b6244a 100644
--- a/utilities/plex_watch_history_functions.py
+++ b/utilities/plex_watch_history_functions.py
@@ -9,14 +9,17 @@
 async def get_watch_history_from_plex():
     """
-    Retrieves the user's complete Plex watch history and stores it in the database.
+    Retrieves the user's complete Plex watch history from both account history and server libraries,
+    then stores it in the database. This dual-source approach ensures maximum coverage of watch history.
     Returns a dictionary with counts of processed movies and episodes.
     """
     try:
         trakt = TraktMetadata()
         processed = {
             'movies': 0,
-            'episodes': 0
+            'episodes': 0,
+            'account_items': 0,
+            'server_items': 0
         }
 
         # Get Plex connection details
         plex_url = get_setting('Plex', 'url')
         plex_token = get_setting('Plex', 'token')
 
         if not plex_url or not plex_token:
             logging.error("Plex URL or token not configured")
-            return {'movies': 0, 'episodes': 0}
+            return {'movies': 0, 'episodes': 0, 'account_items': 0, 'server_items': 0}
 
         logging.info("Connecting to Plex server...")
         # Connect to Plex server
 
             episode INTEGER,
             show_title TEXT,
             duration INTEGER,
-            watch_progress INTEGER
+            watch_progress INTEGER,
+            source TEXT
         )
     ''')
+
+        # SQLite does not allow expressions such as date(watched_at) inside a
+        # table-level UNIQUE constraint, so per-day uniqueness is enforced with
+        # partial unique expression indexes; the insert helpers below perform
+        # the "replace" half of the dedup logic explicitly
+        cursor.execute('''
+            CREATE UNIQUE INDEX IF NOT EXISTS idx_watch_history_movie_day
+            ON watch_history (title, date(watched_at)) WHERE type = 'movie'
+        ''')
+        cursor.execute('''
+            CREATE UNIQUE INDEX IF NOT EXISTS idx_watch_history_episode_day
+            ON watch_history (show_title, season, episode, date(watched_at)) WHERE type = 'episode'
+        ''')
 
-        # Get all watched history from all libraries
-        logging.info("Fetching watch history from all Plex libraries...")
+        # Get all watched history from account
+        logging.info("Fetching history from Plex account...")
         account = plex.myPlexAccount()
         logging.info(f"Authenticated as user: {account.username} (ID: {account.id})")
 
-        # Get history directly from the account instead of the server
-        logging.info("Fetching history directly from account...")
-        history = account.history()
+        # Get history directly from the account
+        account_history = account.history()
 
-        # Debug first few items
-        logging.info("\nFirst 5 history items:")
-        for i, item in enumerate(history[:5]):
-            logging.info(f"Item {i + 1}:")
-            logging.info(f"  Title: {getattr(item, 'title', 'N/A')}")
-            logging.info(f"  Type: {getattr(item, 'type', 'N/A')}")
-            logging.info(f"  Account ID: {getattr(item, 'accountID', 'N/A')}")
-            logging.info("---")
+        # Process account history
+        total_items = len(account_history)
+        logging.info(f"Found {total_items} items in account history")
 
-        total_items = len(history)
-        logging.info(f"Found {total_items} items in watch history")
-
-        for i, item in enumerate(history, 1):
+        # Process account history items
+        for i, item in enumerate(account_history, 1):
             try:
                 if i % 100 == 0 or i == total_items:
-                    logging.info(f"Processing item {i}/{total_items} ({(i/total_items*100):.1f}%)")
+                    logging.info(f"Processing account item {i}/{total_items} ({(i/total_items*100):.1f}%)")
 
                 # Get all available item information for logging
                 item_info = {
                     'guids_len': len(item.guids) if hasattr(item, 'guids') else 0
                 }
 
-                # Skip non-video content
-                if item_info['type'] not in ['movie', 'episode']:
-                    logging.info(f"Skipping non-video content - Item {i} '{item_info['title']}' (Type: {item_info['type']})")
-                    continue
-
-                # Extract basic info and validate required fields
-                title = item_info['title']
-                if not title and item_info['type'] == 'episode' and item_info['grandparentTitle'] and item_info['seasonNumber'] is not None and item_info['index'] is not None:
-                    # Generate title for episodes like "Friends - S04E24"
-                    title = f"{item_info['grandparentTitle']} - S{item_info['seasonNumber']:02d}E{item_info['index']:02d}"
-                    logging.info(f"Generated title for episode: {title}")
-
-                if not title:
-                    logging.warning(f"Skipping item {i}: Missing required field 'title'. Item info: {item_info}")
-                    continue
-
-                watched_at = item_info['viewedAt']
-                if not watched_at:
-                    logging.warning(f"Skipping item {i} '{title}': Missing required field 'viewedAt'. Item info: {item_info}")
-                    continue
-
-                # Determine media type from Plex type field
-                media_type = item_info['type']
-                if not media_type:
-                    # Fallback to checking seasonNumber if type is not available
-                    media_type = 'episode' if item_info['seasonNumber'] is not None else 'movie'
-
-                # Get external IDs
-                imdb_id = None
-                tmdb_id = None
-                tvdb_id = None
-
-                # Get GUIDs from the item and log detailed information
-                guid_info = {
-                    'has_guids_attr': hasattr(item, 'guids'),
-                    'guids_count': len(item.guids) if hasattr(item, 'guids') else 0,
-                    'raw_guid': getattr(item, 'guid', None),
-                    'guid_list': []
-                }
+                success = await process_watch_history_item(cursor, item_info, item, trakt, 'account', processed)
+                if success:
+                    processed['account_items'] += 1
 
-                if hasattr(item, 'guids'):
-                    for guid in item.guids:
-                        guid_data = {
-                            'id': str(guid.id),
-                            'type': type(guid).__name__,
-                            'attributes': {attr: str(getattr(guid, attr)) for attr in dir(guid)
-                                           if not attr.startswith('_') and not callable(getattr(guid, attr))}
-                        }
-                        guid_info['guid_list'].append(guid_data)
-
-                        guid_str = str(guid.id)
-                        if 'imdb://' in guid_str:
-                            imdb_id = guid_str.split('imdb://')[1].split('?')[0]
-                        elif 'tmdb://' in guid_str:
-                            tmdb_id = guid_str.split('tmdb://')[1].split('?')[0]
-                        elif 'tvdb://' in guid_str:
-                            tvdb_id = guid_str.split('tvdb://')[1].split('?')[0]
+            except Exception as e:
+                logging.error(f"Error processing account item: {str(e)}")
+                continue
+
+        # Get history from server libraries
+        logging.info("\nFetching history from Plex server libraries...")
+
+        # Process each library
+        for library in plex.library.sections():
+            logging.info(f"Processing library: {library.title}")
+
+            if library.type == 'movie':
+                # Get watched movies
+                watched_movies = library.search(unwatched=False)
+                total_movies = len(watched_movies)
+                logging.info(f"Found {total_movies} potentially watched movies in {library.title}")
 
-                if not imdb_id:
-                    # First check if we have this item cached in our database
-                    cached_query = None
-                    if media_type == 'movie':
-                        cached_query = '''
-                            SELECT imdb_id FROM watch_history
-                            WHERE title = ? AND type = 'movie' AND imdb_id IS NOT NULL
-                            ORDER BY watched_at DESC LIMIT 1
-                        '''
-                        cursor.execute(cached_query, (title,))
-                    else:
-                        # For TV shows, just match on show title since we want the show's IMDb ID
-                        show_title = item_info['grandparentTitle']
-                        if show_title:
-                            cached_query = '''
-                                SELECT imdb_id FROM watch_history
-                                WHERE show_title = ?
-                                AND type = 'episode' AND imdb_id IS NOT NULL
-                                ORDER BY watched_at DESC LIMIT 1
-                            '''
-                            cursor.execute(cached_query, (show_title,))
+                for i, video in enumerate(watched_movies, 1):
+                    if i % 50 == 0 or i == total_movies:
+                        logging.info(f"Processing movie {i}/{total_movies} in {library.title}")
 
-                    if cached_query:
-                        result = cursor.fetchone()
-                        if result and result[0]:
-                            imdb_id = result[0]
-                            logging.info(f"Found cached IMDb ID {imdb_id} for {'movie' if media_type == 'movie' else 'show'} '{title if media_type == 'movie' else show_title}'")
-
-                if not imdb_id:
-                    # Try to find IMDb ID using Trakt
-                    try:
-                        if media_type == 'movie':
-                            search_year = None
-                            # Try to extract year from title if it's in parentheses
-                            if '(' in title and ')' in title:
-                                title_parts = title.split('(')
-                                if len(title_parts) > 1:
-                                    year_part = title_parts[1].split(')')[0]
-                                    if year_part.isdigit():
-                                        search_year = int(year_part)
-                                    title = title_parts[0].strip()
+                    if video.isWatched:
+                        try:
+                            # Create item_info dictionary for server items
+                            item_info = {
+                                'title': video.title,
+                                'type': 'movie',
+                                'viewedAt': getattr(video, 'lastViewedAt', None),
+                                'ratingKey': video.ratingKey,
+                                'duration': video.duration,
+                                'viewOffset': getattr(video, 'viewOffset', None),
+                                'guid': video.guid,
+                                'raw_guids': str([{'id': g.id, 'attrs': dir(g)} for g in video.guids]) if hasattr(video, 'guids') else 'No guids attribute',
+                                'has_guids_attr': hasattr(video, 'guids'),
+                                'guids_len': len(video.guids) if hasattr(video, 'guids') else 0
+                            }
 
-                            # Search by title
-                            url = f"{trakt.base_url}/search/movie?query={title}"
-                            if search_year:
-                                url += f"&years={search_year}"
-                            response = trakt._make_request(url)
-                            if response and response.status_code == 200:
-                                results = response.json()
-                                if results:
-                                    movie_data = results[0]['movie']
-                                    imdb_id = movie_data['ids'].get('imdb')
-                                    if imdb_id:
-                                        logging.info(f"Found IMDb ID {imdb_id} for movie '{title}' via Trakt")
-                        else:
-                            # For TV shows, we only need the show title
-                            show_title = item_info['grandparentTitle']
-                            if show_title:
-                                # Search for show
-                                url = f"{trakt.base_url}/search/show?query={show_title}"
-                                response = trakt._make_request(url)
-                                if response and response.status_code == 200:
-                                    results = response.json()
-                                    if results:
-                                        show_data = results[0]['show']
-                                        imdb_id = show_data['ids'].get('imdb')
-                                        if imdb_id:
-                                            logging.info(f"Found IMDb ID {imdb_id} for show '{show_title}' via Trakt")
-                    except Exception as e:
-                        logging.warning(f"Error looking up IMDb ID via Trakt for '{title}': {str(e)}")
-
-                if not imdb_id:
-                    logging.info(f"No IMDb ID found for '{title}'. GUID information: {guid_info}")
-
-                # Get rating key (media_id)
-                media_id = str(item_info['ratingKey']) if item_info['ratingKey'] else None
-
-                # Generate synthetic media_id if missing
-                if not media_id:
-                    if media_type == 'movie':
-                        # Use title and year if available, or just title
-                        if '(' in title and ')' in title:
-                            media_id = f"synthetic_movie_{title.replace(' ', '_')}"
-                        else:
-                            media_id = f"synthetic_movie_{title.replace(' ', '_')}"
-                    else:
-                        # For episodes, use show, season, and episode
-                        show_title = item_info['grandparentTitle']
-                        season = item_info['seasonNumber']
-                        episode = item_info['index']
-                        if show_title and season is not None and episode is not None:
-                            media_id = f"synthetic_{show_title.replace(' ', '_')}_{season}_{episode}"
-                        else:
-                            logging.warning(f"Skipping item {i} '{title}': Missing required fields for synthetic ID. Item info: {item_info}")
-                            continue
+                            success = await process_watch_history_item(cursor, item_info, video, trakt, 'server', processed)
+                            if success:
+                                processed['server_items'] += 1
+
+                        except Exception as e:
+                            logging.error(f"Error processing server movie {video.title}: {str(e)}")
+                            continue
+
+            elif library.type == 'show':
+                # Get watched episodes
+                shows = library.search()
+                total_shows = len(shows)
+                logging.info(f"Processing {total_shows} shows in {library.title}")
 
-                if not media_id:
-                    logging.warning(f"Skipping item {i} '{title}': Missing required field 'ratingKey' and couldn't generate synthetic ID. Item info: {item_info}")
-                    continue
-
-                # Before inserting, check if we already have this item for today
-                if media_type == 'movie':
-                    cursor.execute('''
-                        SELECT imdb_id, watched_at
-                        FROM watch_history
-                        WHERE title = ?
-                        AND type = 'movie'
-                        AND date(watched_at) = date(?)
-                    ''', (title, watched_at))
-                else:
-                    cursor.execute('''
-                        SELECT imdb_id, watched_at
-                        FROM watch_history
-                        WHERE show_title = ?
-                        AND season = ?
-                        AND episode = ?
-                        AND type = 'episode'
-                        AND date(watched_at) = date(?)
-                    ''', (item_info['grandparentTitle'], item_info['seasonNumber'], item_info['index'], watched_at))
+                for show_idx, show in enumerate(shows, 1):
+                    if show_idx % 10 == 0 or show_idx == total_shows:
+                        logging.info(f"Processing show {show_idx}/{total_shows} in {library.title}")
 
-                existing = cursor.fetchone()
-                if existing:
-                    existing_imdb, existing_watched = existing
-                    # Convert existing_watched string to datetime for comparison
-                    if isinstance(existing_watched, str):
-                        existing_watched = datetime.strptime(existing_watched, '%Y-%m-%d %H:%M:%S')
-
-                    # If existing entry has no IMDb ID and we have one, or if this is more recent, update it
-                    if (existing_imdb is None and imdb_id is not None) or (watched_at and existing_watched and watched_at > existing_watched):
-                        if media_type == 'movie':
-                            cursor.execute('''
-                                UPDATE watch_history
-                                SET imdb_id = ?, tmdb_id = ?, tvdb_id = ?,
-                                    watched_at = ?, duration = ?, watch_progress = ?
-                                WHERE title = ?
-                                AND type = 'movie'
-                                AND date(watched_at) = date(?)
-                            ''', (
-                                imdb_id, tmdb_id, tvdb_id,
-                                watched_at, item_info['duration'], item_info['viewOffset'],
-                                title, existing_watched
-                            ))
-                        else:
-                            cursor.execute('''
-                                UPDATE watch_history
-                                SET imdb_id = ?, tmdb_id = ?, tvdb_id = ?,
-                                    watched_at = ?, duration = ?, watch_progress = ?
-                                WHERE show_title = ?
-                                AND season = ?
-                                AND episode = ?
-                                AND type = 'episode'
-                                AND date(watched_at) = date(?)
-                            ''', (
-                                imdb_id, tmdb_id, tvdb_id,
-                                watched_at, item_info['duration'], item_info['viewOffset'],
-                                item_info['grandparentTitle'], item_info['seasonNumber'], item_info['index'], existing_watched
-                            ))
-                        logging.info(f"Updated existing {'movie' if media_type == 'movie' else 'episode'} '{title}' with new IMDb ID or watch time")
-                    continue
+                    try:
+                        episodes = show.episodes()
+                        for episode in episodes:
+                            if episode.isWatched:
+                                # Create item_info dictionary for server items
+                                item_info = {
+                                    'title': episode.title,
+                                    'type': 'episode',
+                                    'viewedAt': getattr(episode, 'lastViewedAt', None),
+                                    'ratingKey': episode.ratingKey,
+                                    'seasonNumber': episode.seasonNumber,
+                                    'index': episode.index,
+                                    'grandparentTitle': show.title,
+                                    'duration': episode.duration,
+                                    'viewOffset': getattr(episode, 'viewOffset', None),
+                                    'guid': episode.guid,
+                                    'raw_guids': str([{'id': g.id, 'attrs': dir(g)} for g in episode.guids]) if hasattr(episode, 'guids') else 'No guids attribute',
+                                    'has_guids_attr': hasattr(episode, 'guids'),
+                                    'guids_len': len(episode.guids) if hasattr(episode, 'guids') else 0
+                                }
+
+                                success = await process_watch_history_item(cursor, item_info, episode, trakt, 'server', processed)
+                                if success:
+                                    processed['server_items'] += 1
+
+                    except Exception as e:
+                        logging.error(f"Error processing show {show.title}: {str(e)}")
+                        continue
 
-                # If no existing entry found, proceed with insert
-                if media_type == 'movie':
-                    cursor.execute('''
-                        INSERT OR REPLACE INTO watch_history
-                        (title, type, watched_at, media_id, imdb_id, tmdb_id, tvdb_id, duration, watch_progress)
-                        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
-                    ''', (
-                        title, 'movie', watched_at, media_id,
-                        imdb_id, tmdb_id, tvdb_id,
-                        item_info['duration'],
-                        item_info['viewOffset']
-                    ))
-                    processed['movies'] += 1
-                    if processed['movies'] % 50 == 0:
-                        logging.info(f"Processed {processed['movies']} movies")
-                else:
-                    # Validate episode-specific fields
-                    season = item_info['seasonNumber']
-                    episode = item_info['index']
-                    show_title = item_info['grandparentTitle']
-
-                    if season is None or episode is None:
-                        logging.warning(f"Skipping episode {i} '{title}': Missing season or episode number. Item info: {item_info}")
-                        continue
-
-                    cursor.execute('''
-                        INSERT OR REPLACE INTO watch_history
-                        (title, type, watched_at, media_id, imdb_id, tmdb_id, tvdb_id,
-                         season, episode, show_title, duration, watch_progress)
-                        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-                    ''', (
-                        title, 'episode', watched_at, media_id,
-                        imdb_id, tmdb_id, tvdb_id,
-                        season, episode, show_title,
-                        item_info['duration'],
-                        item_info['viewOffset']
-                    ))
-                    processed['episodes'] += 1
-                    if processed['episodes'] % 100 == 0:
-                        logging.info(f"Processed {processed['episodes']} episodes")
-
-            except Exception as e:
-                logging.error(f"Error processing item {title}: {str(e)}")
-                continue
+            else:
+                logging.info(f"Skipping library type: {library.type}")
 
         conn.commit()
         conn.close()
 
-        logging.info(f"Watch history sync complete! Processed {processed['movies']} movies and {processed['episodes']} episodes")
+        logging.info("\nWatch history sync complete!")
+        logging.info(f"Account history items processed: {processed['account_items']}")
+        logging.info(f"Server library items processed: {processed['server_items']}")
+        logging.info(f"Total movies: {processed['movies']}")
+        logging.info(f"Total episodes: {processed['episodes']}")
+
         return processed
 
     except Exception as e:
         logging.error(f"Error getting watch history: {str(e)}")
-        return {'movies': 0, 'episodes': 0}
+        return {'movies': 0, 'episodes': 0, 'account_items': 0, 'server_items': 0}
+
+async def process_watch_history_item(cursor, item_info, item, trakt, source, processed):
+    """
+    Helper function to process a single watch history item and insert/update it in the database.
+    Returns True if the item was successfully processed, False otherwise.
+    """
+    # Resolve the title before the try block so the error log below is safe
+    # even when the failure happens during validation
+    title = item_info.get('title')
+    try:
+        # Skip non-video content
+        if item_info['type'] not in ['movie', 'episode']:
+            logging.info(f"Skipping non-video content - '{item_info['title']}' (Type: {item_info['type']})")
+            return False
+
+        # Validate required fields, generating an episode title when necessary
+        if not title and item_info['type'] == 'episode' and item_info['grandparentTitle'] and item_info['seasonNumber'] is not None and item_info['index'] is not None:
+            # Generate title for episodes like "Friends - S04E24"
+            title = f"{item_info['grandparentTitle']} - S{item_info['seasonNumber']:02d}E{item_info['index']:02d}"
+            logging.info(f"Generated title for episode: {title}")
+
+        if not title:
+            logging.warning(f"Skipping item: Missing required field 'title'. Item info: {item_info}")
+            return False
+
+        watched_at = item_info['viewedAt']
+        if not watched_at:
+            logging.warning(f"Skipping item '{title}': Missing required field 'viewedAt'. Item info: {item_info}")
+            return False
+
+        # Get external IDs
+        imdb_id = None
+        tmdb_id = None
+        tvdb_id = None
+
+        if hasattr(item, 'guids'):
+            for guid in item.guids:
+                guid_str = str(guid.id)
+                if 'imdb://' in guid_str:
+                    imdb_id = guid_str.split('imdb://')[1].split('?')[0]
+                elif 'tmdb://' in guid_str:
+                    tmdb_id = guid_str.split('tmdb://')[1].split('?')[0]
+                elif 'tvdb://' in guid_str:
+                    tvdb_id = guid_str.split('tvdb://')[1].split('?')[0]
+
+        if not imdb_id:
+            # Try to find IMDb ID using existing database entry or Trakt
+            imdb_id = await find_imdb_id(cursor, item_info, title, trakt)
+
+        # Get or generate media_id
+        media_id = str(item_info['ratingKey']) if item_info['ratingKey'] else generate_synthetic_media_id(item_info, title)
+
+        if not media_id:
+            logging.warning(f"Skipping item '{title}': Could not generate media ID. Item info: {item_info}")
+            return False
+
+        # Insert or update the database entry
+        if item_info['type'] == 'movie':
+            success = insert_or_update_movie(cursor, title, watched_at, media_id, imdb_id, tmdb_id, tvdb_id,
+                                             item_info['duration'], item_info['viewOffset'], source)
+            if success:
+                processed['movies'] += 1
+        else:
+            success = insert_or_update_episode(cursor, title, watched_at, media_id, imdb_id, tmdb_id, tvdb_id,
+                                               item_info['seasonNumber'], item_info['index'], item_info['grandparentTitle'],
+                                               item_info['duration'], item_info['viewOffset'], source)
+            if success:
+                processed['episodes'] += 1
+
+        return success
+
+    except Exception as e:
+        logging.error(f"Error processing item '{title}': {str(e)}")
+        return False
+
+async def find_imdb_id(cursor, item_info, title, trakt):
+    """Helper function to find IMDb ID from database cache or Trakt"""
+    media_type = item_info['type']
+
+    # First check the database cache; only call fetchone() when a query was
+    # actually executed, so we never read the result of an earlier query
+    result = None
+    if media_type == 'movie':
+        cursor.execute('''
+            SELECT imdb_id FROM watch_history
+            WHERE title = ? AND type = 'movie' AND imdb_id IS NOT NULL
+            ORDER BY watched_at DESC LIMIT 1
+        ''', (title,))
+        result = cursor.fetchone()
+    else:
+        show_title = item_info['grandparentTitle']
+        if show_title:
+            cursor.execute('''
+                SELECT imdb_id FROM watch_history
+                WHERE show_title = ?
+                AND type = 'episode' AND imdb_id IS NOT NULL
+                ORDER BY watched_at DESC LIMIT 1
+            ''', (show_title,))
+            result = cursor.fetchone()
+
+    if result and result[0]:
+        return result[0]
+
+    # Try Trakt if no cached ID found
+    try:
+        if media_type == 'movie':
+            search_year = None
+            if '(' in title and ')' in title:
+                title_parts = title.split('(')
+                if len(title_parts) > 1:
+                    year_part = title_parts[1].split(')')[0]
+                    if year_part.isdigit():
+                        search_year = int(year_part)
+                    title = title_parts[0].strip()
+
+            url = f"{trakt.base_url}/search/movie?query={title}"
+            if search_year:
+                url += f"&years={search_year}"
+            response = trakt._make_request(url)
+            if response and response.status_code == 200:
+                results = response.json()
+                if results:
+                    return results[0]['movie']['ids'].get('imdb')
+        else:
+            show_title = item_info['grandparentTitle']
+            if show_title:
+                url = f"{trakt.base_url}/search/show?query={show_title}"
+                response = trakt._make_request(url)
+                if response and response.status_code == 200:
+                    results = response.json()
+                    if results:
+                        return results[0]['show']['ids'].get('imdb')
+    except Exception as e:
+        logging.warning(f"Error looking up IMDb ID via Trakt for '{title}': {str(e)}")
+
+    return None
+
+def generate_synthetic_media_id(item_info, title):
+    """Helper function to generate a synthetic media ID when one is not available"""
+    media_type = item_info['type']
+
+    if media_type == 'movie':
+        return f"synthetic_movie_{title.replace(' ', '_')}"
+    else:
+        show_title = item_info['grandparentTitle']
+        season = item_info['seasonNumber']
+        episode = item_info['index']
+        if show_title and season is not None and episode is not None:
+            return f"synthetic_{show_title.replace(' ', '_')}_{season}_{episode}"
+        return None
+
+def insert_or_update_movie(cursor, title, watched_at, media_id, imdb_id, tmdb_id, tvdb_id, duration, view_offset, source):
+    """Helper function to insert or update a movie entry in the database"""
+    try:
+        # First check if we already have this movie for this day
+        cursor.execute('''
+            SELECT id, watched_at, imdb_id
+            FROM watch_history
+            WHERE title = ?
+            AND type = 'movie'
+            AND date(watched_at) = date(?)
+        ''', (title, watched_at))
+
+        existing = cursor.fetchone()
+        if existing:
+            existing_id, existing_watched_str, existing_imdb = existing
+
+            # Convert string timestamps to datetime objects for comparison
+            try:
+                if isinstance(watched_at, str):
+                    watched_at = datetime.strptime(watched_at, '%Y-%m-%d %H:%M:%S')
+                if isinstance(existing_watched_str, str):
+                    existing_watched = datetime.strptime(existing_watched_str, '%Y-%m-%d %H:%M:%S')
+                else:
+                    existing_watched = existing_watched_str
+            except Exception as e:
+                logging.warning(f"Error parsing dates for '{title}': {str(e)}")
+                existing_watched = None
+
+            # If existing entry has no IMDb ID and we have one, or if this is more recent, update it
+            should_update = (existing_imdb is None and imdb_id is not None)
+            if not should_update and watched_at and existing_watched:
+                try:
+                    should_update = watched_at > existing_watched
+                except Exception as e:
+                    logging.warning(f"Error comparing dates for '{title}': {str(e)}")
+                    should_update = False
+
+            if should_update:
+                cursor.execute('''
+                    UPDATE watch_history
+                    SET watched_at = ?, media_id = ?, imdb_id = ?, tmdb_id = ?, tvdb_id = ?,
+                        duration = ?, watch_progress = ?, source = ?
+                    WHERE id = ?
+                ''', (
+                    watched_at.strftime('%Y-%m-%d %H:%M:%S') if isinstance(watched_at, datetime) else watched_at,
+                    media_id, imdb_id, tmdb_id, tvdb_id,
+                    duration, view_offset, source, existing_id
+                ))
+                logging.debug(f"Updated existing movie entry for '{title}'")
+            else:
+                logging.debug(f"Skipping duplicate movie entry for '{title}' (already have more recent or complete entry)")
+            return True
+
+        # If no existing entry, insert new one
+        cursor.execute('''
+            INSERT INTO watch_history
+            (title, type, watched_at, media_id, imdb_id, tmdb_id, tvdb_id, duration, watch_progress, source)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+        ''', (
+            title, 'movie',
+            watched_at.strftime('%Y-%m-%d %H:%M:%S') if isinstance(watched_at, datetime) else watched_at,
+            media_id, imdb_id, tmdb_id, tvdb_id,
+            duration, view_offset, source
+        ))
+        logging.debug(f"Inserted new movie entry for '{title}'")
+        return True
+    except Exception as e:
+        logging.error(f"Error inserting/updating movie '{title}': {str(e)}")
+        return False
+
+def insert_or_update_episode(cursor, title, watched_at, media_id, imdb_id, tmdb_id, tvdb_id,
+                             season, episode, show_title, duration, view_offset, source):
+    """Helper function to insert or update an episode entry in the database"""
+    try:
+        if season is None or episode is None:
+            logging.warning(f"Skipping episode '{title}': Missing season or episode number")
+            return False
+
+        # First check if we already have this episode for this day
+        cursor.execute('''
+            SELECT id, watched_at, imdb_id
+            FROM watch_history
+            WHERE show_title = ?
+            AND season = ?
+            AND episode = ?
+            AND type = 'episode'
+            AND date(watched_at) = date(?)
+        ''', (show_title, season, episode, watched_at))
+
+        existing = cursor.fetchone()
+        if existing:
+            existing_id, existing_watched_str, existing_imdb = existing
+
+            # Convert string timestamps to datetime objects for comparison
+            try:
+                if isinstance(watched_at, str):
+                    watched_at = datetime.strptime(watched_at, '%Y-%m-%d %H:%M:%S')
+                if isinstance(existing_watched_str, str):
+                    existing_watched = datetime.strptime(existing_watched_str, '%Y-%m-%d %H:%M:%S')
+                else:
+                    existing_watched = existing_watched_str
+            except Exception as e:
+                logging.warning(f"Error parsing dates for '{title}': {str(e)}")
+                existing_watched = None
+
+            # If existing entry has no IMDb ID and we have one, or if this is more recent, update it
+            should_update = (existing_imdb is None and imdb_id is not None)
+            if not should_update and watched_at and existing_watched:
+                try:
+                    should_update = watched_at > existing_watched
+                except Exception as e:
+                    logging.warning(f"Error comparing dates for '{title}': {str(e)}")
+                    should_update = False
+
+            if should_update:
+                cursor.execute('''
+                    UPDATE watch_history
+                    SET title = ?, watched_at = ?, media_id = ?, imdb_id = ?, tmdb_id = ?, tvdb_id = ?,
+                        duration = ?, watch_progress = ?, source = ?
+                    WHERE id = ?
+                ''', (
+                    title,
+                    watched_at.strftime('%Y-%m-%d %H:%M:%S') if isinstance(watched_at, datetime) else watched_at,
+                    media_id, imdb_id, tmdb_id, tvdb_id,
+                    duration, view_offset, source, existing_id
+                ))
+                logging.debug(f"Updated existing episode entry for '{show_title} S{season}E{episode}'")
+            else:
+                logging.debug(f"Skipping duplicate episode entry for '{show_title} S{season}E{episode}' (already have more recent or complete entry)")
+            return True
+
+        # If no existing entry, insert new one
+        cursor.execute('''
+            INSERT INTO watch_history
+            (title, type, watched_at, media_id, imdb_id, tmdb_id, tvdb_id,
+             season, episode, show_title, duration, watch_progress, source)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+        ''', (
+            title, 'episode',
+            watched_at.strftime('%Y-%m-%d %H:%M:%S') if isinstance(watched_at, datetime) else watched_at,
+            media_id, imdb_id, tmdb_id, tvdb_id,
+            season, episode, show_title,
+            duration, view_offset, source
+        ))
+        logging.debug(f"Inserted new episode entry for '{show_title} S{season}E{episode}'")
+        return True
+    except Exception as e:
+        logging.error(f"Error inserting/updating episode '{title}': {str(e)}")
+        return False
+
+async def test_plex_history_comparison():
+    """
+    Test function that compares watch history from two Plex sources:
+    1. account.history() - history directly from the Plex account
+    2. PlexServer - history from all libraries on the server
+
+    Logs detailed statistics and any discrepancies found.
+    """
+    try:
+        # Get Plex connection details
+        plex_url = get_setting('Plex', 'url')
+        plex_token = get_setting('Plex', 'token')
+
+        if not plex_url or not plex_token:
+            logging.error("Plex URL or token not configured")
+            return
+
+        logging.info("Connecting to Plex server...")
+        plex = PlexServer(plex_url, plex_token)
+        account = plex.myPlexAccount()
+
+        # Get history from account
+        logging.info("Fetching history from Plex account...")
+        account_history = account.history()
+
+        # Count account history items by type
+        account_counts = {'movies': 0, 'episodes': 0, 'other': 0}
+        account_items = {}  # Store unique identifiers for comparison
+
+        for item in account_history:
+            item_type = getattr(item, 'type', 'other')
+            if item_type == 'movie':
+                account_counts['movies'] += 1
+                key = ('movie', getattr(item, 'title', ''), getattr(item, 'year', ''))
+                account_items[key] = account_items.get(key, 0) + 1
+            elif item_type == 'episode':
+                account_counts['episodes'] += 1
+                show = getattr(item, 'grandparentTitle', '')
+                season = getattr(item, 'seasonNumber', '')
+                episode = getattr(item, 'index', '')
+                key = ('episode', show, season, episode)
+                account_items[key] = account_items.get(key, 0) + 1
+            else:
+                account_counts['other'] += 1
+
+        # Get history from server libraries
+        logging.info("Fetching history from Plex server libraries...")
+        server_counts = {'movies': 0, 'episodes': 0, 'other': 0}
+        server_items = {}  # Store unique identifiers for comparison
+
+        # Process each library
+        for library in plex.library.sections():
+            logging.info(f"Processing library: {library.title}")
+
+            if library.type == 'movie':
+                # Get watched movies
+                for video in library.search(unwatched=False):
+                    if video.isWatched:
+                        server_counts['movies'] += 1
+                        key = ('movie', video.title, getattr(video, 'year', ''))
+                        server_items[key] = server_items.get(key, 0) + 1
+
+            elif library.type == 'show':
+                # Get watched episodes
+                for show in library.search():
+                    for episode in show.episodes():
+                        if episode.isWatched:
+                            server_counts['episodes'] += 1
+                            key = ('episode', show.title, episode.seasonNumber, episode.index)
+                            server_items[key] = server_items.get(key, 0) + 1
+            else:
+                logging.info(f"Skipping library type: {library.type}")
+
+        # Compare most recent items
+        logging.info("\nMost recent items from account history:")
+        for item in account_history[:5]:
+            title = getattr(item, 'title', 'N/A')
+            type_ = getattr(item, 'type', 'N/A')
+            show = getattr(item, 'grandparentTitle', '')
+            if show:
+                title = f"{show} - {title}"
+            logging.info(f"- {title} ({type_})")
+
+        # Print comparison
+        logging.info("\nComparison Summary:")
+        logging.info("Account History Counts:")
+        logging.info(f"- Movies: {account_counts['movies']}")
+        logging.info(f"- Episodes: {account_counts['episodes']}")
+        logging.info(f"- Other: {account_counts['other']}")
+        logging.info(f"Total: {sum(account_counts.values())}")
+
+        logging.info("\nServer Library Counts:")
+        logging.info(f"- Movies: {server_counts['movies']}")
+        logging.info(f"- Episodes: {server_counts['episodes']}")
+        logging.info(f"Total: {sum(server_counts.values())}")
+
+        # Calculate differences
+        movie_diff = account_counts['movies'] - server_counts['movies']
+        episode_diff = account_counts['episodes'] - server_counts['episodes']
+
+        logging.info("\nDifferences (Account - Server):")
+        logging.info(f"- Movies: {movie_diff:+d}")
+        logging.info(f"- Episodes: {episode_diff:+d}")
+
+        # Find specific differences
+        logging.info("\nDetailed Differences:")
+
+        # Items in account but not in server
+        account_only = set(account_items.keys()) - set(server_items.keys())
+        if account_only:
+            logging.info("\nItems in account history but not marked watched on server:")
+            for item in sorted(account_only)[:5]:  # Show first 5 differences
+                if item[0] == 'movie':
+                    logging.info(f"- Movie: {item[1]} ({item[2]})")
+                else:
+                    logging.info(f"- Episode: {item[1]} S{item[2]}E{item[3]}")
+
+        # Items in server but not in account
+        server_only = set(server_items.keys()) - set(account_items.keys())
+        if server_only:
+            logging.info("\nItems marked watched on server but not in account history:")
+            for item in sorted(server_only)[:5]:  # Show first 5 differences
+                if item[0] == 'movie':
+                    logging.info(f"- Movie: {item[1]} ({item[2]})")
+                else:
+                    logging.info(f"- Episode: {item[1]} S{item[2]}E{item[3]}")
+
+    except Exception as e:
+        logging.error(f"Error during history comparison: {str(e)}")
+
+def sync_test_plex_history_comparison():
+    """
+    Synchronous wrapper for test_plex_history_comparison
+    """
+    return asyncio.run(test_plex_history_comparison())
 
 def sync_get_watch_history_from_plex():
     """
     Synchronous wrapper for get_watch_history_from_plex
     """
     return asyncio.run(get_watch_history_from_plex())
+
+def sync_test_plex_history_sync():
+    """
+    Synchronous wrapper for test_plex_history_comparison
+    """
+    return asyncio.run(test_plex_history_comparison())
diff --git a/version.txt b/version.txt
index cacbd7a5..0e7bb884 100755
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-0.5.68
+0.5.69
diff --git a/web_scraper.py b/web_scraper.py
index 21bb62c4..5bcaf4df 100755
--- a/web_scraper.py
+++ b/web_scraper.py
@@ -446,8 +446,6 @@ def web_scrape_tvshow(media_id: int, title: str, year: int, season: Optional[int
         logging.warning(f"No results found for show: {title}")
         return {"error": "No results found"}
 
-    #logging.info(f"Found results: {trakt_data}")
-
     # Fetch TMDB data
     tmdb_data = get_tmdb_data(media_id, 'tv', season)
 
@@ -469,11 +467,14 @@ def web_scrape_tvshow(media_id: int, title: str, year: int, season: Optional[int
                     "multi": False
                 }
                 for episode in trakt_data
-                if episode.get('first_aired') is not None
-                if episode['number'] != 0
+                if episode['number'] != 0  # Only filter out special episodes
             ]
         }
     else:
+        # Get TMDB season data for fallback air dates
+        tmdb_seasons = tmdb_data.get('seasons', [])
+        tmdb_seasons_dict = {s['season_number']: s for s in tmdb_seasons}
+
         return {
             "episode_results": [
                 {
                     "year": year,
                     "media_type": 'tv',
                     "poster_path": tmdb_data.get('poster_path'),
-                    "air_date": season.get('first_aired'),
-                    "season_overview": season.get('overview', ''),
-                    "episode_count": season.get('episode_count', 0),
+                    "air_date": season.get('first_aired') or tmdb_seasons_dict.get(season['number'], {}).get('air_date'),
+                    "season_overview": season.get('overview', '') or tmdb_seasons_dict.get(season['number'], {}).get('overview', ''),
+                    "episode_count": season.get('episode_count', 0) or tmdb_seasons_dict.get(season['number'], {}).get('episode_count', 0),
                     "multi": True
                 }
                 for season in trakt_data
-                if season.get('first_aired') is not None
-                if season['number'] != 0
+                if season['number'] != 0  # Only filter out special seasons
             ]
         }
     except api.exceptions.RequestException as e:
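
For completeness, the new sync entry points can be exercised from a Python shell; this assumes the project root is importable and the Plex URL/token are configured in settings:

    from utilities.plex_watch_history_functions import (
        sync_get_watch_history_from_plex,
        sync_test_plex_history_comparison,
    )

    # full two-source sync; returns the counters built in get_watch_history_from_plex()
    counts = sync_get_watch_history_from_plex()
    print(f"movies: {counts['movies']}, episodes: {counts['episodes']}")
    print(f"account items: {counts['account_items']}, server items: {counts['server_items']}")

    # optional: log a comparison of the two sources without writing to the database
    sync_test_plex_history_comparison()

The synced rows can then be reviewed on the new /database/watch_history debug page.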