diff --git a/CHANGELOG.md b/CHANGELOG.md index 96f9bac..4b7a82e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added + +- 10 new AI tool recognitions: Grok, Gemini, Jimeng (即梦), Luma, Hailuo (海螺), Pixverse, Genmo, Haiper, Hume, Fish Audio (51 → 61 tools) +- Video frame watermark analysis: extracts keyframes via `ffmpeg` and runs DWT-DCT analysis on video content +- Creation software detection: displays non-AI tools (FFmpeg, Remotion, Premiere, etc.) as informational metadata +- 9 new MP4 SEI watermark markers: Sora, Runway, Pika, Luma, Hailuo, Pixverse, Vidu, Genmo, Haiper +- Metadata stripping hint when no signals are detected (all 7 languages) + +### Fixed + +- Udio pattern matching: `udio.com` → `udio` for broader text matching + ## [0.1.0] - 2025-06-01 ### Added diff --git a/Cargo.toml b/Cargo.toml index b0daf2e..33e2963 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ rustfft = "6" quick-xml = "0.36" rust-i18n = "3" sys-locale = "0.3" +tempfile = "3" [dev-dependencies] assert_cmd = "2" diff --git a/README.md b/README.md index acc9538..16f094a 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ AICheck answers these questions by analyzing file metadata and invisible watermarks. No API keys, no network, no setup. -**10 detection methods** · **51 AI tools** · **16 file formats** · **3 confidence tiers** · **Zero network requests** +**10 detection methods** · **61 AI tools** · **16 file formats** · **3 confidence tiers** · **Zero network requests** --- @@ -79,7 +79,7 @@ real_photo.jpg **XMP/IPTC Metadata (MEDIUM confidence)** — Standard photo metadata: `DigitalSourceType`, `AISystemUsed`, `AIPromptInformation`, `CreatorTool`. Reliable but unsigned — can be faked or stripped. -**MP4 Container Metadata (MEDIUM confidence)** — Parses iTunes-style atoms (`©too`, `©swr`), AIGC labels (China standard with JSON `ProduceID`), and H.264 SEI watermark markers (e.g. Kling). Catches AI signals baked into video containers that other methods miss. +**MP4 Container Metadata (MEDIUM confidence)** — Parses iTunes-style atoms (`©too`, `©swr`), AIGC labels (China standard with JSON `ProduceID`), and H.264 SEI watermark markers (Kling, Sora, Runway, Pika, Luma, Hailuo, Pixverse, Vidu, Genmo, Haiper). Also detects non-AI creation software (FFmpeg, Remotion, Premiere, etc.) for informational display. Catches AI signals baked into video containers that other methods miss. **ID3 Audio Metadata (MEDIUM confidence)** — Reads ID3v2 tags from MP3 files: comment frames (COMM), URL frames (WOAS/WOAF/WXXX), and text frames (TENC/TPUB/TXXX). Detects AI audio platforms like Suno (via embedded URLs and "made with suno" comments). @@ -93,7 +93,7 @@ real_photo.jpg **Audio Spectral Analysis (LOW confidence)** — FFT-based analysis of WAV audio: detects hard frequency cutoffs (energy concentrated below Nyquist) and abnormal spectral flatness typical of TTS/AI synthesis. Runs as a fallback or with `--deep`. -**Invisible Watermarks (LOW confidence)** — Pixel-level DWT-DCT analysis that detects channel noise asymmetry, cross-channel bit agreement, and wavelet energy patterns. Runs automatically as a fallback when no metadata signals are found, or on demand with `--deep`. +**Invisible Watermarks (LOW confidence)** — Pixel-level DWT-DCT analysis that detects channel noise asymmetry, cross-channel bit agreement, and wavelet energy patterns. For videos, automatically extracts keyframes via `ffmpeg` and analyzes them individually. Runs automatically as a fallback when no metadata signals are found, or on demand with `--deep`. --- @@ -103,10 +103,10 @@ real_photo.jpg | Category | Tools | |----------|-------| -| Image generation | DALL-E, Midjourney, Stable Diffusion, Adobe Firefly, Imagen, Flux, Ideogram, Leonardo.ai, NovelAI | -| Video generation | Sora, Google Veo, Runway, Pika, Kling, Vidu | -| Audio/Music generation | Suno, Udio, ElevenLabs, SoundRaw, AIVA, Boomy, Mubert, Beatoven, Soundful | -| Multimodal | GPT-4o, GPT-4, ChatGPT, OpenAI, GPT Image | +| Image generation | DALL-E, Midjourney, Stable Diffusion, Adobe Firefly, Imagen, Flux, Ideogram, Leonardo.ai, NovelAI, Grok, Jimeng (即梦) | +| Video generation | Sora, Google Veo, Runway, Pika, Kling, Vidu, Luma, Hailuo (海螺), Pixverse, Genmo, Haiper | +| Audio/Music generation | Suno, Udio, ElevenLabs, SoundRaw, AIVA, Boomy, Mubert, Beatoven, Soundful, Hume, Fish Audio | +| Multimodal | GPT-4o, GPT-4, ChatGPT, OpenAI, GPT Image, Gemini | | Platforms | Bing Image Creator, Copilot Designer, Microsoft Designer, Canva AI, DreamStudio, NightCafe, Craiyon, DeepAI, Meta AI, Stability AI | | Interfaces | ComfyUI, Automatic1111 (A1111), InvokeAI, Fooocus | | Research | Glide, Parti, Muse, Seedream, Recraft | diff --git a/docs/README.de.md b/docs/README.de.md index ae7fe72..615d546 100644 --- a/docs/README.de.md +++ b/docs/README.de.md @@ -19,7 +19,7 @@ AICheck beantwortet diese Fragen durch Analyse von Datei-Metadaten und unsichtbaren Wasserzeichen. Keine API-Keys, kein Netzwerk, keine Einrichtung. -**10 Erkennungsmethoden** · **51 KI-Tools** · **16 Dateiformate** · **3 Konfidenzstufen** · **Null Netzwerkanfragen** +**10 Erkennungsmethoden** · **61 KI-Tools** · **16 Dateiformate** · **3 Konfidenzstufen** · **Null Netzwerkanfragen** --- @@ -79,7 +79,7 @@ real_photo.jpg **XMP/IPTC-Metadaten (MEDIUM Konfidenz)** — Standard-Foto-Metadaten: `DigitalSourceType`, `AISystemUsed`, `AIPromptInformation`, `CreatorTool`. Zuverlässig, aber nicht signiert — kann gefälscht oder entfernt werden. -**MP4-Container-Metadaten (MEDIUM Konfidenz)** — Analysiert iTunes-Stil-Atome (`©too`, `©swr`), AIGC-Labels (chinesischer Standard mit JSON `ProduceID`) und H.264-SEI-Wasserzeichenmarker (z.B. Kling). Erfasst KI-Signale, die in Videocontainern eingebettet sind und von anderen Methoden übersehen werden. +**MP4-Container-Metadaten (MEDIUM Konfidenz)** — Analysiert iTunes-Stil-Atome (`©too`, `©swr`), AIGC-Labels (chinesischer Standard mit JSON `ProduceID`) und H.264-SEI-Wasserzeichenmarker (Kling, Sora, Runway, Pika, Luma, Hailuo, Pixverse, Vidu, Genmo, Haiper). Erkennt auch nicht-KI-Erstellungssoftware (FFmpeg, Remotion, Premiere usw.) zur informativen Anzeige. Erfasst KI-Signale, die in Videocontainern eingebettet sind und von anderen Methoden übersehen werden. **ID3-Audio-Metadaten (MEDIUM Konfidenz)** — Liest ID3v2-Tags aus MP3-Dateien: Kommentarframes (COMM), URL-Frames (WOAS/WOAF/WXXX) und Textframes (TENC/TPUB/TXXX). Erkennt KI-Audioplattformen wie Suno (über eingebettete URLs und „made with suno"-Kommentare). @@ -93,7 +93,7 @@ real_photo.jpg **Audio-Spektralanalyse (LOW Konfidenz)** — FFT-basierte Analyse von WAV-Audio: erkennt harte Frequenzabschneide (Energie konzentriert unterhalb von Nyquist) und abnormale spektrale Flachheit, typisch für TTS/KI-Synthese. Läuft als Fallback oder mit `--deep`. -**Unsichtbare Wasserzeichen (LOW Konfidenz)** — Pixelbasierte DWT-DCT-Analyse, die Kanalrauschen-Asymmetrie, kanalübergreifende Bit-Übereinstimmung und Wavelet-Energiemuster erkennt. Läuft automatisch als Fallback, wenn keine Metadaten-Signale gefunden werden, oder auf Anforderung mit `--deep`. +**Unsichtbare Wasserzeichen (LOW Konfidenz)** — Pixelbasierte DWT-DCT-Analyse, die Kanalrauschen-Asymmetrie, kanalübergreifende Bit-Übereinstimmung und Wavelet-Energiemuster erkennt. Bei Videos werden automatisch Keyframes über `ffmpeg` extrahiert und einzeln analysiert. Läuft automatisch als Fallback, wenn keine Metadaten-Signale gefunden werden, oder auf Anforderung mit `--deep`. --- @@ -103,10 +103,10 @@ real_photo.jpg | Kategorie | Tools | |-----------|-------| -| Bildgenerierung | DALL-E, Midjourney, Stable Diffusion, Adobe Firefly, Imagen, Flux, Ideogram, Leonardo.ai, NovelAI | -| Videogenerierung | Sora, Google Veo, Runway, Pika, Kling, Vidu | -| Audio-/Musikgenerierung | Suno, Udio, ElevenLabs, SoundRaw, AIVA, Boomy, Mubert, Beatoven, Soundful | -| Multimodal | GPT-4o, GPT-4, ChatGPT, OpenAI, GPT Image | +| Bildgenerierung | DALL-E, Midjourney, Stable Diffusion, Adobe Firefly, Imagen, Flux, Ideogram, Leonardo.ai, NovelAI, Grok, Jimeng (即梦) | +| Videogenerierung | Sora, Google Veo, Runway, Pika, Kling, Vidu, Luma, Hailuo (海螺), Pixverse, Genmo, Haiper | +| Audio-/Musikgenerierung | Suno, Udio, ElevenLabs, SoundRaw, AIVA, Boomy, Mubert, Beatoven, Soundful, Hume, Fish Audio | +| Multimodal | GPT-4o, GPT-4, ChatGPT, OpenAI, GPT Image, Gemini | | Plattformen | Bing Image Creator, Copilot Designer, Microsoft Designer, Canva AI, DreamStudio, NightCafe, Craiyon, DeepAI, Meta AI, Stability AI | | Oberflächen | ComfyUI, Automatic1111 (A1111), InvokeAI, Fooocus | | Forschung | Glide, Parti, Muse, Seedream, Recraft | diff --git a/docs/README.es.md b/docs/README.es.md index fc1fa1c..8e4dbb9 100644 --- a/docs/README.es.md +++ b/docs/README.es.md @@ -19,7 +19,7 @@ AICheck responde estas preguntas analizando los metadatos de archivos y marcas de agua invisibles. Sin API keys, sin red, sin configuración. -**10 métodos de detección** · **51 herramientas de IA** · **16 formatos de archivo** · **3 niveles de confianza** · **Cero peticiones de red** +**10 métodos de detección** · **61 herramientas de IA** · **16 formatos de archivo** · **3 niveles de confianza** · **Cero peticiones de red** --- @@ -79,7 +79,7 @@ real_photo.jpg **Metadatos XMP/IPTC (confianza MEDIUM)** — Metadatos fotográficos estándar: `DigitalSourceType`, `AISystemUsed`, `AIPromptInformation`, `CreatorTool`. Fiables pero sin firma — pueden ser falsificados o eliminados. -**Metadatos de contenedor MP4 (confianza MEDIUM)** — Analiza átomos estilo iTunes (`©too`, `©swr`), etiquetas AIGC (estándar chino con JSON `ProduceID`) y marcadores de marca de agua SEI H.264 (ej. Kling). Captura señales de IA integradas en contenedores de video que otros métodos no detectan. +**Metadatos de contenedor MP4 (confianza MEDIUM)** — Analiza átomos estilo iTunes (`©too`, `©swr`), etiquetas AIGC (estándar chino con JSON `ProduceID`) y marcadores de marca de agua SEI H.264 (Kling, Sora, Runway, Pika, Luma, Hailuo, Pixverse, Vidu, Genmo, Haiper). También detecta software de creación no-IA (FFmpeg, Remotion, Premiere, etc.) para visualización informativa. Captura señales de IA integradas en contenedores de video que otros métodos no detectan. **Metadatos de audio ID3 (confianza MEDIUM)** — Lee etiquetas ID3v2 de archivos MP3: marcos de comentario (COMM), marcos de URL (WOAS/WOAF/WXXX) y marcos de texto (TENC/TPUB/TXXX). Detecta plataformas de audio IA como Suno (mediante URLs incrustadas y comentarios "made with suno"). @@ -93,7 +93,7 @@ real_photo.jpg **Análisis espectral de audio (confianza LOW)** — Análisis basado en FFT de audio WAV: detecta cortes de frecuencia abruptos (energía concentrada por debajo de Nyquist) y planitud espectral anormal típica de síntesis TTS/IA. Se ejecuta como respaldo o con `--deep`. -**Marcas de agua invisibles (confianza LOW)** — Análisis DWT-DCT a nivel de píxel que detecta asimetría de ruido entre canales, concordancia de bits entre canales y patrones de energía wavelet. Se ejecuta automáticamente como respaldo cuando no se encuentran señales de metadatos, o bajo demanda con `--deep`. +**Marcas de agua invisibles (confianza LOW)** — Análisis DWT-DCT a nivel de píxel que detecta asimetría de ruido entre canales, concordancia de bits entre canales y patrones de energía wavelet. Para videos, extrae automáticamente fotogramas clave mediante `ffmpeg` y los analiza individualmente. Se ejecuta automáticamente como respaldo cuando no se encuentran señales de metadatos, o bajo demanda con `--deep`. --- @@ -103,10 +103,10 @@ real_photo.jpg | Categoría | Herramientas | |-----------|-------------| -| Generación de imágenes | DALL-E, Midjourney, Stable Diffusion, Adobe Firefly, Imagen, Flux, Ideogram, Leonardo.ai, NovelAI | -| Generación de video | Sora, Google Veo, Runway, Pika, Kling, Vidu | -| Generación de audio/música | Suno, Udio, ElevenLabs, SoundRaw, AIVA, Boomy, Mubert, Beatoven, Soundful | -| Multimodal | GPT-4o, GPT-4, ChatGPT, OpenAI, GPT Image | +| Generación de imágenes | DALL-E, Midjourney, Stable Diffusion, Adobe Firefly, Imagen, Flux, Ideogram, Leonardo.ai, NovelAI, Grok, Jimeng (即梦) | +| Generación de video | Sora, Google Veo, Runway, Pika, Kling, Vidu, Luma, Hailuo (海螺), Pixverse, Genmo, Haiper | +| Generación de audio/música | Suno, Udio, ElevenLabs, SoundRaw, AIVA, Boomy, Mubert, Beatoven, Soundful, Hume, Fish Audio | +| Multimodal | GPT-4o, GPT-4, ChatGPT, OpenAI, GPT Image, Gemini | | Plataformas | Bing Image Creator, Copilot Designer, Microsoft Designer, Canva AI, DreamStudio, NightCafe, Craiyon, DeepAI, Meta AI, Stability AI | | Interfaces | ComfyUI, Automatic1111 (A1111), InvokeAI, Fooocus | | Investigación | Glide, Parti, Muse, Seedream, Recraft | diff --git a/docs/README.hi.md b/docs/README.hi.md index 4f94ef1..85d49b0 100644 --- a/docs/README.hi.md +++ b/docs/README.hi.md @@ -19,7 +19,7 @@ AICheck फ़ाइल मेटाडेटा और अदृश्य वॉटरमार्क का विश्लेषण करके इन सवालों का जवाब देता है। API कुंजी नहीं, नेटवर्क नहीं, सेटअप नहीं। -**10 पहचान विधियाँ** · **51 AI उपकरण** · **16 फ़ाइल प्रारूप** · **3 विश्वसनीयता स्तर** · **शून्य नेटवर्क अनुरोध** +**10 पहचान विधियाँ** · **61 AI उपकरण** · **16 फ़ाइल प्रारूप** · **3 विश्वसनीयता स्तर** · **शून्य नेटवर्क अनुरोध** --- @@ -79,7 +79,7 @@ real_photo.jpg **XMP/IPTC मेटाडेटा (MEDIUM विश्वसनीयता)** — मानक फ़ोटो मेटाडेटा: `DigitalSourceType`, `AISystemUsed`, `AIPromptInformation`, `CreatorTool`। विश्वसनीय लेकिन बिना हस्ताक्षर — नकली बनाया या हटाया जा सकता है। -**MP4 कंटेनर मेटाडेटा (MEDIUM विश्वसनीयता)** — iTunes-शैली के एटम (`©too`, `©swr`), AIGC लेबल (चीनी मानक, JSON `ProduceID` सहित), और H.264 SEI वॉटरमार्क मार्कर (जैसे Kling) का विश्लेषण करता है। वीडियो कंटेनरों में एम्बेडेड AI सिग्नल पकड़ता है जो अन्य विधियाँ चूक जाती हैं। +**MP4 कंटेनर मेटाडेटा (MEDIUM विश्वसनीयता)** — iTunes-शैली के एटम (`©too`, `©swr`), AIGC लेबल (चीनी मानक, JSON `ProduceID` सहित), और H.264 SEI वॉटरमार्क मार्कर (Kling, Sora, Runway, Pika, Luma, Hailuo, Pixverse, Vidu, Genmo, Haiper) का विश्लेषण करता है। गैर-AI निर्माण सॉफ़्टवेयर (FFmpeg, Remotion, Premiere आदि) को भी सूचनात्मक प्रदर्शन के लिए पहचानता है। वीडियो कंटेनरों में एम्बेडेड AI सिग्नल पकड़ता है जो अन्य विधियाँ चूक जाती हैं। **ID3 ऑडियो मेटाडेटा (MEDIUM विश्वसनीयता)** — MP3 फ़ाइलों से ID3v2 टैग पढ़ता है: कमेंट फ़्रेम (COMM), URL फ़्रेम (WOAS/WOAF/WXXX), और टेक्स्ट फ़्रेम (TENC/TPUB/TXXX)। Suno जैसे AI ऑडियो प्लेटफ़ॉर्म का पता लगाता है (एम्बेडेड URL और "made with suno" कमेंट के माध्यम से)। @@ -93,7 +93,7 @@ real_photo.jpg **ऑडियो स्पेक्ट्रल विश्लेषण (LOW विश्वसनीयता)** — WAV ऑडियो का FFT-आधारित विश्लेषण: हार्ड फ़्रीक्वेंसी कटऑफ़ (Nyquist से नीचे ऊर्जा केंद्रित) और TTS/AI संश्लेषण की विशिष्ट असामान्य स्पेक्ट्रल समतलता का पता लगाता है। फ़ॉलबैक के रूप में या `--deep` के साथ चलता है। -**अदृश्य वॉटरमार्क (LOW विश्वसनीयता)** — पिक्सेल-स्तरीय DWT-DCT विश्लेषण जो चैनल नॉइज़ असमानता, क्रॉस-चैनल बिट सहमति, और वेवलेट ऊर्जा पैटर्न का पता लगाता है। जब कोई मेटाडेटा सिग्नल नहीं मिलता तो स्वचालित रूप से फ़ॉलबैक के रूप में चलता है, या `--deep` के साथ माँग पर चलता है। +**अदृश्य वॉटरमार्क (LOW विश्वसनीयता)** — पिक्सेल-स्तरीय DWT-DCT विश्लेषण जो चैनल नॉइज़ असमानता, क्रॉस-चैनल बिट सहमति, और वेवलेट ऊर्जा पैटर्न का पता लगाता है। वीडियो के लिए, `ffmpeg` के माध्यम से स्वचालित रूप से कीफ़्रेम निकालकर व्यक्तिगत रूप से विश्लेषण करता है। जब कोई मेटाडेटा सिग्नल नहीं मिलता तो स्वचालित रूप से फ़ॉलबैक के रूप में चलता है, या `--deep` के साथ माँग पर चलता है। --- @@ -103,10 +103,10 @@ real_photo.jpg | श्रेणी | उपकरण | |--------|--------| -| इमेज जनरेशन | DALL-E, Midjourney, Stable Diffusion, Adobe Firefly, Imagen, Flux, Ideogram, Leonardo.ai, NovelAI | -| वीडियो जनरेशन | Sora, Google Veo, Runway, Pika, Kling, Vidu | -| ऑडियो/म्यूज़िक जनरेशन | Suno, Udio, ElevenLabs, SoundRaw, AIVA, Boomy, Mubert, Beatoven, Soundful | -| मल्टीमोडल | GPT-4o, GPT-4, ChatGPT, OpenAI, GPT Image | +| इमेज जनरेशन | DALL-E, Midjourney, Stable Diffusion, Adobe Firefly, Imagen, Flux, Ideogram, Leonardo.ai, NovelAI, Grok, Jimeng (即梦) | +| वीडियो जनरेशन | Sora, Google Veo, Runway, Pika, Kling, Vidu, Luma, Hailuo (海螺), Pixverse, Genmo, Haiper | +| ऑडियो/म्यूज़िक जनरेशन | Suno, Udio, ElevenLabs, SoundRaw, AIVA, Boomy, Mubert, Beatoven, Soundful, Hume, Fish Audio | +| मल्टीमोडल | GPT-4o, GPT-4, ChatGPT, OpenAI, GPT Image, Gemini | | प्लेटफ़ॉर्म | Bing Image Creator, Copilot Designer, Microsoft Designer, Canva AI, DreamStudio, NightCafe, Craiyon, DeepAI, Meta AI, Stability AI | | इंटरफ़ेस | ComfyUI, Automatic1111 (A1111), InvokeAI, Fooocus | | रिसर्च | Glide, Parti, Muse, Seedream, Recraft | diff --git a/docs/README.ja.md b/docs/README.ja.md index d82c4aa..a218157 100644 --- a/docs/README.ja.md +++ b/docs/README.ja.md @@ -19,7 +19,7 @@ AICheckはファイルのメタデータと不可視ウォーターマークを分析してこれらの疑問に答えます。APIキー不要、ネットワーク不要、セットアップ不要。 -**10種の検出方法** · **51種のAIツール** · **16種のファイル形式** · **3段階の信頼度** · **ネットワーク通信ゼロ** +**10種の検出方法** · **61種のAIツール** · **16種のファイル形式** · **3段階の信頼度** · **ネットワーク通信ゼロ** --- @@ -79,7 +79,7 @@ real_photo.jpg **XMP/IPTCメタデータ(MEDIUM信頼度)**— 標準的な写真メタデータ:`DigitalSourceType`、`AISystemUsed`、`AIPromptInformation`、`CreatorTool`。信頼性は高いが署名なし——偽造や削除が可能。 -**MP4コンテナメタデータ(MEDIUM信頼度)**— iTunes形式のアトム(`©too`、`©swr`)、AIGCラベル(中国規格、JSON `ProduceID`付き)、H.264 SEIウォーターマークマーカー(例:Kling)を解析。他の方法では見逃されるビデオコンテナに埋め込まれたAIシグナルを検出。 +**MP4コンテナメタデータ(MEDIUM信頼度)**— iTunes形式のアトム(`©too`、`©swr`)、AIGCラベル(中国規格、JSON `ProduceID`付き)、H.264 SEIウォーターマークマーカー(Kling、Sora、Runway、Pika、Luma、Hailuo、Pixverse、Vidu、Genmo、Haiper)を解析。非AI制作ソフトウェア(FFmpeg、Remotion、Premiereなど)も情報表示として検出。他の方法では見逃されるビデオコンテナに埋め込まれたAIシグナルを検出。 **ID3音声メタデータ(MEDIUM信頼度)**— MP3ファイルのID3v2タグを読み取り:コメントフレーム(COMM)、URLフレーム(WOAS/WOAF/WXXX)、テキストフレーム(TENC/TPUB/TXXX)。SunoなどのAI音声プラットフォームを検出(埋め込みURLや「made with suno」コメント経由)。 @@ -93,7 +93,7 @@ real_photo.jpg **音声スペクトル分析(LOW信頼度)**— FFTベースのWAV音声分析:硬い周波数カットオフ(ナイキスト以下にエネルギーが集中)とTTS/AI合成に典型的な異常なスペクトル平坦度を検出。フォールバックとして、または`--deep`で実行。 -**不可視ウォーターマーク(LOW信頼度)**— ピクセルレベルのDWT-DCT分析で、チャンネルノイズの非対称性、チャンネル間ビット一致、ウェーブレットエネルギーパターンを検出。メタデータシグナルが見つからない場合に自動的にフォールバックとして実行、または`--deep`でオンデマンド実行。 +**不可視ウォーターマーク(LOW信頼度)**— ピクセルレベルのDWT-DCT分析で、チャンネルノイズの非対称性、チャンネル間ビット一致、ウェーブレットエネルギーパターンを検出。動画の場合は`ffmpeg`でキーフレームを自動抽出し、個別に分析。メタデータシグナルが見つからない場合に自動的にフォールバックとして実行、または`--deep`でオンデマンド実行。 --- @@ -103,10 +103,10 @@ real_photo.jpg | カテゴリ | ツール | |---------|--------| -| 画像生成 | DALL-E, Midjourney, Stable Diffusion, Adobe Firefly, Imagen, Flux, Ideogram, Leonardo.ai, NovelAI | -| 動画生成 | Sora, Google Veo, Runway, Pika, Kling, Vidu | -| 音声/音楽生成 | Suno, Udio, ElevenLabs, SoundRaw, AIVA, Boomy, Mubert, Beatoven, Soundful | -| マルチモーダル | GPT-4o, GPT-4, ChatGPT, OpenAI, GPT Image | +| 画像生成 | DALL-E, Midjourney, Stable Diffusion, Adobe Firefly, Imagen, Flux, Ideogram, Leonardo.ai, NovelAI, Grok, Jimeng (即梦) | +| 動画生成 | Sora, Google Veo, Runway, Pika, Kling, Vidu, Luma, Hailuo (海螺), Pixverse, Genmo, Haiper | +| 音声/音楽生成 | Suno, Udio, ElevenLabs, SoundRaw, AIVA, Boomy, Mubert, Beatoven, Soundful, Hume, Fish Audio | +| マルチモーダル | GPT-4o, GPT-4, ChatGPT, OpenAI, GPT Image, Gemini | | プラットフォーム | Bing Image Creator, Copilot Designer, Microsoft Designer, Canva AI, DreamStudio, NightCafe, Craiyon, DeepAI, Meta AI, Stability AI | | インターフェース | ComfyUI, Automatic1111 (A1111), InvokeAI, Fooocus | | 研究 | Glide, Parti, Muse, Seedream, Recraft | diff --git a/docs/README.ko.md b/docs/README.ko.md index 86228e0..10cc347 100644 --- a/docs/README.ko.md +++ b/docs/README.ko.md @@ -19,7 +19,7 @@ AICheck는 파일 메타데이터와 보이지 않는 워터마크를 분석하여 이런 질문에 답합니다. API 키 불필요, 네트워크 불필요, 설정 불필요. -**10가지 감지 방법** · **51개 AI 도구** · **16가지 파일 형식** · **3단계 신뢰도** · **네트워크 요청 제로** +**10가지 감지 방법** · **61개 AI 도구** · **16가지 파일 형식** · **3단계 신뢰도** · **네트워크 요청 제로** --- @@ -79,7 +79,7 @@ real_photo.jpg **XMP/IPTC 메타데이터 (MEDIUM 신뢰도)** — 표준 사진 메타데이터: `DigitalSourceType`, `AISystemUsed`, `AIPromptInformation`, `CreatorTool`. 신뢰할 수 있지만 서명되지 않음 — 위조하거나 제거할 수 있습니다. -**MP4 컨테이너 메타데이터 (MEDIUM 신뢰도)** — iTunes 스타일 아톰(`©too`, `©swr`), AIGC 라벨(중국 표준, JSON `ProduceID` 포함), H.264 SEI 워터마크 마커(예: Kling)를 분석합니다. 다른 방법이 놓치는 비디오 컨테이너에 내장된 AI 신호를 감지합니다. +**MP4 컨테이너 메타데이터 (MEDIUM 신뢰도)** — iTunes 스타일 아톰(`©too`, `©swr`), AIGC 라벨(중국 표준, JSON `ProduceID` 포함), H.264 SEI 워터마크 마커(Kling, Sora, Runway, Pika, Luma, Hailuo, Pixverse, Vidu, Genmo, Haiper)를 분석합니다. 비AI 제작 소프트웨어(FFmpeg, Remotion, Premiere 등)도 정보 표시용으로 감지합니다. 다른 방법이 놓치는 비디오 컨테이너에 내장된 AI 신호를 감지합니다. **ID3 오디오 메타데이터 (MEDIUM 신뢰도)** — MP3 파일의 ID3v2 태그를 읽습니다: 코멘트 프레임(COMM), URL 프레임(WOAS/WOAF/WXXX), 텍스트 프레임(TENC/TPUB/TXXX). Suno 같은 AI 오디오 플랫폼을 감지합니다(내장 URL과 "made with suno" 코멘트를 통해). @@ -93,7 +93,7 @@ real_photo.jpg **오디오 스펙트럼 분석 (LOW 신뢰도)** — FFT 기반 WAV 오디오 분석: 하드 주파수 컷오프(나이퀴스트 이하에 에너지 집중)와 TTS/AI 합성에 전형적인 비정상적 스펙트럼 평탄도를 감지합니다. 폴백으로 또는 `--deep`으로 실행됩니다. -**보이지 않는 워터마크 (LOW 신뢰도)** — 픽셀 수준의 DWT-DCT 분석으로 채널 노이즈 비대칭, 채널 간 비트 일치, 웨이블릿 에너지 패턴을 감지합니다. 메타데이터 신호가 발견되지 않으면 자동으로 폴백 실행되거나, `--deep`으로 요청 시 실행됩니다. +**보이지 않는 워터마크 (LOW 신뢰도)** — 픽셀 수준의 DWT-DCT 분석으로 채널 노이즈 비대칭, 채널 간 비트 일치, 웨이블릿 에너지 패턴을 감지합니다. 영상의 경우 `ffmpeg`를 통해 자동으로 키프레임을 추출하여 개별 분석합니다. 메타데이터 신호가 발견되지 않으면 자동으로 폴백 실행되거나, `--deep`으로 요청 시 실행됩니다. --- @@ -103,10 +103,10 @@ real_photo.jpg | 카테고리 | 도구 | |---------|------| -| 이미지 생성 | DALL-E, Midjourney, Stable Diffusion, Adobe Firefly, Imagen, Flux, Ideogram, Leonardo.ai, NovelAI | -| 영상 생성 | Sora, Google Veo, Runway, Pika, Kling, Vidu | -| 오디오/음악 생성 | Suno, Udio, ElevenLabs, SoundRaw, AIVA, Boomy, Mubert, Beatoven, Soundful | -| 멀티모달 | GPT-4o, GPT-4, ChatGPT, OpenAI, GPT Image | +| 이미지 생성 | DALL-E, Midjourney, Stable Diffusion, Adobe Firefly, Imagen, Flux, Ideogram, Leonardo.ai, NovelAI, Grok, Jimeng (即梦) | +| 영상 생성 | Sora, Google Veo, Runway, Pika, Kling, Vidu, Luma, Hailuo (海螺), Pixverse, Genmo, Haiper | +| 오디오/음악 생성 | Suno, Udio, ElevenLabs, SoundRaw, AIVA, Boomy, Mubert, Beatoven, Soundful, Hume, Fish Audio | +| 멀티모달 | GPT-4o, GPT-4, ChatGPT, OpenAI, GPT Image, Gemini | | 플랫폼 | Bing Image Creator, Copilot Designer, Microsoft Designer, Canva AI, DreamStudio, NightCafe, Craiyon, DeepAI, Meta AI, Stability AI | | 인터페이스 | ComfyUI, Automatic1111 (A1111), InvokeAI, Fooocus | | 연구 | Glide, Parti, Muse, Seedream, Recraft | diff --git a/docs/README.zh-CN.md b/docs/README.zh-CN.md index e2ebd9f..8721a75 100644 --- a/docs/README.zh-CN.md +++ b/docs/README.zh-CN.md @@ -19,7 +19,7 @@ AICheck 通过分析文件元数据和隐形水印来回答这些问题。不需要 API key,不需要联网,不需要配置。 -**10 种检测方法** · **51 种 AI 工具** · **16 种文件格式** · **3 级置信度** · **完全离线运行** +**10 种检测方法** · **61 种 AI 工具** · **16 种文件格式** · **3 级置信度** · **完全离线运行** --- @@ -79,7 +79,7 @@ real_photo.jpg **XMP/IPTC 元数据(MEDIUM 置信度)**— 标准照片元数据:`DigitalSourceType`、`AISystemUsed`、`AIPromptInformation`、`CreatorTool`。可靠但没有签名——可以伪造或删除。 -**MP4 容器元数据(MEDIUM 置信度)**— 解析 iTunes 风格原子(`©too`、`©swr`)、AIGC 标签(中国标准,含 JSON `ProduceID`)和 H.264 SEI 水印标记(如 Kling)。能捕获嵌入视频容器中的 AI 信号。 +**MP4 容器元数据(MEDIUM 置信度)**— 解析 iTunes 风格原子(`©too`、`©swr`)、AIGC 标签(中国标准,含 JSON `ProduceID`)和 H.264 SEI 水印标记(Kling、Sora、Runway、Pika、Luma、Hailuo、Pixverse、Vidu、Genmo、Haiper)。同时检测非 AI 创作软件(FFmpeg、Remotion、Premiere 等)作为信息展示。能捕获嵌入视频容器中的 AI 信号。 **ID3 音频元数据(MEDIUM 置信度)**— 读取 MP3 文件的 ID3v2 标签:注释帧(COMM)、URL 帧(WOAS/WOAF/WXXX)和文本帧(TENC/TPUB/TXXX)。可检测 Suno 等 AI 音频平台(通过嵌入的 URL 和「made with suno」注释)。 @@ -93,7 +93,7 @@ real_photo.jpg **音频频谱分析(LOW 置信度)**— 基于 FFT 的 WAV 音频分析:检测硬频率截断(能量集中在奈奎斯特频率以下)和异常的频谱平坦度,这些是 TTS/AI 合成的典型特征。作为后备方案自动运行,或通过 `--deep` 强制启用。 -**隐形水印(LOW 置信度)**— 像素级 DWT-DCT 分析,检测通道噪声不对称性、跨通道比特一致性和小波能量模式。当未检测到元数据信号时自动运行,也可通过 `--deep` 强制启用。 +**隐形水印(LOW 置信度)**— 像素级 DWT-DCT 分析,检测通道噪声不对称性、跨通道比特一致性和小波能量模式。对于视频文件,自动通过 `ffmpeg` 提取关键帧并逐帧分析。当未检测到元数据信号时自动运行,也可通过 `--deep` 强制启用。 --- @@ -103,10 +103,10 @@ real_photo.jpg | 类别 | 工具 | |------|------| -| 图像生成 | DALL-E, Midjourney, Stable Diffusion, Adobe Firefly, Imagen, Flux, Ideogram, Leonardo.ai, NovelAI | -| 视频生成 | Sora, Google Veo, Runway, Pika, Kling, Vidu | -| 音频/音乐生成 | Suno, Udio, ElevenLabs, SoundRaw, AIVA, Boomy, Mubert, Beatoven, Soundful | -| 多模态 | GPT-4o, GPT-4, ChatGPT, OpenAI, GPT Image | +| 图像生成 | DALL-E, Midjourney, Stable Diffusion, Adobe Firefly, Imagen, Flux, Ideogram, Leonardo.ai, NovelAI, Grok, Jimeng (即梦) | +| 视频生成 | Sora, Google Veo, Runway, Pika, Kling, Vidu, Luma, Hailuo (海螺), Pixverse, Genmo, Haiper | +| 音频/音乐生成 | Suno, Udio, ElevenLabs, SoundRaw, AIVA, Boomy, Mubert, Beatoven, Soundful, Hume, Fish Audio | +| 多模态 | GPT-4o, GPT-4, ChatGPT, OpenAI, GPT Image, Gemini | | 平台 | Bing Image Creator, Copilot Designer, Microsoft Designer, Canva AI, DreamStudio, NightCafe, Craiyon, DeepAI, Meta AI, Stability AI | | 界面工具 | ComfyUI, Automatic1111 (A1111), InvokeAI, Fooocus | | 研究项目 | Glide, Parti, Muse, Seedream, Recraft | diff --git a/locales/de.yml b/locales/de.yml index 6b2db48..09a551a 100644 --- a/locales/de.yml +++ b/locales/de.yml @@ -13,6 +13,7 @@ confidence_high: "HOCH" # Ausgabe output_no_signals: "Keine KI-Generierungssignale erkannt." +output_hint_check_original: "Tipp: Soziale Medien und Messaging-Apps entfernen beim Hochladen häufig Metadaten. Analysieren Sie für beste Ergebnisse die Originaldatei." output_summary: "Ergebnisse: %{detected}/%{total} Dateien mit KI-Signalen (%{high} HOCH, %{medium} MITTEL, %{low} NIEDRIG)" output_type_label: "Typ: %{mime}" @@ -64,3 +65,4 @@ signal_audio_cutoff: "Harte Frequenzabschaltung bei %{freq}Hz (%{pct}%% von %{ny signal_audio_flatness: "Spektrale Flachheit %{value} deutet auf synthetisches Audio hin (natürliche Sprache typischerweise > 0,05)" signal_wav_info_tool: "WAV INFO %{key} stimmt mit KI-Tool überein: %{value}" signal_wav_tts_heuristic: "Audioeigenschaften deuten auf TTS hin: Mono %{rate}Hz %{bits}bit" +signal_video_frame_watermark: "Wasserzeichen-Indikatoren in Videobild erkannt (bei %{frame}): %{indicators}" diff --git a/locales/en.yml b/locales/en.yml index 253f33f..918ee10 100644 --- a/locales/en.yml +++ b/locales/en.yml @@ -13,6 +13,7 @@ confidence_high: "HIGH" # Output UI output_no_signals: "No AI-generation signals detected." +output_hint_check_original: "Tip: Social media and messaging apps often strip metadata on upload. For best results, analyze the original file." output_summary: "Results: %{detected}/%{total} files with AI signals (%{high} HIGH, %{medium} MEDIUM, %{low} LOW)" output_type_label: "Type: %{mime}" @@ -64,3 +65,4 @@ signal_audio_cutoff: "Hard frequency cutoff at %{freq}Hz (%{pct}%% of %{nyquist} signal_audio_flatness: "Spectral flatness %{value} suggests synthetic audio (natural speech typically > 0.05)" signal_wav_info_tool: "WAV INFO %{key} matches AI tool: %{value}" signal_wav_tts_heuristic: "Audio characteristics suggest TTS: mono %{rate}Hz %{bits}bit" +signal_video_frame_watermark: "Video frame watermark indicators detected (at %{frame}): %{indicators}" diff --git a/locales/es.yml b/locales/es.yml index 46cfaae..9d08fdf 100644 --- a/locales/es.yml +++ b/locales/es.yml @@ -13,6 +13,7 @@ confidence_high: "ALTO" # Interfaz de salida output_no_signals: "No se detectaron señales de generación por IA." +output_hint_check_original: "Consejo: Las redes sociales y apps de mensajería suelen eliminar los metadatos al subir archivos. Para mejores resultados, analice el archivo original." output_summary: "Resultados: %{detected}/%{total} archivos con señales de IA (%{high} ALTO, %{medium} MEDIO, %{low} BAJO)" output_type_label: "Tipo: %{mime}" @@ -64,3 +65,4 @@ signal_audio_cutoff: "Corte de frecuencia abrupto en %{freq}Hz (%{pct}%% de %{ny signal_audio_flatness: "Planitud espectral %{value} sugiere audio sintético (el habla natural típicamente > 0,05)" signal_wav_info_tool: "WAV INFO %{key} coincide con herramienta de IA: %{value}" signal_wav_tts_heuristic: "Características de audio sugieren TTS: mono %{rate}Hz %{bits}bit" +signal_video_frame_watermark: "Indicadores de marca de agua detectados en fotograma de video (en %{frame}): %{indicators}" diff --git a/locales/hi.yml b/locales/hi.yml index b044d07..625475e 100644 --- a/locales/hi.yml +++ b/locales/hi.yml @@ -13,6 +13,7 @@ confidence_high: "उच्च" # आउटपुट UI output_no_signals: "कोई AI-जनन संकेत नहीं पाया गया।" +output_hint_check_original: "सुझाव: सोशल मीडिया और मैसेजिंग ऐप्स अपलोड करते समय अक्सर मेटाडेटा हटा देते हैं। सर्वोत्तम परिणामों के लिए मूल फ़ाइल का विश्लेषण करें।" output_summary: "परिणाम: %{detected}/%{total} फ़ाइलों में AI संकेत (%{high} उच्च, %{medium} मध्यम, %{low} कम)" output_type_label: "प्रकार: %{mime}" @@ -64,3 +65,4 @@ signal_audio_cutoff: "%{freq}Hz पर आवृत्ति कटऑफ़ प signal_audio_flatness: "स्पेक्ट्रल समतलता %{value} सिंथेटिक ऑडियो का संकेत देती है (प्राकृतिक वाणी आमतौर पर > 0.05)" signal_wav_info_tool: "WAV INFO %{key} AI टूल से मेल खाता है: %{value}" signal_wav_tts_heuristic: "ऑडियो विशेषताएँ TTS का संकेत देती हैं: मोनो %{rate}Hz %{bits}bit" +signal_video_frame_watermark: "वीडियो फ़्रेम वॉटरमार्क संकेतक पाए गए (%{frame} पर): %{indicators}" diff --git a/locales/ja.yml b/locales/ja.yml index 1056e4a..4694de5 100644 --- a/locales/ja.yml +++ b/locales/ja.yml @@ -13,6 +13,7 @@ confidence_high: "高" # 出力UI output_no_signals: "AI生成シグナルは検出されませんでした。" +output_hint_check_original: "ヒント:SNSやメッセージアプリはアップロード時にメタデータを削除することがあります。最良の結果を得るには、オリジナルファイルを分析してください。" output_summary: "結果:%{detected}/%{total} ファイルにAIシグナルあり(%{high} 高、%{medium} 中、%{low} 低)" output_type_label: "タイプ:%{mime}" @@ -64,3 +65,4 @@ signal_audio_cutoff: "%{freq}Hzで周波数の急激な遮断を検出(ナイ signal_audio_flatness: "スペクトル平坦度 %{value} は合成音声を示唆(自然音声は通常 > 0.05)" signal_wav_info_tool: "WAV INFO %{key} がAIツールに一致:%{value}" signal_wav_tts_heuristic: "音声特性がTTSを示唆:モノラル %{rate}Hz %{bits}bit" +signal_video_frame_watermark: "動画フレームの電子透かし指標を検出(%{frame}時点):%{indicators}" diff --git a/locales/ko.yml b/locales/ko.yml index 14adcb9..bda6a47 100644 --- a/locales/ko.yml +++ b/locales/ko.yml @@ -13,6 +13,7 @@ confidence_high: "높음" # 출력 UI output_no_signals: "AI 생성 신호가 감지되지 않았습니다." +output_hint_check_original: "팁: 소셜 미디어와 메신저 앱은 업로드 시 메타데이터를 제거하는 경우가 많습니다. 최상의 결과를 위해 원본 파일을 분석하세요." output_summary: "결과: %{detected}/%{total} 파일에서 AI 신호 감지 (%{high} 높음, %{medium} 중간, %{low} 낮음)" output_type_label: "유형: %{mime}" @@ -64,3 +65,4 @@ signal_audio_cutoff: "%{freq}Hz에서 주파수 급격한 차단 감지 (나이 signal_audio_flatness: "스펙트럼 평탄도 %{value}는 합성 오디오를 시사 (자연 음성은 일반적으로 > 0.05)" signal_wav_info_tool: "WAV INFO %{key}이(가) AI 도구와 일치: %{value}" signal_wav_tts_heuristic: "오디오 특성이 TTS를 시사: 모노 %{rate}Hz %{bits}bit" +signal_video_frame_watermark: "비디오 프레임 워터마크 지표 감지 (%{frame} 위치): %{indicators}" diff --git a/locales/zh-CN.yml b/locales/zh-CN.yml index 8b476da..02d8a8c 100644 --- a/locales/zh-CN.yml +++ b/locales/zh-CN.yml @@ -13,6 +13,7 @@ confidence_high: "高" # 输出界面 output_no_signals: "未检测到 AI 生成信号。" +output_hint_check_original: "提示:社交媒体和通讯应用上传时通常会清除元数据。建议检查原始文件以获得最佳检测效果。" output_summary: "结果:%{detected}/%{total} 个文件检测到 AI 信号(%{high} 高、%{medium} 中、%{low} 低)" output_type_label: "类型:%{mime}" @@ -64,3 +65,4 @@ signal_audio_cutoff: "在 %{freq}Hz 处检测到频率硬截断(奈奎斯特 signal_audio_flatness: "频谱平坦度 %{value} 表明为合成音频(自然语音通常 > 0.05)" signal_wav_info_tool: "WAV INFO %{key} 匹配 AI 工具:%{value}" signal_wav_tts_heuristic: "音频特征表明为 TTS:单声道 %{rate}Hz %{bits}bit" +signal_video_frame_watermark: "视频帧水印指标检测到(位于 %{frame}):%{indicators}" diff --git a/src/detector/mod.rs b/src/detector/mod.rs index a52bf4f..71e2f2b 100644 --- a/src/detector/mod.rs +++ b/src/detector/mod.rs @@ -182,6 +182,9 @@ pub struct FileReport { pub ai_generated: bool, #[serde(skip_serializing_if = "Option::is_none")] pub error: Option, + /// Detected creation software (informational, not AI-related). + #[serde(skip_serializing_if = "Vec::is_empty")] + pub software_info: Vec<(String, String)>, } impl FileReport { @@ -199,6 +202,7 @@ impl FileReport { overall_confidence, ai_generated, error: None, + software_info: Vec::new(), } } @@ -211,6 +215,7 @@ impl FileReport { overall_confidence: Confidence::None, ai_generated: false, error: Some(error), + software_info: Vec::new(), } } } @@ -320,16 +325,41 @@ pub fn run_all_detectors(path: &Path, deep: bool) -> FileReport { } // Watermark detector — pixel-level analysis + let is_video = mime_type + .as_deref() + .map(|m| m.starts_with("video/")) + .unwrap_or(false); + if deep || signals.is_empty() { - match watermark::detect(path) { - Ok(sigs) => signals.extend(sigs), - Err(e) => { - if std::env::var("AIC_DEBUG").is_ok() { - eprintln!(" [debug] Watermark: {}", e); + if is_video { + // Video: extract frames and analyze + match watermark::detect_video(path) { + Ok(sigs) => signals.extend(sigs), + Err(e) => { + if std::env::var("AIC_DEBUG").is_ok() { + eprintln!(" [debug] Watermark (video): {}", e); + } + } + } + } else { + // Image: direct pixel analysis + match watermark::detect(path) { + Ok(sigs) => signals.extend(sigs), + Err(e) => { + if std::env::var("AIC_DEBUG").is_ok() { + eprintln!(" [debug] Watermark: {}", e); + } } } } } - FileReport::from_signals(path.to_path_buf(), mime_type, signals) + let mut report = FileReport::from_signals(path.to_path_buf(), mime_type, signals); + + // Collect informational software metadata + if let Ok(sw) = mp4_metadata::detect_software(path) { + report.software_info.extend(sw); + } + + report } diff --git a/src/detector/mp4_metadata.rs b/src/detector/mp4_metadata.rs index 6f944df..12ec98e 100644 --- a/src/detector/mp4_metadata.rs +++ b/src/detector/mp4_metadata.rs @@ -8,7 +8,18 @@ use crate::known_tools; const MP4_TOOL_MAPPINGS: &[(&str, &str, Confidence)] = &[("google", "google veo", Confidence::Medium)]; -const SEI_MARKERS: &[(&[u8], &str)] = &[(b"kling-ai", "kling")]; +const SEI_MARKERS: &[(&[u8], &str)] = &[ + (b"kling-ai", "kling"), + (b"sora", "sora"), + (b"runway", "runway"), + (b"pika-labs", "pika"), + (b"luma-ai", "luma"), + (b"hailuo", "hailuo"), + (b"pixverse", "pixverse"), + (b"vidu-ai", "vidu"), + (b"genmo", "genmo"), + (b"haiper", "haiper"), +]; fn read_u32_be(data: &[u8], offset: usize) -> Option { if offset + 4 > data.len() { @@ -324,6 +335,62 @@ pub fn detect(path: &Path) -> Result> { Ok(signals) } +/// Known non-AI creation software patterns for informational reporting. +const SOFTWARE_PATTERNS: &[(&str, &str)] = &[ + ("remotion", "Remotion"), + ("lavf", "FFmpeg"), + ("lavc", "FFmpeg"), + ("ffmpeg", "FFmpeg"), + ("premiere", "Adobe Premiere Pro"), + ("after effects", "Adobe After Effects"), + ("davinci resolve", "DaVinci Resolve"), + ("final cut", "Final Cut Pro"), + ("imovie", "iMovie"), + ("handbrake", "HandBrake"), + ("obs", "OBS Studio"), + ("kdenlive", "Kdenlive"), + ("shotcut", "Shotcut"), + ("blender", "Blender"), + ("capcut", "CapCut"), + ("剪映", "CapCut"), +]; + +/// Detect creation software from MP4 metadata (informational, not AI-related). +pub fn detect_software(path: &Path) -> Result> { + let data = fs::read(path)?; + if get_box(&data, 0, data.len().min(64), b"ftyp").is_none() { + return Ok(vec![]); + } + let entries = extract_ilst_entries(&data); + let mut result = Vec::new(); + + let info_keys: &[&str] = &["\u{a9}too", "\u{a9}swr", "\u{a9}cmt"]; + for (key, value) in &entries { + let is_info_key = info_keys.iter().any(|k| key.eq_ignore_ascii_case(k)); + if !is_info_key || value.is_empty() { + continue; + } + let lower = value.to_lowercase(); + // Skip if it matches an AI tool (those are handled by detect()) + if crate::known_tools::match_ai_tool(value).is_some() { + continue; + } + for &(pattern, label) in SOFTWARE_PATTERNS { + if lower.contains(pattern) { + let label_str = match key.as_str() { + "\u{a9}too" => "Encoder", + "\u{a9}swr" => "Software", + "\u{a9}cmt" => "Comment", + _ => key.as_str(), + }; + result.push((label_str.to_string(), format!("{} ({})", value, label))); + break; + } + } + } + Ok(result) +} + pub fn dump_info(path: &Path) -> Result> { let data = fs::read(path)?; if get_box(&data, 0, data.len().min(64), b"ftyp").is_none() { diff --git a/src/detector/watermark.rs b/src/detector/watermark.rs index 87bd184..4248f82 100644 --- a/src/detector/watermark.rs +++ b/src/detector/watermark.rs @@ -1,5 +1,6 @@ use anyhow::{Context, Result}; use std::path::Path; +use std::process::Command; use super::{Confidence, Signal, SignalBuilder, SignalSource}; use crate::i18n; @@ -355,6 +356,131 @@ fn haar_dwt_2d(data: &[f64], width: usize, height: usize) -> DwtSubbands { DwtSubbands { ll, lh, hl, hh } } +/// Number of frames to extract from a video for watermark analysis. +const VIDEO_FRAME_COUNT: usize = 3; + +/// Analyze video frames for invisible watermarks by extracting keyframes via ffmpeg. +/// Returns empty if ffmpeg is not available or extraction fails. +pub fn detect_video(path: &Path) -> Result> { + let debug = std::env::var("AIC_DEBUG").is_ok(); + + // Check if ffmpeg is available + if Command::new("ffmpeg").arg("-version").output().is_err() { + if debug { + eprintln!(" [debug] Watermark video: ffmpeg not found, skipping frame analysis"); + } + return Ok(vec![]); + } + + // Get video duration via ffprobe + let duration = get_video_duration(path); + if duration.is_none() { + if debug { + eprintln!(" [debug] Watermark video: could not determine duration"); + } + return Ok(vec![]); + } + let duration = duration.unwrap(); + if duration < 0.5 { + return Ok(vec![]); + } + + let tmp_dir = tempfile::tempdir().context("Failed to create temp dir for video frames")?; + let mut all_signals = Vec::new(); + + // Extract frames at evenly spaced positions (25%, 50%, 75%) + for i in 1..=VIDEO_FRAME_COUNT { + let timestamp = duration * i as f64 / (VIDEO_FRAME_COUNT as f64 + 1.0); + let frame_path = tmp_dir.path().join(format!("frame_{}.png", i)); + + let status = Command::new("ffmpeg") + .args([ + "-ss", + &format!("{:.2}", timestamp), + "-i", + &path.to_string_lossy(), + "-frames:v", + "1", + "-q:v", + "1", + &frame_path.to_string_lossy(), + "-y", + ]) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .status(); + + if let Ok(s) = status { + if s.success() && frame_path.exists() { + if debug { + eprintln!( + " [debug] Watermark video: analyzing frame {} at {:.1}s", + i, timestamp + ); + } + match detect(&frame_path) { + Ok(signals) if !signals.is_empty() => { + // Re-wrap signals with video frame context + for signal in signals { + let indicators = signal + .details + .iter() + .map(|(k, v)| format!("{}={}", k, v)) + .collect::>() + .join(", "); + all_signals.push( + SignalBuilder::new( + SignalSource::Watermark, + Confidence::Low, + "signal_video_frame_watermark", + ) + .param("frame", format!("{:.1}s", timestamp)) + .param( + "indicators", + if indicators.is_empty() { + &signal.description + } else { + &indicators + }, + ) + .details(signal.details) + .build(), + ); + } + // One positive frame is enough evidence + break; + } + Ok(_) => {} + Err(e) => { + if debug { + eprintln!(" [debug] Watermark video frame {}: {}", i, e); + } + } + } + } + } + } + + Ok(all_signals) +} + +fn get_video_duration(path: &Path) -> Option { + let output = Command::new("ffprobe") + .args([ + "-v", + "quiet", + "-print_format", + "default=noprint_wrappers=1:nokey=1", + "-show_entries", + "format=duration", + &path.to_string_lossy(), + ]) + .output() + .ok()?; + let stdout = String::from_utf8_lossy(&output.stdout); + stdout.trim().parse::().ok() +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/known_tools.rs b/src/known_tools.rs index 862fc60..29573d8 100644 --- a/src/known_tools.rs +++ b/src/known_tools.rs @@ -51,7 +51,7 @@ pub const AI_TOOL_PATTERNS: &[&str] = &[ "elevenlabs", "eleven labs", "suno", - "udio.com", + "udio", "soundraw", "aiva", "boomy", @@ -59,6 +59,21 @@ pub const AI_TOOL_PATTERNS: &[&str] = &[ "loudly.com", "beatoven", "soundful", + // New image generation tools + "grok", + "gemini", + "jimeng", + "即梦", + // New video generation tools + "luma", + "hailuo", + "海螺", + "pixverse", + "genmo", + "haiper", + // New audio generation tools + "hume", + "fish audio", ]; /// Check if a string contains any known AI tool pattern (case-insensitive). diff --git a/src/output.rs b/src/output.rs index 8d26794..fc52759 100644 --- a/src/output.rs +++ b/src/output.rs @@ -50,8 +50,14 @@ pub fn print_human(reports: &[FileReport]) { continue; } + // Show detected creation software (informational) + for (label, value) in &report.software_info { + println!(" {} {}: {}", "INFO ".cyan(), label.dimmed(), value); + } + if report.signals.is_empty() { println!(" {}", i18n::t("output_no_signals", &[]).dimmed()); + println!(" {}", i18n::t("output_hint_check_original", &[]).dimmed()); continue; }