Merge pull request #8 from Pseudonium/develop

Markdown and Image Update
ObsidianToAnki · Sep 2, 2020 · 590bae1 · 590bae1
2 parents 6e10afb + 99fc160
commit 590bae1
Show file tree

Hide file tree

Showing 2 changed files with 131 additions and 13 deletions.
diff --git a/README.md b/README.md
@@ -7,7 +7,9 @@ Script to add flashcards from an Obsidian markdown file to Anki.
 3. Place the script "obsidian_to_anki.py" in a convenient folder. You may wish to consider placing it in a Scripts folder, and adding the folder to your PATH
 4. Start up Anki, and navigate to your desired profile
 5. Ensure that you've installed [AnkiConnect](https://github.com/FooSoft/anki-connect).
-6. From the command line, run the script once with no arguments - `{Path to script}/obsidian_to_anki.py`
+6. Install the `python-markdown` library - see installation instructions [here](https://github.com/Python-Markdown/markdown). `pip install markdown` should work.
+7. Check the Permissions section below to ensure the script is able to run.
+8. From the command line, run the script once with no arguments - `{Path to script}/obsidian_to_anki.py`
 This will make a configuration file in the same directory as the script, "obsidian_to_anki_config.ini".
 
 ## Permissions
@@ -50,6 +52,16 @@ In the markdown file, you must format your notes as follows:
 > Tags:   
 > END  
 
+### Markdown formatting
+
+Standard markdown formatting is supported. GitHub-flavoured code blocks are supported (but Anki won't show syntax highlighting).
+
+### Image formatting
+
+Embedded images are supported if the following criteria are met:
+1. The image is stored locally
+2. It is embedded using the standard markdown syntax: `![alt-text](path_to_image)`
+
 ### Tag formatting
 
 Note that the Tags: line is optional - if you don't want tags, you may leave out the line.
@@ -126,16 +138,19 @@ Then you now format your notes like this:
 > {Note Data}  
 > END  
 
-## Supported?
+## Features
 
-Currently supported features:
+Current features:
 * Custom note types
 * Updating notes from Obsidian
 * Substitutions (see above)
 * Auto-convert math formatting
 * Tags
 * Adding to decks other than Default
-
-Not currently supported features:
-* Media
 * Markdown formatting
+* Embedded images (GIFs should work too)
+
+Not available:
+* Audio
+* Deleting notes from Obsidian
+* Reading from all files in a directory automatically - script works on a per-file basis
diff --git a/obsidian_to_anki.py b/obsidian_to_anki.py
@@ -8,6 +8,12 @@
 import argparse
 import collections
 import webbrowser
+import markdown
+import base64
+
+md_parser = markdown.Markdown(
+    extensions=['extra'], output_format="html5"
+)
 
 
 def write_safe(filename, contents):
@@ -48,6 +54,12 @@ def string_insert(string, position_inserts):
     return string
 
 
+def file_encode(filepath):
+    """Encode the file as base 64."""
+    with open(filepath, 'rb') as f:
+        return base64.b64encode(f.read()).decode('utf-8')
+
+
 class AnkiConnect:
     """Namespace for AnkiConnect functions."""
 
@@ -76,15 +88,25 @@ def invoke(action, **params):
 class FormatConverter:
     """Converting Obsidian formatting to Anki formatting."""
 
-    INLINE_MATH_REGEXP = re.compile(r"(?<!\$)\$(?=[\S])(?=[^$])[\s\S]*?\S\$")
-    DISPLAY_MATH_REGEXP = re.compile(r"\$\$[\s\S]*?\$\$")
+    OBS_INLINE_MATH_REGEXP = re.compile(
+        r"(?<!\$)\$(?=[\S])(?=[^$])[\s\S]*?\S\$"
+    )
+    OBS_DISPLAY_MATH_REGEXP = re.compile(r"\$\$[\s\S]*?\$\$")
 
     ANKI_INLINE_START = r"\("
     ANKI_INLINE_END = r"\)"
 
     ANKI_DISPLAY_START = r"\["
     ANKI_DISPLAY_END = r"\]"
 
+    ANKI_MATH_REGEXP = re.compile(r"(\\\[[\s\S]*?\\\])|(\\\([\s\S]*?\\\))")
+
+    MATH_REPLACE = "OBSTOANKIMATH"
+
+    IMAGE_PATHS = set()
+    IMAGE_REGEXP = re.compile(r'<img alt="[\s\S]*?" src="([\s\S]*?)">')
+
+    @staticmethod
     def inline_anki_repl(matchobject):
         """Get replacement string for Obsidian-formatted inline math."""
         found_string = matchobject.group(0)
@@ -95,6 +117,7 @@ def inline_anki_repl(matchobject):
         result += FormatConverter.ANKI_INLINE_END
         return result
 
+    @staticmethod
     def display_anki_repl(matchobject):
         """Get replacement string for Obsidian-formatted display math."""
         found_string = matchobject.group(0)
@@ -105,20 +128,75 @@ def display_anki_repl(matchobject):
         result += FormatConverter.ANKI_DISPLAY_END
         return result
 
+    @staticmethod
     def obsidian_to_anki_math(note_text):
         """Convert Obsidian-formatted math to Anki-formatted math."""
-        return FormatConverter.INLINE_MATH_REGEXP.sub(
+        return FormatConverter.OBS_INLINE_MATH_REGEXP.sub(
             FormatConverter.inline_anki_repl,
-            FormatConverter.DISPLAY_MATH_REGEXP.sub(
+            FormatConverter.OBS_DISPLAY_MATH_REGEXP.sub(
                 FormatConverter.display_anki_repl, note_text
             )
         )
 
+    @staticmethod
+    def markdown_parse(text):
+        """Apply markdown conversions to text."""
+        text = md_parser.reset().convert(text)
+        return text
+
+    @staticmethod
     def format(note_text):
         """Apply all format conversions to note_text."""
         note_text = FormatConverter.obsidian_to_anki_math(note_text)
+        # Extract the parts that are anki math
+        math_matches = [
+            math_match.group(0)
+            for math_match in FormatConverter.ANKI_MATH_REGEXP.finditer(
+                note_text
+            )
+        ]
+        # Replace them with a placeholder to be added back later, so they
+        # don't interfere with markdown parsing
+        note_text = FormatConverter.ANKI_MATH_REGEXP.sub(
+            FormatConverter.MATH_REPLACE, note_text
+        )
+        note_text = FormatConverter.markdown_parse(note_text)
+        # Add back the parts that are anki math
+        for math_match in math_matches:
+            note_text = note_text.replace(
+                FormatConverter.MATH_REPLACE,
+                math_match,
+                1
+            )
+        print(note_text)
+        FormatConverter.get_images(note_text)
+        note_text = FormatConverter.fix_image_src(note_text)
         return note_text
 
+    @staticmethod
+    def get_images(html_text):
+        """Get all the images that need to be added."""
+        for match in FormatConverter.IMAGE_REGEXP.finditer(html_text):
+            FormatConverter.IMAGE_PATHS.add(match.group(1))
+            # ^Adds the image path (relative to cwd)
+
+    @staticmethod
+    def fix_image_src_repl(matchobject):
+        """Replace the src in matchobject appropriately."""
+        found_string, found_path = matchobject.group(0), matchobject.group(1)
+        found_string = found_string.replace(
+            found_path, os.path.basename(found_path)
+        )
+        return found_string
+
+    @staticmethod
+    def fix_image_src(html_text):
+        """Fix the src of the images so that it's relative to Anki."""
+        return FormatConverter.IMAGE_REGEXP.sub(
+            FormatConverter.fix_image_src_repl,
+            html_text
+        )
+
 
 class Note:
     """Manages parsing notes into a dictionary formatted for AnkiConnect.
@@ -135,7 +213,7 @@ class Note:
 
     def __init__(self, note_text):
         """Set up useful variables."""
-        self.text = FormatConverter.format(note_text)
+        self.text = note_text
         self.lines = self.text.splitlines()
         self.note_type = Note.note_subs[self.lines[0]]
         self.subs = Note.field_subs[self.note_type]
@@ -201,10 +279,16 @@ def fields(self):
         fields = dict.fromkeys(self.field_names, "")
         for line in self.lines[1:]:
             if self.next_sub and line.startswith(self.next_sub):
+                # This means we're entering a new field.
+                # (The text of every field is formatted after this loop.)
                 self.current_field_num += 1
                 line = line[len(self.current_sub):]
-            fields[self.current_field] += line + " "
-        return {key: value.rstrip() for key, value in fields.items()}
+            fields[self.current_field] += line + "\n"
+        fields = {
+            key: FormatConverter.format(value)
+            for key, value in fields.items()
+        }
+        return {key: value.strip() for key, value in fields.items()}
 
     def parse(self):
         """Get a properly formatted dictionary of the note."""
@@ -312,6 +396,7 @@ def __init__(self):
                 Note.TARGET_DECK = self.target_deck
             print("Identified target deck as", Note.TARGET_DECK)
             self.scan_file()
+            self.add_images()
             self.add_notes()
             self.write_ids()
             self.update_fields()
@@ -368,6 +453,24 @@ def scan_file(self):
             else:
                 self.notes_to_edit.append(parsed)
 
+    def add_images(self):
+        """Add images from FormatConverter to Anki's media folder."""
+        print("Adding images with these paths...")
+        print(FormatConverter.IMAGE_PATHS)
+        AnkiConnect.invoke(
+            "multi",
+            actions=[
+                AnkiConnect.request(
+                    "storeMediaFile",
+                    filename=imgpath.replace(
+                        imgpath, os.path.basename(imgpath)
+                    ),
+                    data=file_encode(imgpath)
+                )
+                for imgpath in FormatConverter.IMAGE_PATHS
+            ]
+        )
+
     @staticmethod
     def id_to_str(id):
         """Get the string repr of id."""