Skip to content

Commit

Permalink
Merge pull request #8 from Pseudonium/develop
Browse files Browse the repository at this point in the history
Markdown and Image Update
  • Loading branch information
Pseudonium authored Sep 2, 2020
2 parents 6e10afb + 99fc160 commit 590bae1
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 13 deletions.
27 changes: 21 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ Script to add flashcards from an Obsidian markdown file to Anki.
3. Place the script "obsidian_to_anki.py" in a convenient folder. You may wish to consider placing it in a Scripts folder, and adding the folder to your PATH
4. Start up Anki, and navigate to your desired profile
5. Ensure that you've installed [AnkiConnect](https://github.com/FooSoft/anki-connect).
6. From the command line, run the script once with no arguments - `{Path to script}/obsidian_to_anki.py`
6. Install the `python-markdown` library - see installation instructions [here](https://github.com/Python-Markdown/markdown). `pip install markdown` should work.
7. Check the Permissions section below to ensure the script is able to run.
8. From the command line, run the script once with no arguments - `{Path to script}/obsidian_to_anki.py`
This will make a configuration file in the same directory as the script, "obsidian_to_anki_config.ini".

## Permissions
Expand Down Expand Up @@ -50,6 +52,16 @@ In the markdown file, you must format your notes as follows:
> Tags:
> END
### Markdown formatting

Standard markdown formatting is supported. GitHub-flavoured code blocks are supported (but Anki won't show syntax highlighting).

### Image formatting

Embedded images are supported if the following criteria are met:
1. The image is stored locally
2. It is embedded using the standard markdown syntax: `![alt-text](path_to_image)`

### Tag formatting

Note that the Tags: line is optional - if you don't want tags, you may leave out the line.
Expand Down Expand Up @@ -126,16 +138,19 @@ Then you now format your notes like this:
> {Note Data}
> END
## Supported?
## Features

Currently supported features:
Current features:
* Custom note types
* Updating notes from Obsidian
* Substitutions (see above)
* Auto-convert math formatting
* Tags
* Adding to decks other than Default

Not currently supported features:
* Media
* Markdown formatting
* Embedded images (GIFs should work too)

Not available:
* Audio
* Deleting notes from Obsidian
* Reading from all files in a directory automatically - script works on a per-file basis
117 changes: 110 additions & 7 deletions obsidian_to_anki.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@
import argparse
import collections
import webbrowser
import markdown
import base64

# Shared Markdown converter for the whole script: it is built once with the
# 'extra' extension bundle and HTML5 output, and reused by
# FormatConverter.markdown_parse for every piece of text it converts.
md_parser = markdown.Markdown(
extensions=['extra'], output_format="html5"
)


def write_safe(filename, contents):
Expand Down Expand Up @@ -48,6 +54,12 @@ def string_insert(string, position_inserts):
return string


def file_encode(filepath):
    """Return the contents of the file at filepath as a base 64 string."""
    # Read as raw bytes so binary files (e.g. images) are not decoded as text.
    with open(filepath, 'rb') as source:
        raw_bytes = source.read()
    encoded = base64.b64encode(raw_bytes)
    # b64encode yields bytes; decode so the caller gets a plain str.
    return encoded.decode('utf-8')


class AnkiConnect:
"""Namespace for AnkiConnect functions."""

Expand Down Expand Up @@ -76,15 +88,25 @@ def invoke(action, **params):
class FormatConverter:
"""Converting Obsidian formatting to Anki formatting."""

INLINE_MATH_REGEXP = re.compile(r"(?<!\$)\$(?=[\S])(?=[^$])[\s\S]*?\S\$")
DISPLAY_MATH_REGEXP = re.compile(r"\$\$[\s\S]*?\$\$")
OBS_INLINE_MATH_REGEXP = re.compile(
r"(?<!\$)\$(?=[\S])(?=[^$])[\s\S]*?\S\$"
)
OBS_DISPLAY_MATH_REGEXP = re.compile(r"\$\$[\s\S]*?\$\$")

ANKI_INLINE_START = r"\("
ANKI_INLINE_END = r"\)"

ANKI_DISPLAY_START = r"\["
ANKI_DISPLAY_END = r"\]"

ANKI_MATH_REGEXP = re.compile(r"(\\\[[\s\S]*?\\\])|(\\\([\s\S]*?\\\))")

MATH_REPLACE = "OBSTOANKIMATH"

IMAGE_PATHS = set()
IMAGE_REGEXP = re.compile(r'<img alt="[\s\S]*?" src="([\s\S]*?)">')

@staticmethod
def inline_anki_repl(matchobject):
"""Get replacement string for Obsidian-formatted inline math."""
found_string = matchobject.group(0)
Expand All @@ -95,6 +117,7 @@ def inline_anki_repl(matchobject):
result += FormatConverter.ANKI_INLINE_END
return result

@staticmethod
def display_anki_repl(matchobject):
"""Get replacement string for Obsidian-formatted display math."""
found_string = matchobject.group(0)
Expand All @@ -105,20 +128,75 @@ def display_anki_repl(matchobject):
result += FormatConverter.ANKI_DISPLAY_END
return result

@staticmethod
def obsidian_to_anki_math(note_text):
    """Convert Obsidian-formatted math to Anki-formatted math.

    Display math (matched by OBS_DISPLAY_MATH_REGEXP) is rewritten first;
    inline math (matched by OBS_INLINE_MATH_REGEXP) is then rewritten on
    the result of that first pass.

    Returns the converted text.
    """
    # Only the OBS_-prefixed patterns are used here; the older
    # INLINE_MATH_REGEXP / DISPLAY_MATH_REGEXP names were renamed.
    display_converted = FormatConverter.OBS_DISPLAY_MATH_REGEXP.sub(
        FormatConverter.display_anki_repl, note_text
    )
    return FormatConverter.OBS_INLINE_MATH_REGEXP.sub(
        FormatConverter.inline_anki_repl, display_converted
    )

@staticmethod
def markdown_parse(text):
    """Convert markdown text using the shared parser and return the result."""
    # Reset first so state left over from a previous conversion is cleared.
    parser = md_parser.reset()
    return parser.convert(text)

@staticmethod
def format(note_text):
    """Run every format conversion over note_text and return the result.

    Steps, in order: convert Obsidian math to Anki math, swap each Anki
    math span for a placeholder, apply the markdown conversion, restore
    the math spans, record image paths, and rewrite image src attributes.
    """
    converted = FormatConverter.obsidian_to_anki_math(note_text)
    math_pattern = FormatConverter.ANKI_MATH_REGEXP
    placeholder = FormatConverter.MATH_REPLACE
    # Remember each Anki math span in order of appearance.
    saved_math = []
    for found in math_pattern.finditer(converted):
        saved_math.append(found.group(0))
    # Hide the math behind a placeholder so it doesn't interfere with
    # markdown parsing.
    converted = math_pattern.sub(placeholder, converted)
    converted = FormatConverter.markdown_parse(converted)
    # Restore the spans one placeholder at a time, first to last.
    for span in saved_math:
        converted = converted.replace(placeholder, span, 1)
    # NOTE(review): prints the converted text on every call - looks like
    # leftover debug output; confirm before removing.
    print(converted)
    FormatConverter.get_images(converted)
    return FormatConverter.fix_image_src(converted)

@staticmethod
def get_images(html_text):
    """Record the src of every image tag matched in html_text.

    The paths are added to FormatConverter.IMAGE_PATHS; nothing is returned.
    """
    # Each src is stored exactly as written in the HTML
    # (presumably a path relative to cwd).
    FormatConverter.IMAGE_PATHS.update(
        found.group(1)
        for found in FormatConverter.IMAGE_REGEXP.finditer(html_text)
    )

@staticmethod
def fix_image_src_repl(matchobject):
"""Replace the src in matchobject appropriately."""
found_string, found_path = matchobject.group(0), matchobject.group(1)
found_string = found_string.replace(
found_path, os.path.basename(found_path)
)
return found_string

@staticmethod
def fix_image_src(html_text):
    """Rewrite the src of each image tag in html_text for use in Anki.

    Every tag matched by IMAGE_REGEXP is passed through fix_image_src_repl,
    and the resulting text is returned.
    """
    image_pattern = FormatConverter.IMAGE_REGEXP
    replacer = FormatConverter.fix_image_src_repl
    return image_pattern.sub(replacer, html_text)


class Note:
"""Manages parsing notes into a dictionary formatted for AnkiConnect.
Expand All @@ -135,7 +213,7 @@ class Note:

def __init__(self, note_text):
"""Set up useful variables."""
self.text = FormatConverter.format(note_text)
self.text = note_text
self.lines = self.text.splitlines()
self.note_type = Note.note_subs[self.lines[0]]
self.subs = Note.field_subs[self.note_type]
Expand Down Expand Up @@ -201,10 +279,16 @@ def fields(self):
fields = dict.fromkeys(self.field_names, "")
for line in self.lines[1:]:
if self.next_sub and line.startswith(self.next_sub):
# This means we're entering a new field.
# So, we should format the text in the current field
self.current_field_num += 1
line = line[len(self.current_sub):]
fields[self.current_field] += line + " "
return {key: value.rstrip() for key, value in fields.items()}
fields[self.current_field] += line + "\n"
fields = {
key: FormatConverter.format(value)
for key, value in fields.items()
}
return {key: value.strip() for key, value in fields.items()}

def parse(self):
"""Get a properly formatted dictionary of the note."""
Expand Down Expand Up @@ -312,6 +396,7 @@ def __init__(self):
Note.TARGET_DECK = self.target_deck
print("Identified target deck as", Note.TARGET_DECK)
self.scan_file()
self.add_images()
self.add_notes()
self.write_ids()
self.update_fields()
Expand Down Expand Up @@ -368,6 +453,24 @@ def scan_file(self):
else:
self.notes_to_edit.append(parsed)

def add_images(self):
    """Add images from FormatConverter to Anki's media folder.

    Builds one "storeMediaFile" action per path in
    FormatConverter.IMAGE_PATHS and sends them together in a single
    AnkiConnect "multi" request. Each file is stored under its base name,
    with its contents base 64 encoded by file_encode.
    """
    print("Adding images with these paths...")
    print(FormatConverter.IMAGE_PATHS)
    store_actions = [
        AnkiConnect.request(
            "storeMediaFile",
            # Same base name that fix_image_src_repl writes into the
            # image tag's src.
            filename=os.path.basename(imgpath),
            data=file_encode(imgpath)
        )
        for imgpath in FormatConverter.IMAGE_PATHS
    ]
    AnkiConnect.invoke("multi", actions=store_actions)

@staticmethod
def id_to_str(id):
"""Get the string repr of id."""
Expand Down

0 comments on commit 590bae1

Please sign in to comment.