diff --git a/src/eo_datascience/clean_nb.py b/src/eo_datascience/clean_nb.py index e792ace..3262e21 100644 --- a/src/eo_datascience/clean_nb.py +++ b/src/eo_datascience/clean_nb.py @@ -3,7 +3,8 @@ from pathlib import Path import re -def clean_up_frontmatter(dir = './notebooks', save=True): + +def clean_up_frontmatter(dir="./notebooks", save=True): # Define the path to the notebooks nb_paths = find_ipynb(dir) @@ -11,24 +12,24 @@ def clean_up_frontmatter(dir = './notebooks', save=True): for nb_path in nb_paths: # Load the notebook nb = nbformat.read(nb_path, as_version=4) - if nb.cells[0].source.startswith('---'): - #Load frontmatter - fm = nb.cells[0].source.split('\n') + if nb.cells[0].source.startswith("---"): + # Load frontmatter + fm = nb.cells[0].source.split("\n") # Extract the title and the subtitle and convert i = 1 line = fm[i] new_text = [] while not line.startswith("---"): - if line.startswith('title'): + if line.startswith("title"): new_text.append(f"# {line.split(': ')[1]}") - if line.startswith('subtitle'): + if line.startswith("subtitle"): new_text.append(f"**{line.split(': ')[1]}**") i += 1 line = fm[i] - new_text += fm[i+1:] + new_text += fm[i + 1 :] nb.cells[0].source = "\n".join(new_text) + "\n" # Save notebook if save: @@ -36,6 +37,7 @@ def clean_up_frontmatter(dir = './notebooks', save=True): else: return nb + def convert_bibliography(nb_path="./notebooks/references.ipynb", save=True): nb = nbformat.read(nb_path, as_version=4) nb.cells[0].source = """# References @@ -43,12 +45,13 @@ def convert_bibliography(nb_path="./notebooks/references.ipynb", save=True): ```{bibliography} ``` """ - # Save the notebook + # Save the notebook if save: nbformat.write(nb, nb_path) else: return nb + def convert_callout_notes(dir="./notebooks", save=True): nb_paths = find_ipynb(dir) @@ -57,27 +60,32 @@ def convert_callout_notes(dir="./notebooks", save=True): # Load the notebook nb = nbformat.read(nb_path, as_version=4) for i in range(len(nb.cells)): - if nb.cells[i]["cell_type"] == "markdown": nb.cells[i].source = quarto_note_replace(nb.cells[i].source) - # Save the notebook + # Save the notebook if save: nbformat.write(nb, nb_path) else: return nb - + + def quarto_note_replace(quarto): note_rst_start = r":::{note}" note_rst_end = r":::" nts = re.findall(r"(?<=:::\s\{\.callout\-note\})[^:::]+", quarto) for i in nts: - quarto = re.sub(r":::\s\{\.callout\-note\}" + re.escape(i) + r":::", note_rst_start + i + note_rst_end, quarto) + quarto = re.sub( + r":::\s\{\.callout\-note\}" + re.escape(i) + r":::", + note_rst_start + i + note_rst_end, + quarto, + ) return quarto + def convert_refs(dir="./notebooks", save=True): nb_paths = find_ipynb(dir) - + # Iterate over the notebooks for nb_path in nb_paths: # Load the notebook @@ -95,29 +103,34 @@ def convert_refs(dir="./notebooks", save=True): else: return nb + def quarto_ref_figure_replace(quarto): bibs = re.findall(r"(?<=\(\@)[^\)]+", quarto) for i in bibs: quarto = re.sub(r"\(\@" + i + "\)", r"", quarto) return quarto + def quarto_ref_person_replace(quarto): bibs = re.findall(r"(?<=\[\@)[^\]]+", quarto) for i in bibs: quarto = re.sub(r"\[\@" + i + "\]", r"{cite:p}`" + i + "`", quarto) return quarto + def quarto_ref_time_replace(quarto): bibs = re.findall(r"(?<=\@)[^\s]+", quarto) for i in bibs: quarto = re.sub(r"\@" + i, r"{cite:t}`" + i + "`", quarto) return quarto + def find_ipynb(dir): root = Path(dir).resolve() - nb_paths = [root / file for file in os.listdir(root) if file.endswith('.ipynb')] + nb_paths = [root / file for file in os.listdir(root) if file.endswith(".ipynb")] return nb_paths + def main(): clean_up_frontmatter() convert_callout_notes() @@ -125,5 +138,5 @@ def main(): convert_bibliography() -if __name__ == '__main__': - main() \ No newline at end of file +if __name__ == "__main__": + main() diff --git a/tests/test_quarto_nb_conversions.py b/tests/test_quarto_nb_conversions.py index 95451b6..9ed51f2 100644 --- a/tests/test_quarto_nb_conversions.py +++ b/tests/test_quarto_nb_conversions.py @@ -1,20 +1,46 @@ import nbformat from pathlib import Path import pytest -from eo_datascience.clean_nb import clean_up_frontmatter, convert_refs, quarto_ref_person_replace, quarto_ref_time_replace, \ - convert_callout_notes, quarto_note_replace +from eo_datascience.clean_nb import ( + clean_up_frontmatter, + convert_refs, + quarto_ref_person_replace, + quarto_ref_time_replace, + convert_callout_notes, + quarto_note_replace, + find_ipynb, +) + def test_remove_front_matter(): - assert clean_up_frontmatter("./tests", False)["cells"][0]["source"] == '# This a mock Jupyter file\n**We use it for testing**\n\nSome other text, which should not be deleted!\n' + assert ( + clean_up_frontmatter("./tests", False)["cells"][0]["source"] + == "# This a mock Jupyter file\n**We use it for testing**\n\nSome other text, which should not be deleted!\n" + ) + + +def test_find_ipynb(): + assert find_ipynb("tests")[0].stem == "mock" + def test_conversion_of_refs(): - quarto = [r"lorem ipsum [@anon2024] and [@anon2025]", r"lorem ipsum @anon2024 and @anon2025"] + quarto = [ + r"lorem ipsum [@anon2024] and [@anon2025]", + r"lorem ipsum @anon2024 and @anon2025", + ] quarto[0] = quarto_ref_person_replace(quarto[0]) quarto[1] = quarto_ref_time_replace(quarto[1]) - assert quarto == [r"lorem ipsum {cite:p}`anon2024` and {cite:p}`anon2025`", r"lorem ipsum {cite:t}`anon2024` and {cite:t}`anon2025`"] - assert convert_refs("./tests", False)["cells"][2]["source"] == r"lorem ipsum {cite:p}`anon2024` and {cite:p}`anon2025` and lorem ipsum {cite:t}`anon2024` and {cite:t}`anon2025`" + assert quarto == [ + r"lorem ipsum {cite:p}`anon2024` and {cite:p}`anon2025`", + r"lorem ipsum {cite:t}`anon2024` and {cite:t}`anon2025`", + ] + assert ( + convert_refs("./tests", False)["cells"][2]["source"] + == r"lorem ipsum {cite:p}`anon2024` and {cite:p}`anon2025` and lorem ipsum {cite:t}`anon2024` and {cite:t}`anon2025`" + ) + def test_conversion_of_callout_notes(): - rst = ':::{note}\nThis a callout note.\n:::' + rst = ":::{note}\nThis a callout note.\n:::" assert quarto_note_replace(r"::: {.callout-note}\nThis a callout note.\n:::") == rst - assert convert_callout_notes("./tests", False)["cells"][1]["source"] == rst + assert convert_callout_notes("./tests", False)["cells"][1]["source"] == rst