|
1 |
| -from typing import Any |
| 1 | +import os |
| 2 | +import re |
| 3 | +import shutil |
2 | 4 | from mdconverter.mdconverter import get_default_css
|
3 | 5 | from nbconvert import MarkdownExporter
|
| 6 | +from nbconvert.preprocessors import Preprocessor |
4 | 7 | import nbformat
|
5 | 8 |
|
6 | 9 |
|
7 |
| -class Mdconverter: |
8 |
| - def __init__(self) -> None: |
9 |
| - self.css_filename = "" |
10 |
| - self.filename = "" |
11 |
| - self.output_filename = "" |
12 |
| - |
13 |
| - def run(self): |
14 |
| - # TODO making mdconverter using mdconvert.py |
15 |
| - # self.run_mdconverter() |
16 |
| - self.run_ndconverter( |
17 |
| - filename=self.filename, |
18 |
| - output_filename=self.output_filename, |
19 |
| - css_filename=self.css_filename, |
20 |
| - ) |
| 10 | +class Ndconverter: |
| 11 | + """Base class for converting Jupyter Notebook (.ipynb) files to Markdown (.md) format""" |
21 | 12 |
|
22 |
| - def run_ndconverter( |
23 |
| - self, filename: str, output_filename: str, css_filename: str |
| 13 | + def __init__( |
| 14 | + self, |
| 15 | + css_filename: str = "css/styles.css", |
| 16 | + post_fix: str = "-(NEW)", |
24 | 17 | ) -> None:
|
25 |
| - notebook_content = self.load_ipynb(filename) |
26 |
| - script = self.markdown_exporter(notebook_content) |
27 |
| - self.ndconverter_script = self.add_prefix_css(script, css_filename) |
28 |
| - self.save_script(self.ndconverter_script, output_filename) |
| 18 | + """ |
| 19 | + Initialize Ndconverter class |
| 20 | +
|
| 21 | + Args: |
| 22 | + css_filename: Path to the CSS file to add to the Markdown header |
| 23 | + post_fix: Suffix to add to the output Markdown file name |
| 24 | + """ |
| 25 | + self.css_filename = css_filename |
| 26 | + self.filename = "" |
| 27 | + self.post_fix = post_fix |
| 28 | + self.notebook_content = None |
| 29 | + self.script = "" |
| 30 | + self.resources = None |
| 31 | + self.ndconverter_script = "" |
29 | 32 |
|
30 |
| - def load_ipynb(self, filename: str) -> Any: |
31 |
| - with open(filename, "r", encoding="utf-8") as f: |
32 |
| - notebook_content = nbformat.read(f, as_version=4) |
33 |
| - return notebook_content |
| 33 | + def run(self, save_on: bool = True) -> None: |
| 34 | + """Run the process to convert the notebook to Markdown""" |
| 35 | + print("<!----Start---->") |
| 36 | + self.run_ndconverter(save_on=save_on) |
| 37 | + print("<!----End---->") |
34 | 38 |
|
35 |
| - def markdown_exporter(self, notebook_content: Any) -> str: |
| 39 | + def run_ndconverter(self, save_on: bool) -> None: |
| 40 | + """ |
| 41 | + Execute the full conversion process, including loading the notebook, |
| 42 | + exporting it to Markdown, and saving the result. |
| 43 | +
|
| 44 | + :param save_on: Whether to save the converted Markdown file to disk. |
| 45 | + """ |
| 46 | + self._load_ipynb() # make notebook_content |
| 47 | + self._markdown_exporter() # make script, resources |
| 48 | + self.ndconverter_script = self._add_prefix_css() |
| 49 | + if save_on: |
| 50 | + self._save_script() |
| 51 | + |
| 52 | + def _load_ipynb(self) -> None: |
| 53 | + """Load the Jupyter Notebook content from the specified file.""" |
| 54 | + print(f"Loading file : {self.filename}") |
| 55 | + with open(self.filename, "r", encoding="utf-8") as f: |
| 56 | + self.notebook_content = nbformat.read(f, as_version=4) |
| 57 | + |
| 58 | + def _markdown_exporter(self) -> None: |
| 59 | + """Convert notebook content to Markdown format""" |
36 | 60 | exporter = MarkdownExporter()
|
37 |
| - # MarkdownExporter를 사용하여 md로 변환 |
38 |
| - (script, resources) = exporter.from_notebook_node(notebook_content) |
39 |
| - return script |
| 61 | + self.script, self.resources = exporter.from_notebook_node(self.notebook_content) |
40 | 62 |
|
41 |
| - def add_prefix_css(self, script: str, css_filename: str): |
42 |
| - return f"{get_default_css(css_filename)}\n\n{''.join(script)}" |
| 63 | + def _add_prefix_css(self) -> str: |
| 64 | + """Add CSS content to the beginning of the Markdown script""" |
| 65 | + return f"{get_default_css(self.css_filename)}\n\n{self.script}" |
43 | 66 |
|
44 |
| - def save_script(self, script: str, output_filename: str) -> None: |
45 |
| - # 변환된 스크립트 저장 |
| 67 | + def _save_script(self) -> None: |
| 68 | + """Save the converted script""" |
| 69 | + output_filename = self.filename.replace(".ipynb", f"{self.post_fix}.md") |
| 70 | + print(f"Saving file : {output_filename}") |
46 | 71 | with open(output_filename, "w", encoding="utf-8") as f:
|
47 |
| - f.write(script) |
| 72 | + f.write(self.ndconverter_script) |
| 73 | + |
| 74 | + |
| 75 | +class CustomPreprocessor(Preprocessor): |
| 76 | + """Override preprocess_cell""" |
| 77 | + |
| 78 | + def preprocess_cell(self, cell, resources, index): |
| 79 | + if cell.get("cell_type", "") == "markdown": |
| 80 | + # markdown |
| 81 | + pass |
| 82 | + elif cell.get("cell_type", "") == "code": |
| 83 | + # code |
| 84 | + pass |
| 85 | + return cell, resources |
| 86 | + |
| 87 | + |
| 88 | +class CustomMdconverter(Ndconverter): |
| 89 | + """Custom Markdown converter""" |
| 90 | + |
| 91 | + def __init__(self, *args, **kwargs) -> None: |
| 92 | + super().__init__(*args, **kwargs) |
| 93 | + self.img_dir = "" |
| 94 | + |
| 95 | + def _markdown_exporter(self) -> None: |
| 96 | + """Convert to Markdown and handle images""" |
| 97 | + exporter = MarkdownExporter() |
| 98 | + exporter.register_preprocessor(CustomPreprocessor, enabled=True) |
| 99 | + exporter.exclude_input_prompt = True # exclude "In[0]:" |
| 100 | + exporter.exclude_output_prompt = True # exclude "Out[0]:" |
| 101 | + |
| 102 | + self.script, self.resources = exporter.from_notebook_node(self.notebook_content) |
| 103 | + self._setup_image_processing() |
| 104 | + |
| 105 | + def _setup_image_processing(self) -> None: |
| 106 | + """Set up image processing""" |
| 107 | + self._extracting_img_path() |
| 108 | + if self.resources.get("outputs"): |
| 109 | + self._process_output_images() |
| 110 | + |
| 111 | + def _extracting_img_path(self) -> None: |
| 112 | + """Set image directory path""" |
| 113 | + folder_name = os.path.dirname(self.filename) |
| 114 | + self.img_dir = os.path.join(folder_name, "img") |
| 115 | + os.makedirs(self.img_dir, exist_ok=True) |
| 116 | + print(f"Setting image dir : {self.img_dir}") |
| 117 | + |
| 118 | + def _process_output_images(self) -> None: |
| 119 | + """Save image files and update paths""" |
| 120 | + for img_filename, image_data in self.resources["outputs"].items(): |
| 121 | + img_path = os.path.join(self.img_dir, img_filename) |
| 122 | + self._save_image(img_path, image_data) |
| 123 | + self._update_image_path(img_filename, img_path) |
| 124 | + |
| 125 | + def _save_image(self, img_path: str, image_data: bytes) -> None: |
| 126 | + """Save image file""" |
| 127 | + print(f"Saving image : {img_path}") |
| 128 | + with open(img_path, "wb") as f: |
| 129 | + f.write(image_data) |
| 130 | + |
| 131 | + def _update_image_path(self, img_filename: str, img_path: str) -> None: |
| 132 | + """Update image path in Markdown""" |
| 133 | + img_type = self._get_image_type(img_filename) |
| 134 | + if img_type: |
| 135 | + old_pattern = f"![{img_type}]({img_filename})" |
| 136 | + new_pattern = f"![{img_type}]({img_path})" |
| 137 | + self.script = self.script.replace(old_pattern, new_pattern) |
| 138 | + print(f"Update path of imags : {img_filename} -> {img_path}") |
| 139 | + |
| 140 | + @staticmethod |
| 141 | + def _get_image_type(filename: str) -> str: |
| 142 | + """Check image type""" |
| 143 | + if filename.endswith((".jpg", ".jpeg")): |
| 144 | + return "jpeg" |
| 145 | + elif filename.endswith(".png"): |
| 146 | + return "png" |
| 147 | + return "" |
| 148 | + |
| 149 | + def _process_markdown_images_pattern(self) -> None: |
| 150 | + """Handle Markdown image patterns""" |
| 151 | + pattern = r"!\[([^\]]+)\]\((\.\/assets\/[^)]+)\)" |
| 152 | + for match in re.finditer(pattern, self.script): |
| 153 | + desc, old_path = match.groups() |
| 154 | + self._process_markdown_image(desc, old_path) |
| 155 | + |
| 156 | + def _process_markdown_image(self, desc: str, old_path: str) -> None: |
| 157 | + """Handle individual Markdown image""" |
| 158 | + filename = os.path.basename(old_path) |
| 159 | + new_path = f"{self.img_dir}/{filename}" |
| 160 | + abs_old_path = self._get_absolute_path(old_path) |
| 161 | + |
| 162 | + if os.path.exists(abs_old_path): |
| 163 | + shutil.copy2(abs_old_path, new_path) |
| 164 | + self._update_markdown_image_path(desc, old_path, new_path) |
| 165 | + |
| 166 | + def _get_absolute_path(self, old_path: str) -> str: |
| 167 | + """Convert relative path to absolute path""" |
| 168 | + return os.path.abspath( |
| 169 | + os.path.join(os.path.dirname(self.filename), old_path.lstrip("./")) |
| 170 | + ) |
| 171 | + |
| 172 | + def _update_markdown_image_path( |
| 173 | + self, desc: str, old_path: str, new_path: str |
| 174 | + ) -> None: |
| 175 | + """Update image path in Markdown""" |
| 176 | + old_pattern = f"![{desc}]({old_path})" |
| 177 | + new_pattern = f"![{desc}]({new_path})" |
| 178 | + self.script = self.script.replace(old_pattern, new_pattern) |
| 179 | + print(f"In Markdown docs, image path : {old_path} -> {new_path}") |
| 180 | + |
| 181 | + |
| 182 | +class MultiNdconverter(CustomMdconverter): |
| 183 | + """Multi-file converter""" |
| 184 | + |
| 185 | + def __init__(self, filenames: list) -> None: |
| 186 | + super().__init__() |
| 187 | + self.filenames = filenames |
48 | 188 |
|
| 189 | + def add_file(self, filename: str) -> None: |
| 190 | + """Add file to convert""" |
| 191 | + self.filenames.append(filename) |
49 | 192 |
|
50 |
| -# TODO Creating Multi loader |
| 193 | + def run(self, save_on: bool = True) -> None: |
| 194 | + """Run conversion for all files""" |
| 195 | + for filename in self.filenames: |
| 196 | + self.filename = filename |
| 197 | + super().run(save_on) |
0 commit comments