Skip to content

Commit dda494b

Browse files
committed
⚡️ improve code
1 parent fd2ed6d commit dda494b

File tree

1 file changed

+182
-35
lines changed

1 file changed

+182
-35
lines changed

mdconverter/mdconverter_class.py

Lines changed: 182 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,197 @@
1-
from typing import Any
1+
import os
2+
import re
3+
import shutil
24
from mdconverter.mdconverter import get_default_css
35
from nbconvert import MarkdownExporter
6+
from nbconvert.preprocessors import Preprocessor
47
import nbformat
58

69

7-
class Mdconverter:
8-
def __init__(self) -> None:
9-
self.css_filename = ""
10-
self.filename = ""
11-
self.output_filename = ""
12-
13-
def run(self):
14-
# TODO making mdconverter using mdconvert.py
15-
# self.run_mdconverter()
16-
self.run_ndconverter(
17-
filename=self.filename,
18-
output_filename=self.output_filename,
19-
css_filename=self.css_filename,
20-
)
10+
class Ndconverter:
    """Base class for converting Jupyter Notebook (.ipynb) files to Markdown (.md) format.

    Set ``filename`` to the notebook path and call ``run()``. The intermediate
    results are kept on the instance (``notebook_content``, ``script``,
    ``resources``, ``ndconverter_script``) so subclasses can post-process them.
    """

    def __init__(
        self,
        css_filename: str = "css/styles.css",
        post_fix: str = "-(NEW)",
    ) -> None:
        """Initialize the converter.

        Args:
            css_filename: Path to the CSS file to add to the Markdown header.
            post_fix: Suffix to add to the output Markdown file name.
        """
        self.css_filename = css_filename
        self.filename = ""  # notebook path; must be set before run()
        self.post_fix = post_fix
        self.notebook_content = None  # filled by _load_ipynb()
        self.script = ""  # Markdown body, filled by _markdown_exporter()
        self.resources = None  # exporter resources, filled by _markdown_exporter()
        self.ndconverter_script = ""  # CSS header + Markdown body

    def run(self, save_on: bool = True) -> None:
        """Run the process to convert the notebook to Markdown.

        Args:
            save_on: Whether to save the converted Markdown file to disk.
        """
        print("<!----Start---->")
        self.run_ndconverter(save_on=save_on)
        print("<!----End---->")

    def run_ndconverter(self, save_on: bool) -> None:
        """Load the notebook, export it to Markdown and optionally save it.

        Args:
            save_on: Whether to save the converted Markdown file to disk.
        """
        self._load_ipynb()  # make notebook_content
        self._markdown_exporter()  # make script, resources
        self.ndconverter_script = self._add_prefix_css()
        if save_on:
            self._save_script()

    def _load_ipynb(self) -> None:
        """Load the Jupyter Notebook content from ``self.filename``."""
        print(f"Loading file : {self.filename}")
        with open(self.filename, "r", encoding="utf-8") as f:
            self.notebook_content = nbformat.read(f, as_version=4)

    def _markdown_exporter(self) -> None:
        """Convert the loaded notebook to Markdown (sets ``script`` and ``resources``)."""
        exporter = MarkdownExporter()
        self.script, self.resources = exporter.from_notebook_node(self.notebook_content)

    def _add_prefix_css(self) -> str:
        """Return the Markdown script prefixed with the output of ``get_default_css``."""
        return f"{get_default_css(self.css_filename)}\n\n{self.script}"

    def _save_script(self) -> None:
        """Write ``ndconverter_script`` next to the notebook as ``<name><post_fix>.md``.

        Only the final extension of ``self.filename`` is swapped. A plain
        ``str.replace(".ipynb", ...)`` would also rewrite directory names that
        contain ".ipynb" (e.g. ``.ipynb_checkpoints``) and, for an input
        without that suffix, would leave the path unchanged and overwrite the
        source file.
        """
        root, _ext = os.path.splitext(self.filename)
        output_filename = f"{root}{self.post_fix}.md"
        print(f"Saving file : {output_filename}")
        with open(output_filename, "w", encoding="utf-8") as f:
            f.write(self.ndconverter_script)
73+
74+
75+
class CustomPreprocessor(Preprocessor):
    """Pass-through preprocessor; a hook point for per-cell customisation."""

    def preprocess_cell(self, cell, resources, index):
        """Return the cell and resources unchanged.

        The branches below are intentionally empty placeholders, one per
        cell type, where markdown- or code-specific handling can be added.
        """
        kind = cell.get("cell_type", "")
        if kind == "markdown":
            pass  # markdown cells: nothing to do yet
        elif kind == "code":
            pass  # code cells: nothing to do yet
        return cell, resources
86+
87+
88+
class CustomMdconverter(Ndconverter):
    """Markdown converter that also writes notebook output images to an ``img`` folder.

    Images extracted by the exporter are saved under ``<notebook dir>/img`` and
    the matching ``![type](name)`` links in the Markdown are rewritten to point
    at the saved files.
    """

    def __init__(self, *args, **kwargs) -> None:
        """Initialize the converter; arguments are forwarded to ``Ndconverter``."""
        super().__init__(*args, **kwargs)
        self.img_dir = ""  # set by _extracting_img_path()

    def _markdown_exporter(self) -> None:
        """Convert to Markdown and handle images."""
        exporter = MarkdownExporter()
        exporter.register_preprocessor(CustomPreprocessor, enabled=True)
        exporter.exclude_input_prompt = True  # exclude "In[0]:"
        exporter.exclude_output_prompt = True  # exclude "Out[0]:"

        self.script, self.resources = exporter.from_notebook_node(self.notebook_content)
        self._setup_image_processing()

    def _setup_image_processing(self) -> None:
        """Create the image directory and save any images the exporter extracted."""
        self._extracting_img_path()
        if self.resources.get("outputs"):
            self._process_output_images()
        # NOTE(review): _process_markdown_images_pattern() is defined below but
        # is not invoked here or anywhere else in this class - confirm whether
        # "./assets/" images in markdown cells were meant to be handled too.

    def _extracting_img_path(self) -> None:
        """Set ``img_dir`` to ``<notebook dir>/img`` and create it if missing."""
        folder_name = os.path.dirname(self.filename)
        self.img_dir = os.path.join(folder_name, "img")
        os.makedirs(self.img_dir, exist_ok=True)
        print(f"Setting image dir : {self.img_dir}")

    def _process_output_images(self) -> None:
        """Save every extracted output image and update its link in the Markdown."""
        for img_filename, image_data in self.resources["outputs"].items():
            img_path = os.path.join(self.img_dir, img_filename)
            self._save_image(img_path, image_data)
            self._update_image_path(img_filename, img_path)

    def _save_image(self, img_path: str, image_data: bytes) -> None:
        """Write the raw image bytes to ``img_path``."""
        print(f"Saving image : {img_path}")
        with open(img_path, "wb") as f:
            f.write(image_data)

    def _update_image_path(self, img_filename: str, img_path: str) -> None:
        """Rewrite ``![type](img_filename)`` links to point at ``img_path``.

        Files whose type is not recognised by ``_get_image_type`` are left
        untouched in the Markdown.
        """
        img_type = self._get_image_type(img_filename)
        if img_type:
            old_pattern = f"![{img_type}]({img_filename})"
            new_pattern = f"![{img_type}]({img_path})"
            self.script = self.script.replace(old_pattern, new_pattern)
            print(f"Update path of imags : {img_filename} -> {img_path}")

    @staticmethod
    def _get_image_type(filename: str) -> str:
        """Return "jpeg" or "png" based on the file extension, or "" if unknown."""
        if filename.endswith((".jpg", ".jpeg")):
            return "jpeg"
        elif filename.endswith(".png"):
            return "png"
        return ""

    def _process_markdown_images_pattern(self) -> None:
        """Copy images referenced as ``![desc](./assets/...)`` and update their links."""
        pattern = r"!\[([^\]]+)\]\((\.\/assets\/[^)]+)\)"
        # finditer scans the string object bound at call time, so rebinding
        # self.script inside the loop does not disturb the iteration.
        for match in re.finditer(pattern, self.script):
            desc, old_path = match.groups()
            self._process_markdown_image(desc, old_path)

    def _process_markdown_image(self, desc: str, old_path: str) -> None:
        """Copy one referenced image into ``img_dir`` and rewrite its link.

        Nothing happens when the referenced file does not exist on disk.
        """
        filename = os.path.basename(old_path)
        new_path = f"{self.img_dir}/{filename}"
        abs_old_path = self._get_absolute_path(old_path)

        if os.path.exists(abs_old_path):
            shutil.copy2(abs_old_path, new_path)
            self._update_markdown_image_path(desc, old_path, new_path)

    def _get_absolute_path(self, old_path: str) -> str:
        """Resolve ``old_path`` relative to the notebook's directory.

        ``os.path.abspath`` already collapses a leading "./", so no manual
        stripping is needed. ``str.lstrip("./")`` removes any run of "." and
        "/" characters rather than the "./" prefix, which would turn
        "../x.png" into "x.png" and "./.hidden.png" into "hidden.png".
        """
        base_dir = os.path.dirname(self.filename)
        return os.path.abspath(os.path.join(base_dir, old_path))

    def _update_markdown_image_path(
        self, desc: str, old_path: str, new_path: str
    ) -> None:
        """Replace ``![desc](old_path)`` with ``![desc](new_path)`` in the Markdown."""
        old_pattern = f"![{desc}]({old_path})"
        new_pattern = f"![{desc}]({new_path})"
        self.script = self.script.replace(old_pattern, new_pattern)
        print(f"In Markdown docs, image path : {old_path} -> {new_path}")
180+
181+
182+
class MultiNdconverter(CustomMdconverter):
    """Multi-file converter: runs the conversion once per notebook path."""

    def __init__(self, filenames: list, **kwargs) -> None:
        """Initialize the converter.

        Args:
            filenames: Notebook paths to convert, in order.
            **kwargs: Optional settings forwarded to the parent initializer
                (e.g. ``css_filename``, ``post_fix``), so the multi-file
                converter can be configured like the single-file one.
        """
        super().__init__(**kwargs)
        # Copy so that add_file() never mutates the caller's list.
        self.filenames = list(filenames)

    def add_file(self, filename: str) -> None:
        """Add file to convert."""
        self.filenames.append(filename)

    def run(self, save_on: bool = True) -> None:
        """Run conversion for all files.

        Args:
            save_on: Whether to save each converted Markdown file to disk.
        """
        for filename in self.filenames:
            self.filename = filename
            super().run(save_on)

0 commit comments

Comments
 (0)