INRIA
diff --git a/.gitignore
-1 b/.gitignore
-1
diff --git a/.jupyter/README.md
-1 b/.jupyter/README.md
-1
diff --git a/.jupyter/jupyter_notebook_config.py
+1-1 b/.jupyter/jupyter_notebook_config.py
+1-1
diff --git a/.pre-commit-config.yaml
+6-6 b/.pre-commit-config.yaml
+6-6
diff --git a/build_tools/generate-exercise-from-solution.py
+24-14 b/build_tools/generate-exercise-from-solution.py
+24-14
diff --git a/build_tools/generate-index.py
+12-5 b/build_tools/generate-index.py
+12-5
diff --git a/build_tools/generate-quizzes.py
+19-12 b/build_tools/generate-quizzes.py
+19-12
diff --git a/build_tools/sanity-check.py
+6-5 b/build_tools/sanity-check.py
+6-5
diff --git a/datasets/penguins.csv
-1 b/datasets/penguins.csv
-1
diff --git a/figures/plot_iris_visualization.py
+12-13 b/figures/plot_iris_visualization.py
+12-13
@@ -33,4 +33,3 @@ doc/_build
 .idea
 *.code-workspace
 .vscode
-
@@ -1,2 +1 @@
 This directory is to setup jupyter on binder
-
@@ -1,2 +1,2 @@
 # To use jupytext in binder
-c.ContentsManager.preferred_jupytext_formats_read = 'py:percent'  # noqa
+c.ContentsManager.preferred_jupytext_formats_read = "py:percent"  # noqa
@@ -5,16 +5,16 @@ repos:
   -   id: check-yaml
   -   id: end-of-file-fixer
       exclude: notebooks
+      exclude_types: [svg]
   -   id: trailing-whitespace
       exclude: notebooks
+      exclude_types: [svg]
 - repo: https://github.com/psf/black
   rev: 23.1.0
   hooks:
   -   id: black
-- repo: https://github.com/pycqa/flake8
-  rev: 4.0.1
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  rev: v0.11.2
   hooks:
-    - id: flake8
-      entry: pflake8
-      additional_dependencies: [pyproject-flake8]
-      types: [file, python]
+  -   id: ruff
+      args: ["--fix", "--output-format=full"]
@@ -5,6 +5,7 @@
 from jupytext.myst import myst_to_notebook
 import jupytext
 
+
 def replace_simple_text(input_py_str):
     result = input_py_str.replace("📃 Solution for", "📝")
     return result
@@ -19,28 +20,35 @@ def remove_solution(input_py_str):
       before this comment and add "# Write your code here." at the end of the
       cell.
     """
-    nb = jupytext.reads(input_py_str, fmt='py:percent')
+    nb = jupytext.reads(input_py_str, fmt="py:percent")
 
-    cell_tags_list = [c['metadata'].get('tags') for c in nb.cells]
-    is_solution_list = [tags is not None and 'solution' in tags
-                        for tags in cell_tags_list]
+    cell_tags_list = [c["metadata"].get("tags") for c in nb.cells]
+    is_solution_list = [
+        tags is not None and "solution" in tags for tags in cell_tags_list
+    ]
     # Completely remove cells with "solution" tags
-    nb.cells = [cell for cell, is_solution in zip(nb.cells, is_solution_list)
-                if not is_solution]
+    nb.cells = [
+        cell
+        for cell, is_solution in zip(nb.cells, is_solution_list)
+        if not is_solution
+    ]
 
     # Partial cell removal based on "# solution" comment
     marker = "# solution"
-    pattern = re.compile(f"^{marker}.*", flags=re.MULTILINE|re.DOTALL)
+    pattern = re.compile(f"^{marker}.*", flags=re.MULTILINE | re.DOTALL)
 
-    cells_to_modify = [c for c in nb.cells if c["cell_type"] == "code" and
-                       marker in c["source"]]
+    cells_to_modify = [
+        c
+        for c in nb.cells
+        if c["cell_type"] == "code" and marker in c["source"]
+    ]
 
     for c in cells_to_modify:
         c["source"] = pattern.sub("# Write your code here.", c["source"])
 
     # TODO: we could potentially try to avoid changing the input file jupytext
     # header since this info is rarely useful. Let's keep it simple for now.
-    py_nb_str = jupytext.writes(nb, fmt='py:percent')
+    py_nb_str = jupytext.writes(nb, fmt="py:percent")
     return py_nb_str
 
 
@@ -49,7 +57,7 @@ def write_exercise(solution_path, exercise_path):
 
     output_str = input_str
     for replace_func in [replace_simple_text, remove_solution]:
-        output_str= replace_func(output_str)
+        output_str = replace_func(output_str)
     exercise_path.write_text(output_str)
 
 
@@ -70,12 +78,14 @@ def write_all_exercises(python_scripts_folder):
     if path.is_dir():
         write_all_exercises(path)
     else:
-        if '_ex_' not in str(path):
+        if "_ex_" not in str(path):
             raise ValueError(
-                f'Path argument should be an exercise file. Path was {path}')
+                f"Path argument should be an exercise file. Path was {path}"
+            )
         solution_path = Path(str(path).replace("_ex_", "_sol_"))
         if not solution_path.exists():
             raise ValueError(
-                f"{solution_path} does not exist, check argument path {path}")
+                f"{solution_path} does not exist, check argument path {path}"
+            )
 
         write_exercise(solution_path, path)
@@ -41,7 +41,7 @@ def get_first_title(path):
     elif path.suffix == ".md":
         md_str = path.read_text()
     else:
-        raise ValueError(f"{filename} is not a .py or a .md file")
+        raise ValueError(f"{path} is not a .py or a .md file")
 
     return get_first_title_from_md_str(md_str)
 
@@ -96,7 +96,9 @@ def get_single_file_markdown(docname):
         # This is simpler to point to inria.github.io generated HTML otherwise
         # there are quirks (MyST in quizzes not supported, slides not working,
         # etc ...)
-        relative_url = str(target).replace("jupyter-book/", "").replace(".md", ".html")
+        relative_url = (
+            str(target).replace("jupyter-book/", "").replace(".md", ".html")
+        )
         target = f"https://inria.github.io/scikit-learn-mooc/{relative_url}"
 
     return f"[{title}]({target})"
@@ -140,7 +142,9 @@ def test_get_lesson_markdown():
     documents = json_info["documents"]
     print(
         get_lesson_markdown(
-            documents["predictive_modeling_pipeline/01_tabular_data_exploration_index"]
+            documents[
+                "predictive_modeling_pipeline/01_tabular_data_exploration_index"
+            ]
         )
     )
 
@@ -156,7 +160,8 @@ def get_module_markdown(module_dict, documents):
     module_title = module_dict["caption"]
     heading = f"# {module_title}"
     content = "\n\n".join(
-        get_lesson_markdown(documents[docname]) for docname in module_dict["items"]
+        get_lesson_markdown(documents[docname])
+        for docname in module_dict["items"]
     )
     return f"{heading}\n\n{content}"
 
@@ -219,7 +224,9 @@ def get_full_index_ipynb(toc_path):
     md_str = get_full_index_markdown(toc_path)
     nb = jupytext.reads(md_str, format=".md")
 
-    nb = nbformat.v4.new_notebook(cells=[nbformat.v4.new_markdown_cell(md_str)])
+    nb = nbformat.v4.new_notebook(
+        cells=[nbformat.v4.new_markdown_cell(md_str)]
+    )
 
     # nb_content = jupytext.writes(nb, fmt=".ipynb")
     # nb = json.loads(nb_content)
 
@@ -13,16 +13,21 @@ def remove_solution(input_myst_str):
     """
     nb = myst_to_notebook(input_myst_str)
 
-    cell_tags_list = [c['metadata'].get('tags') for c in nb.cells]
-    is_solution_list = [tags is not None and 'solution' in tags
-                        for tags in cell_tags_list]
-    nb.cells = [cell for cell, is_solution in zip(nb.cells, is_solution_list)
-                if not is_solution]
-
-    myst_nb_str = jupytext.writes(nb, fmt='myst')
-
-    header_pattern = re.compile(r"---\njupytext.+---\s*",
-                                re.DOTALL | re.MULTILINE)
+    cell_tags_list = [c["metadata"].get("tags") for c in nb.cells]
+    is_solution_list = [
+        tags is not None and "solution" in tags for tags in cell_tags_list
+    ]
+    nb.cells = [
+        cell
+        for cell, is_solution in zip(nb.cells, is_solution_list)
+        if not is_solution
+    ]
+
+    myst_nb_str = jupytext.writes(nb, fmt="myst")
+
+    header_pattern = re.compile(
+        r"---\njupytext.+---\s*", re.DOTALL | re.MULTILINE
+    )
     return re.sub(header_pattern, "", myst_nb_str)
 
 
@@ -39,12 +44,14 @@ def write_all_exercises(input_root_path, output_root_path):
 
     for input_path in input_exercises:
         # FIXME there may be a better way with the pathlib API
-        relative_path_str = re.sub(str(input_root_path) + "/?", "",
-                                   str(input_path))
+        relative_path_str = re.sub(
+            str(input_root_path) + "/?", "", str(input_path)
+        )
         output_path = Path(output_root_path).joinpath(relative_path_str)
         print(str(input_path), str(output_path))
         write_exercise_myst(input_path, output_path)
 
+
 if __name__ == "__main__":
     input_root_path = sys.argv[1]
     output_root_path = sys.argv[2]
 
@@ -4,8 +4,8 @@
 
 # TODO: we could get the list from .gitignore
 IGNORE_LIST = [
-    '.ipynb_checkpoints',
-    '__pycache__',
+    ".ipynb_checkpoints",
+    "__pycache__",
 ]
 
 folder1, folder2 = sys.argv[1:3]
@@ -28,6 +28,7 @@ def get_basename(folder):
     only_in_folder2 = set(basenames2) - set(basenames1)
 
     raise RuntimeError(
-        f'Inconsistency between folder {folder1} and {folder2}\n'
-        f'Only in folder {folder1}: {only_in_folder1}\n'
-        f'Only in folder {folder2}: {only_in_folder2}')
+        f"Inconsistency between folder {folder1} and {folder2}\n"
+        f"Only in folder {folder1}: {only_in_folder1}\n"
+        f"Only in folder {folder2}: {only_in_folder2}"
+    )
@@ -343,4 +343,3 @@ PAL0910,65,Chinstrap penguin (Pygoscelis antarctica),Anvers,Dream,"Adult, 1 Egg
 PAL0910,66,Chinstrap penguin (Pygoscelis antarctica),Anvers,Dream,"Adult, 1 Egg Stage",N99A2,No,2009-11-21,49.6,18.2,193,3775,MALE,9.4618,-24.70615,Nest never observed with full clutch.
 PAL0910,67,Chinstrap penguin (Pygoscelis antarctica),Anvers,Dream,"Adult, 1 Egg Stage",N100A1,Yes,2009-11-21,50.8,19,210,4100,MALE,9.98044,-24.68741,NA
 PAL0910,68,Chinstrap penguin (Pygoscelis antarctica),Anvers,Dream,"Adult, 1 Egg Stage",N100A2,Yes,2009-11-21,50.2,18.7,198,3775,FEMALE,9.39305,-24.25255,NA
-
@@ -18,20 +18,19 @@
     plt.figure(figsize=(2.5, 2))
     patches = list()
     for this_y, target_name in enumerate(iris.target_names):
-        patch = plt.hist(x[y == this_y],
-                         bins=np.linspace(x.min(), x.max(), 20),
-                         label=target_name)
+        patch = plt.hist(
+            x[y == this_y],
+            bins=np.linspace(x.min(), x.max(), 20),
+            label=target_name,
+        )
         patches.append(patch[-1][0])
     style_figs.light_axis()
-    feature_name = feature_name.replace(' ', '_')
-    feature_name = feature_name.replace('(', '')
-    feature_name = feature_name.replace(')', '')
-    plt.savefig('iris_{}_hist.svg'.format(feature_name))
+    feature_name = feature_name.replace(" ", "_")
+    feature_name = feature_name.replace("(", "")
+    feature_name = feature_name.replace(")", "")
+    plt.savefig("iris_{}_hist.svg".format(feature_name))
 
-plt.figure(figsize=(6, .25))
-plt.legend(patches, iris.target_names, ncol=3, loc=(0, -.37),
-           borderaxespad=0)
+plt.figure(figsize=(6, 0.25))
+plt.legend(patches, iris.target_names, ncol=3, loc=(0, -0.37), borderaxespad=0)
 style_figs.no_axis()
-plt.savefig('legend_irises.svg')
-
-
+plt.savefig("legend_irises.svg")
Original file line number	Diff line number	Diff line change
`@@ -33,4 +33,3 @@ doc/_build`
`33`	`33`	`.idea`
`34`	`34`	`*.code-workspace`
`35`	`35`	`.vscode`
`36`		`-`
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1 @@`
`1`	`1`	`This directory is to setup jupyter on binder`
`2`		`-`
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`	`1`	`# To use jupytext in binder`
`2`		`-c.ContentsManager.preferred_jupytext_formats_read = 'py:percent' # noqa`
	`2`	`+c.ContentsManager.preferred_jupytext_formats_read = "py:percent" # noqa`