INRIA
diff --git a/‎.github/workflows/deploy-gh-pages.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/deploy-gh-pages.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/jupyter-book-pr-preview.yml
Lines changed: 4 additions & 3 deletions b/‎.github/workflows/jupyter-book-pr-preview.yml
Lines changed: 4 additions & 3 deletions
diff --git a/‎.gitignore
Lines changed: 1 addition & 0 deletions b/‎.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎.jupyter/README.md
Lines changed: 1 addition & 0 deletions b/‎.jupyter/README.md
Lines changed: 1 addition & 0 deletions
diff --git a/‎.jupyter/jupyter_notebook_config.py
Lines changed: 1 addition & 1 deletion b/‎.jupyter/jupyter_notebook_config.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎.pre-commit-config.yaml
Lines changed: 6 additions & 6 deletions b/‎.pre-commit-config.yaml
Lines changed: 6 additions & 6 deletions
diff --git a/‎CITATION.cff
Lines changed: 0 additions & 8 deletions b/‎CITATION.cff
Lines changed: 0 additions & 8 deletions
diff --git a/‎README.md
Lines changed: 4 additions & 3 deletions b/‎README.md
Lines changed: 4 additions & 3 deletions
diff --git a/‎build_tools/generate-exercise-from-solution.py
Lines changed: 16 additions & 49 deletions b/‎build_tools/generate-exercise-from-solution.py
Lines changed: 16 additions & 49 deletions
diff --git a/‎build_tools/generate-index.py
Lines changed: 5 additions & 12 deletions b/‎build_tools/generate-index.py
Lines changed: 5 additions & 12 deletions
diff --git a/‎build_tools/generate-quizzes.py
Lines changed: 12 additions & 19 deletions b/‎build_tools/generate-quizzes.py
Lines changed: 12 additions & 19 deletions
diff --git a/‎build_tools/sanity-check.py
Lines changed: 5 additions & 6 deletions b/‎build_tools/sanity-check.py
Lines changed: 5 additions & 6 deletions
diff --git a/‎check_env.py
Lines changed: 1 addition & 1 deletion b/‎check_env.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎datasets/penguins.csv
Lines changed: 1 addition & 0 deletions b/‎datasets/penguins.csv
Lines changed: 1 addition & 0 deletions
diff --git a/‎environment-dev.yml
Lines changed: 1 addition & 1 deletion b/‎environment-dev.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎environment.yml
Lines changed: 1 addition & 1 deletion b/‎environment.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎figures/cross_validation_train_test_diagram.png
-35.3 KB b/‎figures/cross_validation_train_test_diagram.png
-35.3 KB
diff --git a/‎figures/nested_cross_validation_diagram.png
-78.1 KB b/‎figures/nested_cross_validation_diagram.png
-78.1 KB
diff --git a/‎figures/plot_iris_visualization.py
Lines changed: 13 additions & 12 deletions b/‎figures/plot_iris_visualization.py
Lines changed: 13 additions & 12 deletions
@@ -58,7 +58,7 @@ jobs:
 
     - name: Upload jupyter-book artifact for preview in PRs
       if: ${{ github.event_name == 'pull_request' }}
-      uses: actions/upload-artifact@v4
+      uses: actions/upload-artifact@v3
       with:
         name: jupyter-book
         path: |
 
@@ -19,10 +19,11 @@ jobs:
           sha: ${{ github.event.workflow_run.head_sha }}
           context: 'JupyterBook preview'
 
-      - uses: actions/download-artifact@v4
+      - uses: dawidd6/action-download-artifact@v2
         with:
-          github-token: ${{secrets.GITHUB_TOKEN}}
-          run-id: ${{ github.event.workflow_run.id }}
+          github_token: ${{secrets.GITHUB_TOKEN}}
+          workflow: deploy-gh-pages.yml
+          run_id: ${{ github.event.workflow_run.id }}
           name: jupyter-book
 
       - name: Get pull request number
 
@@ -33,3 +33,4 @@ doc/_build
 .idea
 *.code-workspace
 .vscode
+
@@ -1 +1,2 @@
 This directory is to setup jupyter on binder
+
@@ -1,2 +1,2 @@
 # To use jupytext in binder
-c.ContentsManager.preferred_jupytext_formats_read = "py:percent"  # noqa
+c.ContentsManager.preferred_jupytext_formats_read = 'py:percent'  # noqa
@@ -5,16 +5,16 @@ repos:
   -   id: check-yaml
   -   id: end-of-file-fixer
       exclude: notebooks
-      exclude_types: [svg]
   -   id: trailing-whitespace
       exclude: notebooks
-      exclude_types: [svg]
 - repo: https://github.com/psf/black
   rev: 23.1.0
   hooks:
   -   id: black
-- repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.11.2
+- repo: https://github.com/pycqa/flake8
+  rev: 4.0.1
   hooks:
-  -   id: ruff
-      args: ["--fix", "--output-format=full"]
+    - id: flake8
+      entry: pflake8
+      additional_dependencies: [pyproject-flake8]
+      types: [file, python]
@@ -1,8 +1,9 @@
 # scikit-learn course
 
-This is the source code for the [Machine learning in Python with scikit-learn
-MOOC](https://www.fun-mooc.fr/en/courses/machine-learning-python-scikit-learn).
-Enroll for the full MOOC experience (quiz solutions, executable
+📢 📢 📢 A new session of the [Machine learning in Python with scikit-learn
+MOOC](https://www.fun-mooc.fr/en/courses/machine-learning-python-scikit-learn)
+is available starting on November 8th, 2023 and will remain open on self-paced
+mode. Enroll for the full MOOC experience (quiz solutions, executable
 notebooks, discussion forum, etc ...) !
 
 The MOOC is free and hosted on the [FUN-MOOC](https://fun-mooc.fr/) platform
 
@@ -5,10 +5,6 @@
 from jupytext.myst import myst_to_notebook
 import jupytext
 
-
-WRITE_YOUR_CODE_COMMENT = "# Write your code here."
-
-
 def replace_simple_text(input_py_str):
     result = input_py_str.replace("📃 Solution for", "📝")
     return result
@@ -23,62 +19,37 @@ def remove_solution(input_py_str):
       before this comment and add "# Write your code here." at the end of the
       cell.
     """
-    nb = jupytext.reads(input_py_str, fmt="py:percent")
+    nb = jupytext.reads(input_py_str, fmt='py:percent')
 
-    cell_tags_list = [c["metadata"].get("tags") for c in nb.cells]
-    is_solution_list = [
-        tags is not None and "solution" in tags for tags in cell_tags_list
-    ]
+    cell_tags_list = [c['metadata'].get('tags') for c in nb.cells]
+    is_solution_list = [tags is not None and 'solution' in tags
+                        for tags in cell_tags_list]
     # Completely remove cells with "solution" tags
-    nb.cells = [
-        cell
-        for cell, is_solution in zip(nb.cells, is_solution_list)
-        if not is_solution
-    ]
+    nb.cells = [cell for cell, is_solution in zip(nb.cells, is_solution_list)
+                if not is_solution]
 
     # Partial cell removal based on "# solution" comment
     marker = "# solution"
-    pattern = re.compile(f"^{marker}.*", flags=re.MULTILINE | re.DOTALL)
+    pattern = re.compile(f"^{marker}.*", flags=re.MULTILINE|re.DOTALL)
 
-    cells_to_modify = [
-        c
-        for c in nb.cells
-        if c["cell_type"] == "code" and marker in c["source"]
-    ]
+    cells_to_modify = [c for c in nb.cells if c["cell_type"] == "code" and
+                       marker in c["source"]]
 
     for c in cells_to_modify:
-        c["source"] = pattern.sub(WRITE_YOUR_CODE_COMMENT, c["source"])
-
-    previous_cell_is_write_your_code = False
-    all_cells_before_deduplication = nb.cells
-    nb.cells = []
-    for c in all_cells_before_deduplication:
-        if c["cell_type"] == "code" and c["source"] == WRITE_YOUR_CODE_COMMENT:
-            current_cell_is_write_your_code = True
-        else:
-            current_cell_is_write_your_code = False
-        if (
-            current_cell_is_write_your_code
-            and previous_cell_is_write_your_code
-        ):
-            # Drop duplicated "write your code here" cells.
-            continue
-        nb.cells.append(c)
-        previous_cell_is_write_your_code = current_cell_is_write_your_code
+        c["source"] = pattern.sub("# Write your code here.", c["source"])
 
     # TODO: we could potentially try to avoid changing the input file jupytext
     # header since this info is rarely useful. Let's keep it simple for now.
-    py_nb_str = jupytext.writes(nb, fmt="py:percent")
+    py_nb_str = jupytext.writes(nb, fmt='py:percent')
     return py_nb_str
 
 
 def write_exercise(solution_path, exercise_path):
-    print(f"Writing exercise to {exercise_path} from solution {solution_path}")
     input_str = solution_path.read_text()
 
     output_str = input_str
     for replace_func in [replace_simple_text, remove_solution]:
-        output_str = replace_func(output_str)
+        output_str= replace_func(output_str)
     exercise_path.write_text(output_str)
 
 
@@ -88,9 +59,7 @@ def write_all_exercises(python_scripts_folder):
     for solution_path in solution_paths:
         exercise_path = Path(str(solution_path).replace("_sol_", "_ex_"))
         if not exercise_path.exists():
-            print(
-                f"{exercise_path} does not exist, generating it from solution."
-            )
+            print(f"{exercise_path} does not exist")
 
         write_exercise(solution_path, exercise_path)
 
@@ -101,14 +70,12 @@ def write_all_exercises(python_scripts_folder):
     if path.is_dir():
         write_all_exercises(path)
     else:
-        if "_ex_" not in str(path):
+        if '_ex_' not in str(path):
             raise ValueError(
-                f"Path argument should be an exercise file. Path was {path}"
-            )
+                f'Path argument should be an exercise file. Path was {path}')
         solution_path = Path(str(path).replace("_ex_", "_sol_"))
         if not solution_path.exists():
             raise ValueError(
-                f"{solution_path} does not exist, check argument path {path}"
-            )
+                f"{solution_path} does not exist, check argument path {path}")
 
         write_exercise(solution_path, path)
@@ -41,7 +41,7 @@ def get_first_title(path):
     elif path.suffix == ".md":
         md_str = path.read_text()
     else:
-        raise ValueError(f"{path} is not a .py or a .md file")
+        raise ValueError(f"{filename} is not a .py or a .md file")
 
     return get_first_title_from_md_str(md_str)
 
@@ -96,9 +96,7 @@ def get_single_file_markdown(docname):
         # This is simpler to point to inria.github.io generated HTML otherwise
         # there are quirks (MyST in quizzes not supported, slides not working,
         # etc ...)
-        relative_url = (
-            str(target).replace("jupyter-book/", "").replace(".md", ".html")
-        )
+        relative_url = str(target).replace("jupyter-book/", "").replace(".md", ".html")
         target = f"https://inria.github.io/scikit-learn-mooc/{relative_url}"
 
     return f"[{title}]({target})"
@@ -142,9 +140,7 @@ def test_get_lesson_markdown():
     documents = json_info["documents"]
     print(
         get_lesson_markdown(
-            documents[
-                "predictive_modeling_pipeline/01_tabular_data_exploration_index"
-            ]
+            documents["predictive_modeling_pipeline/01_tabular_data_exploration_index"]
         )
     )
 
@@ -160,8 +156,7 @@ def get_module_markdown(module_dict, documents):
     module_title = module_dict["caption"]
     heading = f"# {module_title}"
     content = "\n\n".join(
-        get_lesson_markdown(documents[docname])
-        for docname in module_dict["items"]
+        get_lesson_markdown(documents[docname]) for docname in module_dict["items"]
     )
     return f"{heading}\n\n{content}"
 
@@ -224,9 +219,7 @@ def get_full_index_ipynb(toc_path):
     md_str = get_full_index_markdown(toc_path)
     nb = jupytext.reads(md_str, format=".md")
 
-    nb = nbformat.v4.new_notebook(
-        cells=[nbformat.v4.new_markdown_cell(md_str)]
-    )
+    nb = nbformat.v4.new_notebook(cells=[nbformat.v4.new_markdown_cell(md_str)])
 
     # nb_content = jupytext.writes(nb, fmt=".ipynb")
     # nb = json.loads(nb_content)
 
@@ -13,21 +13,16 @@ def remove_solution(input_myst_str):
     """
     nb = myst_to_notebook(input_myst_str)
 
-    cell_tags_list = [c["metadata"].get("tags") for c in nb.cells]
-    is_solution_list = [
-        tags is not None and "solution" in tags for tags in cell_tags_list
-    ]
-    nb.cells = [
-        cell
-        for cell, is_solution in zip(nb.cells, is_solution_list)
-        if not is_solution
-    ]
-
-    myst_nb_str = jupytext.writes(nb, fmt="myst")
-
-    header_pattern = re.compile(
-        r"---\njupytext.+---\s*", re.DOTALL | re.MULTILINE
-    )
+    cell_tags_list = [c['metadata'].get('tags') for c in nb.cells]
+    is_solution_list = [tags is not None and 'solution' in tags
+                        for tags in cell_tags_list]
+    nb.cells = [cell for cell, is_solution in zip(nb.cells, is_solution_list)
+                if not is_solution]
+
+    myst_nb_str = jupytext.writes(nb, fmt='myst')
+
+    header_pattern = re.compile(r"---\njupytext.+---\s*",
+                                re.DOTALL | re.MULTILINE)
     return re.sub(header_pattern, "", myst_nb_str)
 
 
@@ -44,14 +39,12 @@ def write_all_exercises(input_root_path, output_root_path):
 
     for input_path in input_exercises:
         # FIXME there may be a better way with the pathlib API
-        relative_path_str = re.sub(
-            str(input_root_path) + "/?", "", str(input_path)
-        )
+        relative_path_str = re.sub(str(input_root_path) + "/?", "",
+                                   str(input_path))
         output_path = Path(output_root_path).joinpath(relative_path_str)
         print(str(input_path), str(output_path))
         write_exercise_myst(input_path, output_path)
 
-
 if __name__ == "__main__":
     input_root_path = sys.argv[1]
     output_root_path = sys.argv[2]
 
@@ -4,8 +4,8 @@
 
 # TODO: we could get the list from .gitignore
 IGNORE_LIST = [
-    ".ipynb_checkpoints",
-    "__pycache__",
+    '.ipynb_checkpoints',
+    '__pycache__',
 ]
 
 folder1, folder2 = sys.argv[1:3]
@@ -28,7 +28,6 @@ def get_basename(folder):
     only_in_folder2 = set(basenames2) - set(basenames1)
 
     raise RuntimeError(
-        f"Inconsistency between folder {folder1} and {folder2}\n"
-        f"Only in folder {folder1}: {only_in_folder1}\n"
-        f"Only in folder {folder2}: {only_in_folder2}"
-    )
+        f'Inconsistency between folder {folder1} and {folder2}\n'
+        f'Only in folder {folder1}: {only_in_folder1}\n'
+        f'Only in folder {folder2}: {only_in_folder2}')
@@ -66,7 +66,7 @@ def import_version(pkg, min_ver, fail_msg=""):
     "numpy": "1.16",
     "scipy": "1.2",
     "matplotlib": "3.0",
-    "sklearn": "1.6",
+    "sklearn": "1.3",
     "pandas": "1",
     "seaborn": "0.11",
     "notebook": "5.7",
 
@@ -343,3 +343,4 @@ PAL0910,65,Chinstrap penguin (Pygoscelis antarctica),Anvers,Dream,"Adult, 1 Egg
 PAL0910,66,Chinstrap penguin (Pygoscelis antarctica),Anvers,Dream,"Adult, 1 Egg Stage",N99A2,No,2009-11-21,49.6,18.2,193,3775,MALE,9.4618,-24.70615,Nest never observed with full clutch.
 PAL0910,67,Chinstrap penguin (Pygoscelis antarctica),Anvers,Dream,"Adult, 1 Egg Stage",N100A1,Yes,2009-11-21,50.8,19,210,4100,MALE,9.98044,-24.68741,NA
 PAL0910,68,Chinstrap penguin (Pygoscelis antarctica),Anvers,Dream,"Adult, 1 Egg Stage",N100A2,Yes,2009-11-21,50.2,18.7,198,3775,FEMALE,9.39305,-24.25255,NA
+
@@ -2,7 +2,7 @@ name: scikit-learn-course
 channels:
   - conda-forge
 dependencies:
-  - scikit-learn >= 1.6
+  - scikit-learn >= 1.3
   - pandas >= 1
   - matplotlib-base
   - seaborn >= 0.13
 
@@ -4,7 +4,7 @@ channels:
   - conda-forge
 
 dependencies:
-  - scikit-learn >= 1.6
+  - scikit-learn >= 1.3
   - pandas >= 1
   - matplotlib-base
   - seaborn >= 0.13
 
@@ -18,19 +18,20 @@
     plt.figure(figsize=(2.5, 2))
     patches = list()
     for this_y, target_name in enumerate(iris.target_names):
-        patch = plt.hist(
-            x[y == this_y],
-            bins=np.linspace(x.min(), x.max(), 20),
-            label=target_name,
-        )
+        patch = plt.hist(x[y == this_y],
+                         bins=np.linspace(x.min(), x.max(), 20),
+                         label=target_name)
         patches.append(patch[-1][0])
     style_figs.light_axis()
-    feature_name = feature_name.replace(" ", "_")
-    feature_name = feature_name.replace("(", "")
-    feature_name = feature_name.replace(")", "")
-    plt.savefig("iris_{}_hist.svg".format(feature_name))
+    feature_name = feature_name.replace(' ', '_')
+    feature_name = feature_name.replace('(', '')
+    feature_name = feature_name.replace(')', '')
+    plt.savefig('iris_{}_hist.svg'.format(feature_name))
 
-plt.figure(figsize=(6, 0.25))
-plt.legend(patches, iris.target_names, ncol=3, loc=(0, -0.37), borderaxespad=0)
+plt.figure(figsize=(6, .25))
+plt.legend(patches, iris.target_names, ncol=3, loc=(0, -.37),
+           borderaxespad=0)
 style_figs.no_axis()
-plt.savefig("legend_irises.svg")
+plt.savefig('legend_irises.svg')
+
+
@@ -33,3 +33,4 @@ doc/_build
 .idea
 *.code-workspace
 .vscode
+
@@ -1 +1,2 @@
 This directory is to setup jupyter on binder
+
@@ -1,2 +1,2 @@
 # To use jupytext in binder
-c.ContentsManager.preferred_jupytext_formats_read = "py:percent"  # noqa
+c.ContentsManager.preferred_jupytext_formats_read = 'py:percent'  # noqa