INRIA
diff --git a/.github/workflows/deploy-gh-pages.yml b/.github/workflows/deploy-gh-pages.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/jupyter-book-pr-preview.yml b/.github/workflows/jupyter-book-pr-preview.yml
Lines changed: 3 additions & 4 deletions
diff --git a/.gitignore b/.gitignore
Lines changed: 0 additions & 1 deletion
diff --git a/.jupyter/README.md b/.jupyter/README.md
Lines changed: 0 additions & 1 deletion
diff --git a/.jupyter/jupyter_notebook_config.py b/.jupyter/jupyter_notebook_config.py
Lines changed: 1 addition & 1 deletion
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
Lines changed: 6 additions & 6 deletions
diff --git a/CITATION.cff b/CITATION.cff
Lines changed: 8 additions & 0 deletions
diff --git a/README.md b/README.md
Lines changed: 3 additions & 4 deletions
diff --git a/build_tools/generate-exercise-from-solution.py b/build_tools/generate-exercise-from-solution.py
Lines changed: 49 additions & 16 deletions
diff --git a/build_tools/generate-index.py b/build_tools/generate-index.py
Lines changed: 12 additions & 5 deletions
@@ -58,7 +58,7 @@ jobs:
 
     - name: Upload jupyter-book artifact for preview in PRs
       if: ${{ github.event_name == 'pull_request' }}
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
         name: jupyter-book
         path: |
 
@@ -19,11 +19,10 @@ jobs:
           sha: ${{ github.event.workflow_run.head_sha }}
           context: 'JupyterBook preview'
 
-      - uses: dawidd6/action-download-artifact@v2
+      - uses: actions/download-artifact@v4
         with:
-          github_token: ${{secrets.GITHUB_TOKEN}}
-          workflow: deploy-gh-pages.yml
-          run_id: ${{ github.event.workflow_run.id }}
+          github-token: ${{secrets.GITHUB_TOKEN}}
+          run-id: ${{ github.event.workflow_run.id }}
           name: jupyter-book
 
       - name: Get pull request number
 
@@ -33,4 +33,3 @@ doc/_build
 .idea
 *.code-workspace
 .vscode
-
@@ -1,2 +1 @@
 This directory is to setup jupyter on binder
-
@@ -1,2 +1,2 @@
 # To use jupytext in binder
-c.ContentsManager.preferred_jupytext_formats_read = 'py:percent'  # noqa
+c.ContentsManager.preferred_jupytext_formats_read = "py:percent"  # noqa
@@ -5,16 +5,16 @@ repos:
   -   id: check-yaml
   -   id: end-of-file-fixer
       exclude: notebooks
+      exclude_types: [svg]
   -   id: trailing-whitespace
       exclude: notebooks
+      exclude_types: [svg]
 - repo: https://github.com/psf/black
   rev: 23.1.0
   hooks:
   -   id: black
-- repo: https://github.com/pycqa/flake8
-  rev: 4.0.1
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  rev: v0.11.2
   hooks:
-    - id: flake8
-      entry: pflake8
-      additional_dependencies: [pyproject-flake8]
-      types: [file, python]
+  -   id: ruff
+      args: ["--fix", "--output-format=full"]
@@ -0,0 +1,8 @@
+cff-version: 1.2.0
+message: "If you use this content, please cite it as below."
+authors:
+  - name: "The scikit-learn MOOC developers"
+title: "scikit-learn MOOC"
+version: latest
+doi: 10.5281/zenodo.7220306
+url: "https://github.com/INRIA/scikit-learn-mooc"
@@ -1,9 +1,8 @@
 # scikit-learn course
 
-📢 📢 📢 A new session of the [Machine learning in Python with scikit-learn
-MOOC](https://www.fun-mooc.fr/en/courses/machine-learning-python-scikit-learn),
-is available starting on November 8th, 2023 and will remain open on self-paced
-mode. Enroll for the full MOOC experience (quizz solutions, executable
+This is the source code for the [Machine learning in Python with scikit-learn
+MOOC](https://www.fun-mooc.fr/en/courses/machine-learning-python-scikit-learn).
+Enroll for the full MOOC experience (quiz solutions, executable
 notebooks, discussion forum, etc ...) !
 
 The MOOC is free and hosted on the [FUN-MOOC](https://fun-mooc.fr/) platform
 
@@ -5,6 +5,10 @@
 from jupytext.myst import myst_to_notebook
 import jupytext
 
+
+WRITE_YOUR_CODE_COMMENT = "# Write your code here."
+
+
 def replace_simple_text(input_py_str):
     result = input_py_str.replace("📃 Solution for", "📝")
     return result
@@ -19,37 +23,62 @@ def remove_solution(input_py_str):
       before this comment and add "# Write your code here." at the end of the
       cell.
     """
-    nb = jupytext.reads(input_py_str, fmt='py:percent')
+    nb = jupytext.reads(input_py_str, fmt="py:percent")
 
-    cell_tags_list = [c['metadata'].get('tags') for c in nb.cells]
-    is_solution_list = [tags is not None and 'solution' in tags
-                        for tags in cell_tags_list]
+    cell_tags_list = [c["metadata"].get("tags") for c in nb.cells]
+    is_solution_list = [
+        tags is not None and "solution" in tags for tags in cell_tags_list
+    ]
     # Completely remove cells with "solution" tags
-    nb.cells = [cell for cell, is_solution in zip(nb.cells, is_solution_list)
-                if not is_solution]
+    nb.cells = [
+        cell
+        for cell, is_solution in zip(nb.cells, is_solution_list)
+        if not is_solution
+    ]
 
     # Partial cell removal based on "# solution" comment
     marker = "# solution"
-    pattern = re.compile(f"^{marker}.*", flags=re.MULTILINE|re.DOTALL)
+    pattern = re.compile(f"^{marker}.*", flags=re.MULTILINE | re.DOTALL)
 
-    cells_to_modify = [c for c in nb.cells if c["cell_type"] == "code" and
-                       marker in c["source"]]
+    cells_to_modify = [
+        c
+        for c in nb.cells
+        if c["cell_type"] == "code" and marker in c["source"]
+    ]
 
     for c in cells_to_modify:
-        c["source"] = pattern.sub("# Write your code here.", c["source"])
+        c["source"] = pattern.sub(WRITE_YOUR_CODE_COMMENT, c["source"])
+
+    previous_cell_is_write_your_code = False
+    all_cells_before_deduplication = nb.cells
+    nb.cells = []
+    for c in all_cells_before_deduplication:
+        if c["cell_type"] == "code" and c["source"] == WRITE_YOUR_CODE_COMMENT:
+            current_cell_is_write_your_code = True
+        else:
+            current_cell_is_write_your_code = False
+        if (
+            current_cell_is_write_your_code
+            and previous_cell_is_write_your_code
+        ):
+            # Drop duplicated "write your code here" cells.
+            continue
+        nb.cells.append(c)
+        previous_cell_is_write_your_code = current_cell_is_write_your_code
 
     # TODO: we could potentially try to avoid changing the input file jupytext
     # header since this info is rarely useful. Let's keep it simple for now.
-    py_nb_str = jupytext.writes(nb, fmt='py:percent')
+    py_nb_str = jupytext.writes(nb, fmt="py:percent")
     return py_nb_str
 
 
 def write_exercise(solution_path, exercise_path):
+    print(f"Writing exercise to {exercise_path} from solution {solution_path}")
     input_str = solution_path.read_text()
 
     output_str = input_str
     for replace_func in [replace_simple_text, remove_solution]:
-        output_str= replace_func(output_str)
+        output_str = replace_func(output_str)
     exercise_path.write_text(output_str)
 
 
@@ -59,7 +88,9 @@ def write_all_exercises(python_scripts_folder):
     for solution_path in solution_paths:
         exercise_path = Path(str(solution_path).replace("_sol_", "_ex_"))
         if not exercise_path.exists():
-            print(f"{exercise_path} does not exist")
+            print(
+                f"{exercise_path} does not exist, generating it from solution."
+            )
 
         write_exercise(solution_path, exercise_path)
 
@@ -70,12 +101,14 @@ def write_all_exercises(python_scripts_folder):
     if path.is_dir():
         write_all_exercises(path)
     else:
-        if '_ex_' not in str(path):
+        if "_ex_" not in str(path):
             raise ValueError(
-                f'Path argument should be an exercise file. Path was {path}')
+                f"Path argument should be an exercise file. Path was {path}"
+            )
         solution_path = Path(str(path).replace("_ex_", "_sol_"))
         if not solution_path.exists():
             raise ValueError(
-                f"{solution_path} does not exist, check argument path {path}")
+                f"{solution_path} does not exist, check argument path {path}"
+            )
 
         write_exercise(solution_path, path)
@@ -41,7 +41,7 @@ def get_first_title(path):
     elif path.suffix == ".md":
         md_str = path.read_text()
     else:
-        raise ValueError(f"{filename} is not a .py or a .md file")
+        raise ValueError(f"{path} is not a .py or a .md file")
 
     return get_first_title_from_md_str(md_str)
 
@@ -96,7 +96,9 @@ def get_single_file_markdown(docname):
         # This is simpler to point to inria.github.io generated HTML otherwise
         # there are quirks (MyST in quizzes not supported, slides not working,
         # etc ...)
-        relative_url = str(target).replace("jupyter-book/", "").replace(".md", ".html")
+        relative_url = (
+            str(target).replace("jupyter-book/", "").replace(".md", ".html")
+        )
         target = f"https://inria.github.io/scikit-learn-mooc/{relative_url}"
 
     return f"[{title}]({target})"
@@ -140,7 +142,9 @@ def test_get_lesson_markdown():
     documents = json_info["documents"]
     print(
         get_lesson_markdown(
-            documents["predictive_modeling_pipeline/01_tabular_data_exploration_index"]
+            documents[
+                "predictive_modeling_pipeline/01_tabular_data_exploration_index"
+            ]
         )
     )
 
@@ -156,7 +160,8 @@ def get_module_markdown(module_dict, documents):
     module_title = module_dict["caption"]
     heading = f"# {module_title}"
     content = "\n\n".join(
-        get_lesson_markdown(documents[docname]) for docname in module_dict["items"]
+        get_lesson_markdown(documents[docname])
+        for docname in module_dict["items"]
     )
     return f"{heading}\n\n{content}"
 
@@ -219,7 +224,9 @@ def get_full_index_ipynb(toc_path):
     md_str = get_full_index_markdown(toc_path)
     nb = jupytext.reads(md_str, format=".md")
 
-    nb = nbformat.v4.new_notebook(cells=[nbformat.v4.new_markdown_cell(md_str)])
+    nb = nbformat.v4.new_notebook(
+        cells=[nbformat.v4.new_markdown_cell(md_str)]
+    )
 
     # nb_content = jupytext.writes(nb, fmt=".ipynb")
     # nb = json.loads(nb_content)
-Original file line number
+Diff line change
 .idea
 *.code-workspace
 .vscode
+-
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1 @@`
`1`	`1`	`This directory is to setup jupyter on binder`
`2`		`-`
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`	`1`	`# To use jupytext in binder`
`2`		`-c.ContentsManager.preferred_jupytext_formats_read = 'py:percent' # noqa`
	`2`	`+c.ContentsManager.preferred_jupytext_formats_read = "py:percent" # noqa`