diff --git a/.gitignore b/.gitignore index b1622e2..512bd36 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,11 @@ src/tranquilo_dev/slidev/public/bld_paper/ src/tranquilo_dev/slidev/public/bld_slidev/* src/tranquilo_dev/slidev/public/bld_slidev/ +# docs +docs/source/bld/* +docs/source/_static/bld/* +docs/source/index.md + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..747ffb7 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/rtd_environment.yml b/docs/rtd_environment.yml new file mode 100644 index 0000000..cdb3456 --- /dev/null +++ b/docs/rtd_environment.yml @@ -0,0 +1,20 @@ +--- +name: tranquilo-dev-docs +channels: + - conda-forge + - nodefaults +dependencies: + - python=3.11 + - pip + - setuptools_scm + - toml + - black + - sphinx + - sphinxcontrib-bibtex + - sphinx-copybutton + - sphinx-panels + - ipython + - ipython_genutils + - nbsphinx + - pydata-sphinx-theme<=0.12.0 + - myst-parser diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..b3100ed --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,60 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "tranquilo-dev" +author = "Janos Gabler, Tim Mensinger, Sebastian Gsell and Mariam Petrosyan" +release = "2023" +copyright = f"2023, {author}" # noqa: A001 + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +# Add any Sphinx extension module names here, as strings. 
They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. + +extensions = [ + "sphinx_copybutton", + "sphinx_panels", + "nbsphinx", + "myst_parser", +] + + +myst_enable_extensions = [ + "colon_fence", + "dollarmath", +] + +source_suffix = [".md"] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. + +html_theme = "pydata_sphinx_theme" + + +html_theme_options = { + "navbar_start": [], + "navbar_center": ["navbar-nav"], + "navbar_end": ["navbar-icon-links"], + "navbar_persistent": ["search-button"], + "navigation_with_keys": False, +} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] diff --git a/environment.yml b/environment.yml index 7453275..e6391c5 100644 --- a/environment.yml +++ b/environment.yml @@ -32,17 +32,22 @@ dependencies: - pytask<0.4 - black - nb_black - - pre-commit - pdbpp + - pre-commit + - pytask >=0.2.3 - pytest - pytest-cov - pytest-xdist + - sphinx + - setuptools_scm + - toml # Misc End - pip: - black - blackcellmagic - kaleido + - snakemd # install this project locally - -e . diff --git a/src/tranquilo_dev/benchmark_report/task_copy_plots.py b/src/tranquilo_dev/benchmark_report/task_copy_plots.py new file mode 100644 index 0000000..69653bd --- /dev/null +++ b/src/tranquilo_dev/benchmark_report/task_copy_plots.py @@ -0,0 +1,50 @@ +import shutil + +import estimagic as em +import pytask +from tranquilo_dev.config import BLD +from tranquilo_dev.config import PLOT_CONFIG +from tranquilo_dev.config import PROBLEM_SETS +from tranquilo_dev.config import SPHINX_STATIC_BLD + + +SPHINX_FIGURES = SPHINX_STATIC_BLD / "figures" +BLD_FIGURES = BLD / "figures" + +for name, info in PLOT_CONFIG.items(): + + for plot_type in ["profile", "deviation"]: + source_file = BLD_FIGURES / f"{plot_type}_plots" / f"{name}.svg" + dest_file = SPHINX_FIGURES / f"{plot_type}_plots" / f"{name}.svg" + + @pytask.mark.depends_on(source_file) + @pytask.mark.produces(dest_file) + @pytask.mark.task(id=f"copy_{plot_type}_plot_{name}") + def task_copy_file(depends_on, produces): + shutil.copyfile(depends_on, produces) + + plot_type = "convergence" + source_files = {} + dest_files = {} + + problems = em.get_benchmark_problems(**PROBLEM_SETS[info["problem_name"]]) + for problem in problems.keys(): + source_files[problem] = ( + BLD_FIGURES + / f"{plot_type}_plots" # noqa: W503 + / f"{name}" # noqa: W503 + / f"{problem}.svg" # noqa: W503 + ) + dest_files[problem] = ( + SPHINX_FIGURES + / f"{plot_type}_plots" # noqa: W503 + / f"{name}" # noqa: W503 + / f"{problem}.svg" # noqa: W503 + ) + + @pytask.mark.depends_on(source_files) + @pytask.mark.produces(dest_files) + @pytask.mark.task(id=f"copy_{plot_type}_plots_{name}") + def task_copy_files_convergence_plots(depends_on, produces): + for key in depends_on.keys(): + shutil.copyfile(depends_on[key], produces[key]) diff --git a/src/tranquilo_dev/benchmark_report/task_create_index.py b/src/tranquilo_dev/benchmark_report/task_create_index.py new file mode 100644 index 
0000000..5d6db80
--- /dev/null
+++ b/src/tranquilo_dev/benchmark_report/task_create_index.py
@@ -0,0 +1,31 @@
+import pytask
+import snakemd
+from tranquilo_dev.config import PLOT_CONFIG
+from tranquilo_dev.config import SPHINX
+from tranquilo_dev.config import SPHINX_PAGES_BLD
+
+
+DEPS = {}
+for name in PLOT_CONFIG.keys():
+    DEPS[name] = SPHINX_PAGES_BLD / f"{name}.md"
+
+
+@pytask.mark.depends_on(DEPS)
+@pytask.mark.produces(SPHINX / "index.md")
+def task_create_index(produces):
+    doc = snakemd.new_doc()
+
+    doc.add_heading("Welcome to tranquilo-dev's benchmark reports!")
+    doc.add_paragraph("This is the index page of the benchmark reports.")
+
+    pages = "\n".join([f"bld/{name}" for name in PLOT_CONFIG.keys()])
+
+    doc.add_raw(f"```{{toctree}} \n--- \nmaxdepth: 1 \n--- \n{pages}\n```")
+
+    doc.add_paragraph(
+        """
+        {ref}`search`
+        """
+    )
+
+    doc.dump(produces.parent / "index")
diff --git a/src/tranquilo_dev/benchmark_report/task_create_markdown_pages.py b/src/tranquilo_dev/benchmark_report/task_create_markdown_pages.py
new file mode 100644
index 0000000..9ff7710
--- /dev/null
+++ b/src/tranquilo_dev/benchmark_report/task_create_markdown_pages.py
@@ -0,0 +1,315 @@
+import estimagic as em
+import pandas as pd
+import pytask
+import snakemd
+from estimagic.benchmarking.process_benchmark_results import (
+    process_benchmark_results,
+)
+from tranquilo_dev.config import BLD
+from tranquilo_dev.config import PLOT_CONFIG
+from tranquilo_dev.config import PROBLEM_SETS
+from tranquilo_dev.config import SPHINX_PAGES_BLD
+from tranquilo_dev.config import SPHINX_STATIC_BLD
+
+
+for name, info in PLOT_CONFIG.items():
+
+    problem_name = info["problem_name"]
+    problems = em.get_benchmark_problems(**PROBLEM_SETS[problem_name])
+
+    DEPS_RESULTS = {}
+    DEPS_FIGURES = {}
+    for scenario in info["scenarios"]:
+        DEPS_RESULTS[scenario] = BLD / "benchmarks" / f"{problem_name}_{scenario}.pkl"
+    for problem in problems.keys():
+        # Use one key per problem so every convergence figure is tracked.
+        DEPS_FIGURES[f"convergence_{problem}"] = (
+            SPHINX_STATIC_BLD
+            / "figures"  # noqa: W503
+            / "convergence_plots"  # noqa: W503
+            / f"{name}"  # noqa: W503
+            / f"{problem}.svg"  # noqa: W503
+        )
+    for plot_type in ["profile", "deviation"]:
+        DEPS_FIGURES[plot_type] = (
+            SPHINX_STATIC_BLD / "figures" / f"{plot_type}_plots" / f"{name}.svg"
+        )
+
+    @pytask.mark.depends_on(DEPS_FIGURES | DEPS_RESULTS)
+    @pytask.mark.produces(SPHINX_PAGES_BLD / f"{name}.md")
+    @pytask.mark.task(id=f"markdown_page_{name}")
+    def task_create_markdown_pages(
+        produces,
+        name=name,
+        info=info,
+        problems=problems,
+        paths=DEPS_RESULTS,
+    ):
+        scenarios = info["scenarios"]
+        results = {}
+        for path in paths.values():
+            results = {**results, **pd.read_pickle(path)}
+
+        convergence_report = _create_convergence_report(
+            problems=problems,
+            results=results,
+            **info.get("convergence_report_options", {}),
+        )
+        rank_report = _create_rank_report(
+            problems=problems,
+            results=results,
+            **info.get("rank_report_options", {}),
+        )
+        traceback_report = _create_traceback_report(
+            results=results,
+        )
+
+        doc = snakemd.new_doc()
+        doc.add_heading(f"{name}", level=1)
+
+        # 1. Profile and Deviation Plots
+        for plot_type in ["profile", "deviation"]:
+            doc.add_heading(f"{plot_type.capitalize()} Plot", level=2)
+            doc.add_raw(
+                f"![{plot_type}](../_static/bld/figures/{plot_type}_plots/{name}.svg)"
+            )
+
+        # 2. Convergence report
+        doc.add_heading("Convergence Report", level=2)
+        rows = convergence_report.reset_index().values.tolist()
+        header = ["problem"] + info["scenarios"] + ["dimensionality"]
+        doc.add_table(header, rows)
+
+        # 3. Rank report
+        doc.add_heading("Rank Report", level=2)
+        rows = rank_report.reset_index().values.tolist()
+        header = ["problem"] + info["scenarios"]
+        doc.add_table(header, rows)
+
+        # 4. Error messages, grouped by scenario
+        if len(traceback_report) > 0:
+            doc.add_heading("Traceback Report", level=2)
+            for scenario in traceback_report:
+                if not traceback_report[scenario].isnull().all():
+                    doc.add_heading(scenario, level=3)
+                    tracebacks = traceback_report[scenario].to_dict()
+                    for problem, traceback in tracebacks.items():
+                        if isinstance(traceback, str):
+                            doc.add_heading(problem, level=4)
+                            doc.add_raw(f"```python \n{traceback} \n```")
+
+        # 5. Convergence plots of problems that have not been solved
+        problems_not_solved = _get_problems_not_solved(
+            problems=problems,
+            results=results,
+        )
+
+        doc.add_heading("Convergence Plots for Problems Not Solved", level=2)
+        for scenario in scenarios:
+            doc.add_heading(scenario, level=3)
+            for problem in problems_not_solved:
+                doc.add_raw(
+                    f"![convergence_{problem}]"
+                    f"(../_static/bld/figures/convergence_plots/{name}/{problem}.svg)"
+                )
+
+        doc.dump(produces.parent / name)
+
+
+def _create_convergence_report(
+    problems, results, *, stopping_criterion="y", x_precision=1e-4, y_precision=1e-4
+):
+    """Create a DataFrame with all information needed for the convergence report.
+
+    Args:
+        problems (dict): estimagic benchmarking problems dictionary. Keys are the
+            problem names. Values contain information on the problem, including the
+            solution value.
+        results (dict): estimagic benchmarking results dictionary. Keys are
+            tuples of the form (problem, algorithm), values are dictionaries of the
+            collected information on the benchmark run, including 'criterion_history'
+            and 'time_history'.
+        stopping_criterion (str): one of "x_and_y", "x_or_y", "x", "y". Determines
+            how convergence is determined from the two precisions. Default is "y".
+        x_precision (float or None): how close an algorithm must have gotten to the
+            true parameter values (as percent of the Euclidean distance between start
+            and solution parameters) before the criterion for clipping and convergence
+            is fulfilled. Default is 1e-4.
+        y_precision (float or None): how close an algorithm must have gotten to the
+            true criterion values (as percent of the distance between start
+            and solution criterion value) before the criterion for clipping and
+            convergence is fulfilled. Default is 1e-4.
+
+    Returns:
+        pandas.DataFrame: columns are the scenarios (i.e. algorithms) and the
+            dimensionality of the problems, index are the problems.
+            For the scenario columns, the values are strings that are either
+            "success", "failed", or "error". For the dimensionality column, the values
+            denote the number of dimensions of the problem.
+
+    """
+    _, converged_info = process_benchmark_results(
+        problems=problems,
+        results=results,
+        stopping_criterion=stopping_criterion,
+        x_precision=x_precision,
+        y_precision=y_precision,
+    )
+
+    convergence_report = converged_info.replace({True: "success", False: "failed"})
+
+    for key, value in results.items():
+        if isinstance(value["solution"], str):
+            convergence_report.at[key] = "error"
+
+    dim = {problem: len(problems[problem]["inputs"]["params"]) for problem in problems}
+    convergence_report["dimensionality"] = convergence_report.index.map(dim)
+
+    return convergence_report
+
+
+def _create_rank_report(
+    problems,
+    results,
+    *,
+    runtime_measure="n_evaluations",
+    normalize_runtime=False,
+    stopping_criterion="y",
+    x_precision=1e-4,
+    y_precision=1e-4,
+):
+    """Create a DataFrame with all information needed for the rank report.
+
+    Args:
+        problems (dict): estimagic benchmarking problems dictionary. Keys are the
+            problem names. Values contain information on the problem, including the
+            solution value.
+        results (dict): estimagic benchmarking results dictionary. Keys are
+            tuples of the form (problem, algorithm), values are dictionaries of the
+            collected information on the benchmark run, including 'criterion_history'
+            and 'time_history'.
+        runtime_measure (str): "n_evaluations", "n_batches" or "walltime".
+            This is the runtime until the desired convergence was reached by an
+            algorithm. This is called the performance measure by Moré and Wild (2009).
+            Default is "n_evaluations".
+        normalize_runtime (bool): If True, the runtime each algorithm needed for each
+            problem is scaled by the time the fastest algorithm needed. These
+            normalized runtimes are what Moré and Wild (2009) call data profiles.
+            Default is False.
+        stopping_criterion (str): one of "x_and_y", "x_or_y", "x", "y". Determines
+            how convergence is determined from the two precisions. Default is "y".
+        x_precision (float or None): how close an algorithm must have gotten to the
+            true parameter values (as percent of the Euclidean distance between start
+            and solution parameters) before the criterion for clipping and convergence
+            is fulfilled. Default is 1e-4.
+        y_precision (float or None): how close an algorithm must have gotten to the
+            true criterion values (as percent of the distance between start
+            and solution criterion value) before the criterion for clipping and
+            convergence is fulfilled. Default is 1e-4.
+
+    Returns:
+        pandas.DataFrame: columns are the scenarios (i.e. algorithms), index are the
+            problems. The values are the ranks of the algorithms for each problem:
+            0 means the algorithm was the fastest, 1 means it was the second fastest,
+            and so on. If an algorithm did not converge on a problem, the value is
+            "failed". If an algorithm encountered an error during optimization,
+            the value is "error".
+
+    """
+    histories, converged_info = process_benchmark_results(
+        problems=problems,
+        results=results,
+        stopping_criterion=stopping_criterion,
+        x_precision=x_precision,
+        y_precision=y_precision,
+    )
+    scenarios = list({algo[1] for algo in results.keys()})
+
+    success_info = converged_info.replace({True: "success", False: "failed"})
+    for key, value in results.items():
+        if isinstance(value["solution"], str):
+            success_info.at[key] = "error"
+
+    solution_times = histories.groupby(["problem", "algorithm"])[runtime_measure].max()
+
+    if normalize_runtime:
+        solution_times = solution_times.unstack()
+        solution_times = solution_times.divide(solution_times.min(axis=1), axis=0)
+        solution_times = solution_times.stack(dropna=False)
+        solution_times.name = runtime_measure
+
+    solution_times = solution_times.reset_index()
+    solution_times["rank"] = (
+        solution_times.groupby("problem")[runtime_measure].rank(
+            method="dense", ascending=True
+        )
+        - 1  # noqa: W503
+    ).astype("Int64")
+
+    df_wide = solution_times.pivot(index="problem", columns="algorithm", values="rank")
+    rank_report = df_wide.astype(str)[scenarios]
+    rank_report[~converged_info] = success_info[scenarios]
+
+    return rank_report
+
+
+def _create_traceback_report(results):
+    """Create a DataFrame with the tracebacks of all runs that stopped with an error.
+
+    Args:
+        results (dict): estimagic benchmarking results dictionary. Keys are
+            tuples of the form (problem, algorithm), values are dictionaries of the
+            collected information on the benchmark run, including 'criterion_history'
+            and 'time_history'.
+
+    Returns:
+        pandas.DataFrame: columns are the scenarios (i.e. algorithms), index are the
+            problems. The values are the tracebacks of the algorithms for each problem
+            where the algorithm stopped with an error.
+
+    """
+    scenarios = list({algo[1] for algo in results.keys()})
+
+    tracebacks = {}
+    for scenario in scenarios:
+        tracebacks[scenario] = {}
+
+    for key, value in results.items():
+        if isinstance(value["solution"], str):
+            if key[1] in scenarios:
+                tracebacks[key[1]][key[0]] = value["solution"]
+
+    traceback_report = pd.DataFrame.from_dict(tracebacks, orient="columns")
+
+    return traceback_report
+
+
+def _get_problems_not_solved(problems, results, stopping_criterion="y"):
+    """Get a list of problems that have not been solved by any algorithm.
+
+    Args:
+        problems (dict): estimagic benchmarking problems dictionary. Keys are the
+            problem names. Values contain information on the problem, including the
+            solution value.
+        results (dict): estimagic benchmarking results dictionary. Keys are
+            tuples of the form (problem, algorithm), values are dictionaries of the
+            collected information on the benchmark run, including 'criterion_history'
+            and 'time_history'.
+        stopping_criterion (str): one of "x_and_y", "x_or_y", "x", "y". Determines
+            how convergence is determined from the two precisions. Default is "y".
+
+    Returns:
+        list: list of problem names that have not been solved by any algorithm.
+
+    """
+    # Determine which problems converged, given the requested stopping criterion.
+    kwargs = {"stopping_criterion": stopping_criterion}
+
+    _, converged_info = process_benchmark_results(
+        problems=problems,
+        results=results,
+        **kwargs,
+    )
+
+    problems_not_solved = converged_info.index[~converged_info.any(axis=1)].tolist()
+
+    return problems_not_solved
diff --git a/src/tranquilo_dev/config.py b/src/tranquilo_dev/config.py
index 41edc35..541a6f3 100644
--- a/src/tranquilo_dev/config.py
+++ b/src/tranquilo_dev/config.py
@@ -27,6 +27,10 @@
 BLD = ROOT.joinpath("bld").resolve()
 PUBLIC = BLD.joinpath("public").resolve()
+SPHINX = ROOT.joinpath("docs/source/").resolve()
+SPHINX_PAGES_BLD = SPHINX.joinpath("bld").resolve()
+SPHINX_STATIC_BLD = SPHINX.joinpath("_static/bld").resolve()
+


 # ======================================================================================
 # Global Options
diff --git a/tox.ini b/tox.ini
index 8132c01..1572049 100644
--- a/tox.ini
+++ b/tox.ini
@@ -33,7 +33,7 @@
 ignore = D004
 max-line-length = 88

 [flake8]
-docstring-convention = numpy
+docstring-convention = google
 ignore = D ; ignore missing docstrings.
     E203 ; ignore whitespace around : which is enforced by Black.