|
| 1 | +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | +# ============================================================================== |
| 15 | +r"""Lint assertions for notebooks published on tensorflow.org. |
| 16 | +
|
| 17 | +These lints are a non-exhaustive implementation of style rules found in the |
| 18 | +TensorFlow documentation and style guides. See: |
| 19 | +
|
| 20 | +- https://www.tensorflow.org/community/contribute/docs |
| 21 | +- https://www.tensorflow.org/community/contribute/docs_style |
| 22 | +
|
| 23 | +When adding lints, link to the URL of the relevant style rule, if applicable. |
| 24 | +
|
| 25 | +Lint functions return a boolean: True to pass, False to fail. |
| 26 | +For @lint options, see the docstrings in `decorator.py`. |
| 27 | +
|
| 28 | +Lint callback functions are passed an `args` dict with the following entries: |
| 29 | + cell_data: Dict of parsed cell (cell-scope only) |
| 30 | + cell_source: String of cell content (cell-scope only) |
| 31 | + file_data: Dict of parsed notebook |
| 32 | + file_source: String of notebook content |
| 33 | + path: Filepath of notebook |
| 34 | + user: Dict of args passed at the command-line |
| 35 | +""" |
| 36 | +import pathlib |
| 37 | +import re |
| 38 | +import urllib |
| 39 | + |
| 40 | +from tensorflow_docs.tools.nblint import fix |
| 41 | +from tensorflow_docs.tools.nblint.decorator import fail |
| 42 | +from tensorflow_docs.tools.nblint.decorator import lint |
| 43 | +from tensorflow_docs.tools.nblint.decorator import Options |
| 44 | + |
| 45 | + |
# Regex patterns for the copyright headings accepted in notebooks that follow
# this style. Each entry allows any year from 2010 through 2099.
copyrights_re = [
    # "The TensorFlow Authors", optionally with extra words before "Authors".
    r"Copyright 20[1-9][0-9] The TensorFlow\s.*?\s?Authors",
    r"Copyright 20[1-9][0-9] Google",
    r"Copyright 20[1-9][0-9] The AI Edge Authors",
]
| 52 | + |
| 53 | + |
@lint(message="Copyright required", scope=Options.Scope.TEXT)
def copyright_check(args):
  """Pass if the text cell matches one of the accepted copyright headings.

  Args:
    args: Nested dict of runtime arguments. Reads `args["cell_source"]`.

  Returns:
    Boolean: True if any pattern in `copyrights_re` is found in the cell.
  """
  text = args["cell_source"]
  for heading_re in copyrights_re:
    if re.search(heading_re, text):
      return True
  return False
| 58 | + |
| 59 | + |
# Matches the form title that heads the Apache license code cell, for example
# "#@title Licensed under the Apache License" (one optional whitespace
# character is allowed after the "#"). The pattern is a raw string so that
# `\s` reaches the regex engine instead of being an invalid string escape.
license_re = re.compile(r"#\s?@title Licensed under the Apache License")
| 61 | + |
| 62 | + |
@lint(
    message="Apache license cell is required",
    scope=Options.Scope.CODE,
    cond=Options.Cond.ANY)
def license_check(args):
  """Pass if the code cell carries the Apache license `@title` heading.

  Args:
    args: Nested dict of runtime arguments. Reads `args["cell_source"]`.

  Returns:
    Boolean: True if `license_re` matches the cell. Otherwise `fail` is called
    with a message pointing at the notebook template.
  """
  if not license_re.search(args["cell_source"]):
    template_url = "https://github.com/tensorflow/docs/blob/master/tools/templates/notebook.ipynb"
    fail(f"License cell missing or doesn't follow template: {template_url}")
    return None
  return True
| 73 | + |
| 74 | + |
@lint(scope=Options.Scope.FILE)
def not_translation(args):
  """Pass unless the notebook sits in a non-English `site/<lang>/` directory.

  Args:
    args: Nested dict of runtime arguments. Reads `args["path"]`, the filepath
      of the notebook.

  Returns:
    Boolean: True if no directory in the path is named `site`, or if a `site`
    directory is directly followed by an `en` directory. False otherwise.
  """
  # Compare path components rather than testing strings against
  # `Path.parents`: that sequence holds path objects, which never compare
  # equal to a `str`, so such a membership test is always False.
  dir_parts = pathlib.PurePath(args["path"]).parent.parts
  if "site" not in dir_parts:
    return True
  return any(
      parent == "site" and child == "en"
      for parent, child in zip(dir_parts, dir_parts[1:]))
| 81 | + |
| 82 | + |
| 83 | +# Button checks |
| 84 | + |
# A cell is treated as the button bar if it contains any of:
#   - class="tfo-notebook-buttons" (CSS used on website versions),
#   - the run-in-colab logo or the Colab badge (for notebooks that stick to
#     GitHub/Colab).
is_button_cell_re = re.compile(
    r"class.*tfo-notebook-buttons"
    r"|colab_logo_32px\.png"
    r"|colab-badge\.svg")
| 90 | + |
| 91 | + |
def get_arg_or_fail(user_args, arg_name, arg_fmt):
  """Look up a user-defined command-line arg, failing the lint if it is absent.

  Args:
    user_args: Dict containing user-defined args passed at command-line.
    arg_name: String name of user-defined arg.
    arg_fmt: String format of expected user-defined arg, shown in the failure
      message as a usage hint.

  Returns:
    The value stored under `arg_name`. If the key is missing, `fail` is called
    with `always_show=True` and a message showing how to pass the arg.
  """
  if arg_name not in user_args:
    fail(
        f"Requires user-argument '{arg_name}': nblint --arg={arg_name}:{arg_fmt} ...",
        always_show=True)
    return None
  return user_args[arg_name]
| 110 | + |
| 111 | + |
def split_doc_path(filepath):
  """Return paths for docs root prefix directory and the relative path to file.

  Given a full path to notebook file, standalone or within an established
  documentation directory layout, split the provided path into two:
  1. a path representing the prefix directory to the docs root (if it exists),
  2. the relative path to the file from the docs root directory.
  If in an unknown docs directory layout, return an empty prefix path and the
  original argument unchanged.

  For example:
   "site/en/notebook.ipynb" => ("site/en", "notebook.ipynb")
   "tensorflow/docs/notebook.ipynb" => ("docs", "notebook.ipynb")
   "unknown/path/notebook.ipynb" => ("", "unknown/path/notebook.ipynb")

  Args:
    filepath: `pathlib.Path` to a documentation notebook.

  Returns:
    pathlib.Path: The path of the doc root prefix directory, if applicable.
    pathlib.Path: The relative path to notebook from the prefix directory.
  """
  fp_full = filepath.resolve()  # Check full path for sub-elements.
  parts = fp_full.parts

  def split_path_on_dir(dirname, offset=1):
    # Split at the first component named `dirname`; `offset` is the number of
    # components (starting at `dirname`) that make up the docs root.
    end = parts.index(dirname) + offset
    docs_dir = pathlib.Path(*parts[end - offset:end])
    # Pass a single path to `relative_to`: multiple positional arguments are
    # deprecated since Python 3.12.
    rel_path = fp_full.relative_to(pathlib.Path(*parts[:end]))
    return docs_dir, rel_path

  if "site" in parts:
    return split_path_on_dir("site", offset=2)  # site/<lang>/
  elif "docs" in parts:
    return split_path_on_dir("docs")
  elif "g3doc" in parts:
    idx = parts.index("g3doc")
    # Slice instead of indexing so a trailing "g3doc" component can't raise.
    offset = 2 if parts[idx + 1:idx + 2] == ("en",) else 1
    return split_path_on_dir("g3doc", offset=offset)
  else:
    # Unknown setup. Return empty root and unsplit path.
    return pathlib.Path(), filepath
| 157 | + |
| 158 | + |
@lint(
    message="Missing or malformed URL in Colab button.",
    scope=Options.Scope.TEXT,
    cond=Options.Cond.ANY)
def button_colab(args):
  """Test that the URL in the Colab button matches the file path.

  Args:
    args: Nested dict of runtime arguments. Reads `cell_source`, `path`, and
      the user args `repo` (required) and `branch` (defaults to "master").

  Returns:
    Boolean: True if the cell is a button cell containing the expected Colab
    URL. Otherwise `fail` is called with a regex-replace fix for the URL.
  """
  cell_source = args["cell_source"]
  repo = get_arg_or_fail(args["user"], "repo", "<org/name>")
  branch = args["user"].get("branch", "master")
  docs_dir, rel_path = split_doc_path(args["path"])

  # Buttons use OSS URLs.
  if docs_dir.as_posix() == "g3doc/en":
    docs_dir = pathlib.Path("site/en")

  base_url = f"colab.research.google.com/github/{repo}/blob/{branch}"
  # `as_posix()` keeps forward slashes in the URL even where the OS path
  # separator is a backslash.
  this_url = "https://" + (base_url / docs_dir / rel_path).as_posix()

  if is_button_cell_re.search(cell_source) and this_url in cell_source:
    return True
  else:
    fail(
        f"Colab button URL doesn't match: {this_url}",
        fix=fix.regex_between_groups_replace_all,
        fix_args=[r"(href.*)http.*?(\\\".*colab_logo_32px.png)", this_url])
| 184 | + |
| 185 | + |
@lint(
    message="Missing or malformed URL in Download button.",
    scope=Options.Scope.TEXT,
    cond=Options.Cond.ANY)
def button_download(args):
  """Test that the URL in the Download button matches the file path.

  Args:
    args: Nested dict of runtime arguments. Reads `cell_source`, `path`, and
      the required user arg `repo` in `<org/name>` form.

  Returns:
    Boolean: True for notebooks under an `r1` directory, or if the cell is a
    button cell containing the expected download URL. Otherwise `fail` is
    called with a regex-replace fix for the URL.
  """
  # `import urllib` alone does not guarantee the `parse` submodule is loaded.
  import urllib.parse  # pylint: disable=g-import-not-at-top

  cell_source = args["cell_source"]
  repo = get_arg_or_fail(args["user"], "repo", "<org/name>")
  repo_name = pathlib.Path(repo.split("/")[1])
  docs_dir, rel_path = split_doc_path(args["path"])

  if "r1" in rel_path.parts:
    return True  # No download button for TF 1.x docs.

  # Buttons use OSS URLs.
  if docs_dir.as_posix() == "g3doc/en":
    docs_dir = pathlib.Path("site/en")

  # `as_posix()` keeps forward slashes in the URL even where the OS path
  # separator is a backslash.
  bucket_path = (f"tensorflow_docs/{repo_name}" / docs_dir / rel_path).as_posix()
  this_url = urllib.parse.urljoin("https://storage.googleapis.com", bucket_path)

  if is_button_cell_re.search(cell_source) and this_url in cell_source:
    return True
  else:
    fail(
        f"Download button URL doesn't match: {this_url}",
        fix=fix.regex_between_groups_replace_all,
        fix_args=[r"(href.*)http.*?(\\\".*download_logo_32px.png)", this_url])
| 215 | + |
| 216 | + |
@lint(
    message="Missing or malformed URL in GitHub button.",
    scope=Options.Scope.TEXT,
    cond=Options.Cond.ANY)
def button_github(args):
  """Test that the URL in the GitHub button matches the file path.

  Args:
    args: Nested dict of runtime arguments. Reads `cell_source`, `path`, and
      the user args `repo` (required) and `branch` (defaults to "master").

  Returns:
    Boolean: True if the cell is a button cell containing the expected GitHub
    URL. Otherwise `fail` is called with a regex-replace fix for the URL.
  """
  cell_source = args["cell_source"]
  repo = get_arg_or_fail(args["user"], "repo", "<org/name>")
  branch = args["user"].get("branch", "master")
  docs_dir, rel_path = split_doc_path(args["path"])

  # Buttons use OSS URLs.
  if docs_dir.as_posix() == "g3doc/en":
    docs_dir = pathlib.Path("site/en")

  base_url = f"github.com/{repo}/blob/{branch}"
  # `as_posix()` keeps forward slashes in the URL even where the OS path
  # separator is a backslash.
  this_url = "https://" + (base_url / docs_dir / rel_path).as_posix()

  if is_button_cell_re.search(cell_source) and this_url in cell_source:
    return True
  else:
    fail(
        f"GitHub button URL doesn't match: {this_url}",
        fix=fix.regex_between_groups_replace_all,
        fix_args=[r"(href.*)http.*?(\\\".*GitHub-Mark-32px.png)", this_url])
| 242 | + |
| 243 | + |
@lint(
    message="Missing or malformed URL in 'View on' button.",
    scope=Options.Scope.TEXT,
    cond=Options.Cond.ANY)
def button_website(args):
  """Test that the website URL in the 'View on' button matches the file path.

  Because of subsites and different output directories, the exact website path
  can't be known from the file alone. But can check that the URL matches a
  correct pattern.

  Args:
    args: Nested dict of runtime arguments. Reads `cell_source`, `path`, and
      the optional user arg `base_url`, which overrides the default site URL.

  Returns:
    Boolean: True for notebooks under an `r1` directory, or if the cell is a
    button cell whose URL matches the expected pattern. Otherwise `fail` is
    called with the expected URL format.
  """
  cell_source = args["cell_source"]
  docs_dir, rel_path = split_doc_path(args["path"])

  if "r1" in rel_path.parts:
    return True  # No website button for TF 1.x docs.

  user_url = args["user"].get("base_url")
  if user_url:
    base_url = user_url
  elif docs_dir.as_posix() in ("site/zh-cn", "site/zh-tw"):
    base_url = "https://tensorflow.google.cn/"
  else:
    base_url = "https://www.tensorflow.org/"

  # Construct website URL pattern based on location of this file in repo.
  # `as_posix()` keeps forward slashes regardless of the OS path separator.
  url_path = rel_path.with_suffix("").as_posix()
  # If run in source repo, we don't know for certain the published subsite URL.
  # Match: base/<optional-subsite-path>/notebook-path
  # The URL pieces are escaped so characters such as "." match literally.
  this_url = rf"{re.escape(base_url)}[\w\-/]*{re.escape(url_path)}"

  if is_button_cell_re.search(cell_source) and re.search(this_url, cell_source):
    return True
  else:
    # If included verbatim, bracket will fail lint. That's desired.
    url_format = f"{base_url}<OPTIONAL-SUBSITE-PATH>/{url_path}"
    fail(f"'View on' button URL doesn't match pattern: {url_format}")
| 287 | + |
| 288 | + |
@lint(
    message="Missing or malformed URL in 'TFHub' button.",
    scope=Options.Scope.TEXT,
    cond=Options.Cond.ANY)
def button_hub(args):
  """Notebooks that mention tfhub.dev should have a TFHub button.

  Args:
    args: Nested dict of runtime arguments. Reads `cell_source` and
      `file_source`.

  Returns:
    Boolean: True if the notebook never mentions the TFHub URL, or if the cell
    is a button cell that contains it. Otherwise `fail` is called with the
    expected URL format.
  """
  hub_url = "https://tfhub.dev/"

  # Notebooks without any TFHub link don't need the button.
  if hub_url not in args["file_source"]:
    return True

  cell_text = args["cell_source"]
  if is_button_cell_re.search(cell_text) and hub_url in cell_text:
    return True

  # If included verbatim, bracket will fail lint. That's desired.
  url_format = f"{hub_url}<MODEL-OR-COLLECTION>"
  fail(f"'TFHub' button URL doesn't match pattern: {url_format}")
| 310 | + |
| 311 | + |
@lint(
    message="Remove extra buttons from TF 1.x docs.",
    scope=Options.Scope.TEXT,
    cond=Options.Cond.ALL)
def button_r1_extra(args):
  """The r1/ docs should not have website or download buttons.

  Args:
    args: Nested dict of runtime arguments. Reads `cell_source` and `path`.

  Returns:
    Boolean: True if the notebook is not under an `r1` directory, the cell is
    not a button cell, or the button cell has no website or download URL.
    Otherwise `fail` is called.
  """
  cell_source = args["cell_source"]
  docs_dir, rel_path = split_doc_path(args["path"])

  # Only test r1/ notebooks.
  if "r1" not in rel_path.parts:
    return True
  # Only check text cells that contain the button nav bar.
  if not is_button_cell_re.search(cell_source):
    return True

  download_url = "https://storage.googleapis.com/tensorflow_docs/"
  if docs_dir.as_posix() in ("site/zh-cn", "site/zh-tw"):
    base_url = "https://tensorflow.google.cn/"
  else:
    base_url = "https://www.tensorflow.org/"

  # Look for button URLs that shouldn't be there. `base_url` already ends with
  # a slash, so the lookahead follows it directly; links under images/ (the
  # button icons) are allowed.
  website_url_re = re.escape(base_url) + r"(?!images)"
  if re.search(website_url_re, cell_source) or download_url in cell_source:
    fail(
        "Remove the 'View on' and 'Download notebook' buttons since r1/ docs are not published."
    )
  else:
    return True
0 commit comments