Skip to content

Commit a81ff2e

Browse files
Creating a new gemini-cookbook style.
PiperOrigin-RevId: 733943464
1 parent 6e1194d commit a81ff2e

File tree

1 file changed

+341
-0
lines changed

1 file changed

+341
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,341 @@
1+
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ==============================================================================
15+
r"""Lint assertions for notebooks published on tensorflow.org.
16+
17+
These lints are a non-exhaustive implementation of style rules found in the
18+
TensorFlow documentation and style guides. See:
19+
20+
- https://www.tensorflow.org/community/contribute/docs
21+
- https://www.tensorflow.org/community/contribute/docs_style
22+
23+
When adding lints, link to the URL of the relevant style rule, if applicable.
24+
25+
Lint functions return a boolean: True to pass, False to fail.
26+
For @lint options, see the docstrings in `decorator.py`.
27+
28+
Lint callback functions are passed an `args` dict with the following entries:
29+
cell_data: Dict of parsed cell (cell-scope only)
30+
cell_source: String of cell content (cell-scope only)
31+
file_data: Dict of parsed notebook
32+
file_source: String of notebook content
33+
path: Filepath of notebook
34+
user: Dict of args passed at the command-line
35+
"""
36+
import pathlib
37+
import re
38+
import urllib
39+
40+
from tensorflow_docs.tools.nblint import fix
41+
from tensorflow_docs.tools.nblint.decorator import fail
42+
from tensorflow_docs.tools.nblint.decorator import lint
43+
from tensorflow_docs.tools.nblint.decorator import Options
44+
45+
46+
# Copyright holders accepted in a notebook's copyright heading. Each entry is a
# regex fragment that follows the "Copyright 20YY " prefix.
_copyright_holders = (
    r"The TensorFlow\s.*?\s?Authors",
    r"Google",
    r"The AI Edge Authors",
)

# Acceptable copyright heading for notebooks following this style.
copyrights_re = [
    rf"Copyright 20[1-9][0-9] {holder}" for holder in _copyright_holders
]
52+
53+
54+
@lint(message="Copyright required", scope=Options.Scope.TEXT)
def copyright_check(args):
  """Pass when the text cell contains one of the accepted copyright headings.

  Args:
    args: Nested dict of runtime arguments; reads `cell_source`.

  Returns:
    Boolean: True if any pattern in `copyrights_re` is found, False otherwise.
  """
  text = args["cell_source"]
  for heading_re in copyrights_re:
    if re.search(heading_re, text):
      return True
  return False
58+
59+
60+
# Matches the form-field title comment that opens the Apache license code cell,
# with at most one whitespace character between "#" and "@title".
# Raw string: `\s` must reach the regex engine as-is instead of being parsed as
# an (invalid) string escape sequence.
license_re = re.compile(r"#\s?@title Licensed under the Apache License")
61+
62+
63+
@lint(
    message="Apache license cell is required",
    scope=Options.Scope.CODE,
    cond=Options.Cond.ANY)
def license_check(args):
  """Pass when the code cell holds the templated Apache license header.

  Args:
    args: Nested dict of runtime arguments; reads `cell_source`.

  Returns:
    True if the cell matches `license_re`. Otherwise the failure is reported
    through `fail()` with a pointer to the notebook template.
  """
  found = license_re.search(args["cell_source"])
  if found:
    return True

  template_url = "https://github.com/tensorflow/docs/blob/master/tools/templates/notebook.ipynb"
  fail(f"License cell missing or doesn't follow template: {template_url}")
73+
74+
75+
@lint(scope=Options.Scope.FILE)
def not_translation(args):
  """Pass for notebooks that are not translated copies.

  A notebook outside any `site/` directory passes. A notebook inside `site/`
  passes only when it lives under `site/en`.

  Args:
    args: Nested dict of runtime arguments; reads `path`.

  Returns:
    Boolean: True if lint test passes, False if not.
  """
  # Compare directory *names*. `Path.parents` yields Path objects, so testing a
  # plain string against it is never true and the check would always pass.
  dir_parts = pathlib.PurePath(args["path"]).parent.parts
  if "site" not in dir_parts:
    return True
  return any(
      parent == "site" and child == "en"
      for parent, child in zip(dir_parts, dir_parts[1:]))
81+
82+
83+
# Button checks
84+
85+
# A text cell counts as the button bar when it contains either the
# class="tfo-notebook-buttons" markup (CSS used on website versions) or one of
# the run-in-colab images (for notebooks that stick to GitHub/Colab).
is_button_cell_re = re.compile(
    "|".join([
        r"class.*tfo-notebook-buttons",
        r"colab_logo_32px\.png",
        r"colab-badge\.svg",
    ]))
90+
91+
92+
def get_arg_or_fail(user_args, arg_name, arg_fmt):
  """Look up a user-defined command-line arg, failing the lint if it is absent.

  Args:
    user_args: Dict containing user-defined args passed at command-line.
    arg_name: String name of user-defined arg.
    arg_fmt: String format of expected user-defined arg, shown in the usage
      hint of the failure message.

  Returns:
    Value stored under `arg_name`. If the arg was not passed, the failure is
    reported through `fail()` (always shown) and nothing is returned.
  """
  if arg_name not in user_args:
    fail(
        f"Requires user-argument '{arg_name}': nblint --arg={arg_name}:{arg_fmt} ...",
        always_show=True)
    return None

  return user_args[arg_name]
110+
111+
112+
def split_doc_path(filepath):
  """Return paths for docs root prefix directory and the relative path to file.

  Given a full path to notebook file, standalone or within an established
  documentation directory layout, split the provided path into two:
  1. a path representing the prefix directory to the docs root (if it exists),
  2. the relative path to the file from the docs root directory.
  If in an unknown docs directory layout, return an empty prefix path and the
  full path of the original argument.

  For example:
  "site/en/notebook.ipynb" => ("site/en", "notebook.ipynb")
  "tensorflow/docs/notebook.ipynb" => ("docs", "notebook.ipynb")
  "unknown/path/notebook.ipynb" => ("", "unknown/path/notebook.ipynb")

  Known roots are checked in this order: `site` (always followed by a language
  directory), `docs`, then `g3doc` (optionally followed by `en`). The first
  occurrence of the root name in the resolved path is used.

  Args:
    filepath: `pathlib.Path` to a documentation notebook.

  Returns:
    pathlib.Path: The path of the doc root prefix directory, if applicable.
    pathlib.Path: The relative path to notebook from the prefix directory.
  """
  fp_full = filepath.resolve()  # Check full path for sub-elements.
  parts = fp_full.parts

  def split_path_on_dir(dirname, offset=1):
    """Split `fp_full` after `offset` components starting at `dirname`."""
    idx = parts.index(dirname)
    end = idx + offset
    docs_dir = pathlib.Path(*parts[idx:end])
    # Join the prefix into one path first: passing several positional
    # arguments to relative_to() is deprecated since Python 3.12.
    rel_path = fp_full.relative_to(pathlib.Path(*parts[:end]))
    return docs_dir, rel_path

  if "site" in parts:
    return split_path_on_dir("site", offset=2)  # site/<lang>/
  if "docs" in parts:
    return split_path_on_dir("docs")
  if "g3doc" in parts:
    idx = parts.index("g3doc")
    # Slice instead of indexing so a path that ends in "g3doc" doesn't raise.
    offset = 2 if parts[idx + 1:idx + 2] == ("en",) else 1
    return split_path_on_dir("g3doc", offset=offset)

  # Unknown setup. Return empty root and unsplit path.
  return pathlib.Path(), filepath
157+
158+
159+
@lint(
    message="Missing or malformed URL in Colab button.",
    scope=Options.Scope.TEXT,
    cond=Options.Cond.ANY)
def button_colab(args):
  """Check that the Colab button links to this notebook's path in the repo.

  Args:
    args: Nested dict of runtime arguments; reads `cell_source`, `path` and
      the user args `repo` (required) and `branch` (defaults to "master").

  Returns:
    True if the cell is a button cell containing the expected Colab URL.
    Otherwise the failure is reported through `fail()` along with a regex fix
    that rewrites the button's href.
  """
  source = args["cell_source"]
  repo = get_arg_or_fail(args["user"], "repo", "<org/name>")
  branch = args["user"].get("branch", "master")
  docs_dir, rel_path = split_doc_path(args["path"])

  # Buttons use OSS URLs.
  if str(docs_dir) == "g3doc/en":
    docs_dir = pathlib.Path("site/en")

  url_path = pathlib.Path(
      f"colab.research.google.com/github/{repo}/blob/{branch}"
  ) / docs_dir / rel_path
  this_url = f"https://{url_path}"

  if is_button_cell_re.search(source) and this_url in source:
    return True

  fail(
      f"Colab button URL doesn't match: {this_url}",
      fix=fix.regex_between_groups_replace_all,
      fix_args=[r"(href.*)http.*?(\\\".*colab_logo_32px.png)", this_url])
184+
185+
186+
@lint(
    message="Missing or malformed URL in Download button.",
    scope=Options.Scope.TEXT,
    cond=Options.Cond.ANY)
def button_download(args):
  """Test that the URL in the Download button matches the file path.

  Args:
    args: Nested dict of runtime arguments; reads `cell_source`, `path` and
      the required user arg `repo` (formatted as `<org/name>`).

  Returns:
    True if the notebook is under `r1/` (no download button expected) or if
    the cell is a button cell containing the expected download URL. Otherwise
    the failure is reported through `fail()` along with a regex fix that
    rewrites the button's href.
  """
  # Import the submodule explicitly: a bare `import urllib` does not load
  # `urllib.parse`, so `urllib.parse.urljoin` only works when some other module
  # happens to have imported it already.
  from urllib.parse import urljoin

  cell_source = args["cell_source"]
  repo = get_arg_or_fail(args["user"], "repo", "<org/name>")
  repo_name = pathlib.Path(repo.split("/")[1])
  docs_dir, rel_path = split_doc_path(args["path"])

  if "r1" in rel_path.parts:
    return True  # No download button for TF 1.x docs.

  # Buttons use OSS URLs.
  if str(docs_dir) == "g3doc/en":
    docs_dir = pathlib.Path("site/en")

  this_url = urljoin(
      "https://storage.googleapis.com",
      str(f"tensorflow_docs/{repo_name}" / docs_dir / rel_path))

  if is_button_cell_re.search(cell_source) and this_url in cell_source:
    return True
  else:
    fail(
        f"Download button URL doesn't match: {this_url}",
        fix=fix.regex_between_groups_replace_all,
        fix_args=[r"(href.*)http.*?(\\\".*download_logo_32px.png)", this_url])
215+
216+
217+
@lint(
    message="Missing or malformed URL in GitHub button.",
    scope=Options.Scope.TEXT,
    cond=Options.Cond.ANY)
def button_github(args):
  """Check that the GitHub button links to this notebook's path in the repo.

  Args:
    args: Nested dict of runtime arguments; reads `cell_source`, `path` and
      the user args `repo` (required) and `branch` (defaults to "master").

  Returns:
    True if the cell is a button cell containing the expected GitHub URL.
    Otherwise the failure is reported through `fail()` along with a regex fix
    that rewrites the button's href.
  """
  source = args["cell_source"]
  repo = get_arg_or_fail(args["user"], "repo", "<org/name>")
  branch = args["user"].get("branch", "master")
  docs_dir, rel_path = split_doc_path(args["path"])

  # Buttons use OSS URLs.
  if str(docs_dir) == "g3doc/en":
    docs_dir = pathlib.Path("site/en")

  url_path = pathlib.Path(
      f"github.com/{repo}/blob/{branch}") / docs_dir / rel_path
  this_url = f"https://{url_path}"

  if is_button_cell_re.search(source) and this_url in source:
    return True

  fail(
      f"GitHub button URL doesn't match: {this_url}",
      fix=fix.regex_between_groups_replace_all,
      fix_args=[r"(href.*)http.*?(\\\".*GitHub-Mark-32px.png)", this_url])
242+
243+
244+
@lint(
    message="Missing or malformed URL in 'View on' button.",
    scope=Options.Scope.TEXT,
    cond=Options.Cond.ANY)
def button_website(args):
  """Test that the website URL in the 'View on' button matches the file path.

  Because of subsites and different output directories, the exact website path
  can't be known from the file alone. But can check that the URL matches a
  correct pattern.

  Args:
    args: Nested dict of runtime arguments; reads `cell_source`, `path` and
      the optional user arg `base_url`, which overrides the default site URL.

  Returns:
    True if the notebook is under `r1/` (no website button expected) or if the
    cell is a button cell whose URL matches the expected pattern. Otherwise
    the failure is reported through `fail()`.
  """
  cell_source = args["cell_source"]
  docs_dir, rel_path = split_doc_path(args["path"])

  if "r1" in rel_path.parts:
    return True  # No website button for TF 1.x docs.

  user_url = args["user"].get("base_url")
  if user_url:
    base_url = user_url
  elif str(docs_dir) in ("site/zh-cn", "site/zh-tw"):
    base_url = "https://tensorflow.google.cn/"
  else:
    base_url = "https://www.tensorflow.org/"

  # Construct website URL pattern based on location of this file in repo.
  url_path = rel_path.with_suffix("")
  # If run in source repo, we don't know for certain the published subsite URL.
  # Match: base/<optional-subsite-path>/notebook-path
  # The base URL and notebook path are literal text, so escape them: otherwise
  # "." matches any character and names containing regex metacharacters
  # (e.g. "+" or "(") corrupt the pattern. URLs always use forward slashes.
  this_url = (
      re.escape(base_url) + r"[\w\-/]*" + re.escape(url_path.as_posix()))

  if is_button_cell_re.search(cell_source) and re.search(this_url, cell_source):
    return True
  else:
    # If included verbatim, bracket will fail lint. That's desired.
    url_format = f"{base_url}<OPTIONAL-SUBSITE-PATH>/{url_path}"
    fail(f"'View on' button URL doesn't match pattern: {url_format}")
287+
288+
289+
@lint(
    message="Missing or malformed URL in 'TFHub' button.",
    scope=Options.Scope.TEXT,
    cond=Options.Cond.ANY)
def button_hub(args):
  """Require a TFHub button in notebooks that mention tfhub.dev.

  Args:
    args: Nested dict of runtime arguments; reads `cell_source` and
      `file_source`.

  Returns:
    True if the notebook never mentions the TFHub URL, or if the cell is a
    button cell that contains it. Otherwise the failure is reported through
    `fail()`.
  """
  source = args["cell_source"]
  notebook_text = args["file_source"]

  hub_url = "https://tfhub.dev/"

  # Only check files that mention TFHub.
  if hub_url not in notebook_text:
    return True

  has_hub_button = is_button_cell_re.search(source) and hub_url in source
  if has_hub_button:
    return True

  # If included verbatim, bracket will fail lint. That's desired.
  url_format = f"{hub_url}<MODEL-OR-COLLECTION>"
  fail(f"'TFHub' button URL doesn't match pattern: {url_format}")
310+
311+
312+
@lint(
    message="Remove extra buttons from TF 1.x docs.",
    scope=Options.Scope.TEXT,
    cond=Options.Cond.ALL)
def button_r1_extra(args):
  """The r1/ docs should not have website or download buttons.

  Args:
    args: Nested dict of runtime arguments; reads `cell_source` and `path`.

  Returns:
    True if the notebook is not under `r1/`, if the cell is not a button cell,
    or if the button cell has neither a website link nor a download link.
    Otherwise the failure is reported through `fail()`.
  """
  cell_source = args["cell_source"]
  docs_dir, rel_path = split_doc_path(args["path"])

  # Only test r1/ notebooks.
  if "r1" not in rel_path.parts:
    return True
  # Only check text cells that contain the button nav bar.
  if not is_button_cell_re.search(cell_source):
    return True

  download_url = "https://storage.googleapis.com/tensorflow_docs/"
  if str(docs_dir) in ("site/zh-cn", "site/zh-tw"):
    base_url = "https://tensorflow.google.cn/"
  else:
    base_url = "https://www.tensorflow.org/"

  # `base_url` already ends with "/", so don't append another one: a pattern
  # with "//" never matches a real link. Links into images/ are allowed since
  # the remaining buttons load their icons from there.
  website_link_re = re.escape(base_url) + r"(?!images)"

  # Look for button URLs that shouldn't be there.
  if re.search(website_link_re, cell_source) or download_url in cell_source:
    fail(
        "Remove the 'View on' and 'Download notebook' buttons since r1/ docs are not published."
    )
  else:
    return True

0 commit comments

Comments
 (0)