1
1
from contextlib import suppress
2
2
from datetime import datetime
3
3
from importlib import metadata
4
+ from pathlib import Path
4
5
from urllib .parse import urljoin
5
- import os
6
6
import urllib .request
7
7
8
8
from docutils import nodes
9
9
from lxml import html
10
10
import certifi
11
11
12
- __version__ = "1.2 .0"
12
+ __version__ = "2.0 .0"
13
13
14
- BASE_URL = "https://json-schema.org/draft-07/"
15
- VALIDATION_SPEC = urljoin (BASE_URL , "json-schema-validation.html" )
16
- REF_URL = urljoin (BASE_URL , "json-schema-core.html#rfc.section.8.3" )
17
- SCHEMA_URL = urljoin (BASE_URL , "json-schema-core.html#rfc.section.7" )
14
+ BASE_URL = "https://json-schema.org/draft/2020-12/"
15
+ VOCABULARIES = {
16
+ "core" : urljoin (BASE_URL , "json-schema-core.html" ),
17
+ "validation" : urljoin (BASE_URL , "json-schema-validation.html" ),
18
+ }
19
+ HARDCODED = {
20
+ "$ref" : "https://json-schema.org/draft/2020-12/json-schema-core.html#ref" ,
21
+ "$schema" : "https://json-schema.org/draft/2020-12/json-schema-core.html#name-the-schema-keyword" , # noqa: E501
22
+ "format" : "https://json-schema.org/draft/2020-12/json-schema-validation.html#name-implementation-requirements" , # noqa: E501
23
+ }
18
24
19
25
20
26
def setup (app ):
@@ -30,24 +36,31 @@ def setup(app):
30
36
31
37
app .add_config_value ("cache_path" , "_cache" , "" )
32
38
33
- os .makedirs (app .config .cache_path , exist_ok = True )
39
+ CACHE = Path (app .config .cache_path )
40
+ CACHE .mkdir (exist_ok = True )
34
41
35
- path = os .path .join (app .config .cache_path , "spec.html" )
36
- spec = fetch_or_load (path )
37
- app .add_role ("kw" , docutils_does_not_allow_using_classes (spec ))
42
+ documents = {
43
+ url : fetch_or_load (vocabulary_path = CACHE / f"{ name } .html" , url = url )
44
+ for name , url in VOCABULARIES .items ()
45
+ }
46
+ app .add_role ("kw" , docutils_does_not_allow_using_classes (documents ))
38
47
39
48
return dict (version = __version__ , parallel_read_safe = True )
40
49
41
50
42
- def fetch_or_load (spec_path ):
51
+ def fetch_or_load (vocabulary_path , url ):
43
52
"""
44
53
Fetch a new specification or use the cache if it's current.
45
54
46
55
Arguments:
47
56
48
- cache_path:
57
+ vocabulary_path:
58
+
59
+ the local path to a cached vocabulary document
60
+
61
+ url:
49
62
50
- the path to a cached specification
63
+ the URL of the vocabulary document
51
64
"""
52
65
53
66
headers = {
@@ -58,24 +71,23 @@ def fetch_or_load(spec_path):
58
71
}
59
72
60
73
with suppress (FileNotFoundError ):
61
- modified = datetime .utcfromtimestamp (os . path . getmtime ( spec_path ) )
74
+ modified = datetime .utcfromtimestamp (vocabulary_path . stat (). st_mtime )
62
75
date = modified .strftime ("%a, %d %b %Y %I:%M:%S UTC" )
63
76
headers ["If-Modified-Since" ] = date
64
77
65
- request = urllib .request .Request (VALIDATION_SPEC , headers = headers )
78
+ request = urllib .request .Request (url , headers = headers )
66
79
response = urllib .request .urlopen (request , cafile = certifi .where ())
67
80
68
81
if response .code == 200 :
69
- with open (spec_path , "w+b" ) as spec :
82
+ with vocabulary_path . open ("w+b" ) as spec :
70
83
spec .writelines (response )
71
84
spec .seek (0 )
72
- return html .parse (spec )
85
+ return html .parse (spec ). getroot ()
73
86
74
- with open (spec_path ) as spec :
75
- return html .parse (spec )
87
+ return html .parse (vocabulary_path .read_bytes ()).getroot ()
76
88
77
89
78
- def docutils_does_not_allow_using_classes (spec ):
90
+ def docutils_does_not_allow_using_classes (vocabularies ):
79
91
"""
80
92
Yeah.
81
93
@@ -118,27 +130,22 @@ def keyword(name, raw_text, text, lineno, inliner):
118
130
iterable of system messages, both possibly empty
119
131
"""
120
132
121
- if text == "$ref" :
122
- return [nodes .reference (raw_text , text , refuri = REF_URL )], []
123
- elif text == "$schema" :
124
- return [nodes .reference (raw_text , text , refuri = SCHEMA_URL )], []
133
+ hardcoded = HARDCODED .get (text )
134
+ if hardcoded is not None :
135
+ return [nodes .reference (raw_text , text , refuri = hardcoded )], []
125
136
126
137
# find the header in the validation spec containing matching text
127
- header = spec .xpath ("//h1[contains(text(), '{0}')]" .format (text ))
138
+ for vocabulary_url , spec in vocabularies .items ():
139
+ header = spec .get_element_by_id (f"name-{ text .lower ()} " , None )
128
140
129
- if len (header ) == 0 :
141
+ if header is not None :
142
+ uri = urljoin (vocabulary_url , header .find ("a" ).attrib ["href" ])
143
+ break
144
+ else :
130
145
inliner .reporter .warning (
131
146
"Didn't find a target for {0}" .format (text ),
132
147
)
133
- uri = VALIDATION_SPEC
134
- else :
135
- if len (header ) > 1 :
136
- inliner .reporter .info (
137
- "Found multiple targets for {0}" .format (text ),
138
- )
139
-
140
- # get the href from link in the header
141
- uri = urljoin (VALIDATION_SPEC , header [0 ].find ("a" ).attrib ["href" ])
148
+ uri = BASE_URL
142
149
143
150
reference = nodes .reference (raw_text , text , refuri = uri )
144
151
return [reference ], []
0 commit comments