Commit | Line | Data |
---|---|---|
d74b0d31 JC |
1 | # SPDX-License-Identifier: GPL-2.0 |
2 | # Copyright 2019 Jonathan Corbet <corbet@lwn.net> | |
3 | # | |
4 | # Apply kernel-specific tweaks after the initial document processing | |
5 | # has been done. | |
6 | # | |
7 | from docutils import nodes | |
bcac386f | 8 | import sphinx |
d74b0d31 | 9 | from sphinx import addnodes |
bcac386f JC |
10 | if sphinx.version_info[0] < 2 or \ |
11 | sphinx.version_info[0] == 2 and sphinx.version_info[1] < 1: | |
12 | from sphinx.environment import NoUri | |
13 | else: | |
14 | from sphinx.errors import NoUri | |
d74b0d31 | 15 | import re |
d82b1e83 | 16 | from itertools import chain |
d74b0d31 | 17 | |
#
# Python 2 lacks re.ASCII; fall back to "no flag" there so the same
# re.compile() calls work on both major versions.
#
try:
    ascii_p3 = re.ASCII
except AttributeError:
    ascii_p3 = 0

#
# Regex nastiness.  Of course.
# Try to identify "function()" that's not already marked up some
# other way: an identifier of at least two characters, starting at a
# word boundary, immediately followed by an empty pair of parentheses.
# Group 1 is "name()", group 2 is the bare name.
#
RE_function = re.compile(r'\b(([a-zA-Z_]\w+)\(\))', flags=ascii_p3)

#
# Sphinx 2 uses the same :c:type role for struct, union, enum and typedef
#
RE_generic_type = re.compile(r'\b(struct|union|enum|typedef)\s+([a-zA-Z_]\w+)',
                             flags=ascii_p3)

#
# Sphinx 3 uses a different C role for each one of struct, union, enum and
# typedef
#
RE_struct = re.compile(r'\b(struct)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_union = re.compile(r'\b(union)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_enum = re.compile(r'\b(enum)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_typedef = re.compile(r'\b(typedef)\s+([a-zA-Z_]\w+)', flags=ascii_p3)

#
# Detects a reference to a documentation page ending in .rst or .txt.  The
# leading "Documentation/" is optional (group 1); group 2 is the path without
# the extension and may start with any number of "../" components.
#
RE_doc = re.compile(r'(\bDocumentation/)?((\.\./)*[\w\-/]+)\.(rst|txt)')

#
# Detects a ".. c:namespace:: <name>" directive that sits alone on a line and
# captures <name>.  The two leading dots are escaped so that only the literal
# reST directive marker matches rather than any two characters.
#
RE_namespace = re.compile(r'^\s*\.\.\s*c:namespace::\s*(\S+)\s*$')
57 | ||
#
# C keywords that can look like a function or type name in running text;
# they are never turned into cross-references.
#
Skipnames = [
    'for',
    'if',
    'register',
    'sizeof',
    'struct',
    'unsigned',
]


#
# Names of common system calls.  The docs mention them all over the place,
# cross-referencing them is pointless, and a kernel function that happens to
# share one of these names would attract wrong and confusing links.  So
# function references to these names are simply left alone.
#
Skipfuncs = [
    'open',
    'close',
    'read',
    'write',
    'fcntl',
    'mmap',
    'select',
    'poll',
    'fork',
    'execve',
    'clone',
    'ioctl',
    'socket',
]

#
# C namespace of the document being processed; auto_markup() overwrites it
# for each document, and the empty string means "no namespace".
#
c_namespace = ''
76 | ||
def markup_refs(docname, app, node):
    """Build the replacement nodes for the text node *node*.

    The text of *node* is scanned with every regex that applies to the
    running Sphinx major version.  Each match is handed to the markup
    function associated with its regex, and the text between matches is
    kept as plain ``nodes.Text``.

    :param docname: name of the document being processed; passed through to
                    the markup functions.
    :param app: the Sphinx application object; passed through to the markup
                functions.
    :param node: the text node whose content should be marked up.
    :returns: list of nodes, in text order, that together cover the whole
              text of *node* exactly once.
    """
    t = node.astext()
    done = 0
    repl = []
    #
    # Associate each regex with the function that will markup its matches
    #
    if sphinx.version_info[0] >= 3:
        markup_func = {RE_doc: markup_doc_ref,
                       RE_function: markup_func_ref_sphinx3,
                       RE_struct: markup_c_ref,
                       RE_union: markup_c_ref,
                       RE_enum: markup_c_ref,
                       RE_typedef: markup_c_ref}
    else:
        markup_func = {RE_doc: markup_doc_ref,
                       RE_function: markup_c_ref,
                       RE_generic_type: markup_c_ref}

    match_iterators = [regex.finditer(t) for regex in markup_func]
    #
    # Sort all references by the starting position in text
    #
    sorted_matches = sorted(chain(*match_iterators), key=lambda m: m.start())
    for m in sorted_matches:
        #
        # Matches coming from different regexes may overlap (for instance
        # "enum foo.rst" matches both a type and a document reference).  A
        # match that starts inside text already emitted is dropped, otherwise
        # the overlapping characters would show up twice in the output.
        #
        if m.start() < done:
            continue

        #
        # Include any text prior to match as a normal text node.
        #
        if m.start() > done:
            repl.append(nodes.Text(t[done:m.start()]))

        #
        # Call the function associated with the regex that matched this text
        # and append its return to the text
        #
        repl.append(markup_func[m.re](docname, app, m))

        done = m.end()
    #
    # Whatever follows the last match is plain text too.
    #
    if done < len(t):
        repl.append(nodes.Text(t[done:]))
    return repl
122 | ||
#
# Remember which cross-reference targets could not be resolved, so that the
# same lookup is not attempted over and over.  Keys are target strings; the
# stored value is always True.
#
failed_lookups = {}

def failure_seen(target):
    """Return True if *target* was recorded earlier by note_failure()."""
    return target in failed_lookups

def note_failure(target):
    """Record *target* as a lookup that failed."""
    failed_lookups[target] = True
26c82972 | 132 | |
#
# Sphinx 3 handler for "name()" matches.  Every candidate target is looked
# up in the C domain with the 'function' role; targets that fail are
# remembered so they are not looked up again.
#
def markup_func_ref_sphinx3(docname, app, match):
    """Return a cross-reference for a "name()" match, or its plain text.

    :param docname: name of the document being processed.
    :param app: the Sphinx application object.
    :param match: regex match whose group(2) is the bare name and whose
                  group(0) is the text to display.
    :returns: the node produced by the C domain when a target resolves,
              otherwise a ``nodes.Text`` holding the matched text.
    """
    cdom = app.env.domains['c']
    base_target = match.group(2)
    target_text = nodes.Text(match.group(0))

    # Reserved words are never cross-referenced.
    if base_target in Skipnames:
        return target_text

    #
    # If this document has a namespace, the namespaced name is tried
    # before the bare one.
    #
    if c_namespace:
        candidates = [c_namespace + "." + base_target, base_target]
    else:
        candidates = [base_target]

    for target in candidates:
        # Skip well-known syscall names and lookups known to fail.
        if target in Skipfuncs or failure_seen(target):
            continue

        #
        # Go through the dance of getting an xref out of the C domain
        #
        literal = nodes.literal(classes=['xref', 'c', 'c-func'])
        literal += target_text
        pending = addnodes.pending_xref('', refdomain='c',
                                        reftype='function',
                                        reftarget=target,
                                        modname=None,
                                        classname=None)
        #
        # XXX The Latex builder will throw NoUri exceptions here,
        # work around that by ignoring them.
        #
        try:
            resolved = cdom.resolve_xref(app.env, docname, app.builder,
                                         'function', target, pending,
                                         literal)
        except NoUri:
            resolved = None

        if resolved:
            return resolved
        note_failure(target)

    return target_text
177 | ||
def markup_c_ref(docname, app, match):
    """Return a C-domain cross-reference for *match*, or its plain text.

    Handles function matches (Sphinx 2 only) and struct/union/enum/typedef
    matches.  The regex that produced *match* selects both the CSS class put
    on the literal node and the reference type used for the lookup.

    :param docname: name of the document being processed.
    :param app: the Sphinx application object.
    :param match: regex match whose group(2) is the bare name and whose
                  group(0) is the text to display.
    :returns: the node produced by the C domain when a target resolves,
              otherwise a ``nodes.Text`` holding the matched text.
    """
    # regex -> (CSS class of the literal node, C domain reference type)
    roles = {
        # Sphinx 2 only
        RE_function: ('c-func', 'function'),
        RE_generic_type: ('c-type', 'type'),
        # Sphinx 3+ only
        RE_struct: ('c-struct', 'struct'),
        RE_union: ('c-union', 'union'),
        RE_enum: ('c-enum', 'enum'),
        RE_typedef: ('c-type', 'type'),
    }

    cdom = app.env.domains['c']
    base_target = match.group(2)
    target_text = nodes.Text(match.group(0))

    # Reserved words are never cross-referenced.
    if base_target in Skipnames:
        return target_text

    #
    # If this document has a namespace, the namespaced name is tried
    # before the bare one.
    #
    if c_namespace:
        candidates = [c_namespace + "." + base_target, base_target]
    else:
        candidates = [base_target]

    css_class, reftype = roles[match.re]
    for target in candidates:
        # Function references to well-known syscall names are left alone.
        if match.re == RE_function and target in Skipfuncs:
            continue

        #
        # Go through the dance of getting an xref out of the C domain
        #
        literal = nodes.literal(classes=['xref', 'c', css_class])
        literal += target_text
        pending = addnodes.pending_xref('', refdomain='c',
                                        reftype=reftype,
                                        reftarget=target,
                                        modname=None,
                                        classname=None)
        #
        # XXX The Latex builder will throw NoUri exceptions here,
        # work around that by ignoring them.
        #
        try:
            resolved = cdom.resolve_xref(app.env, docname, app.builder,
                                         reftype, target, pending,
                                         literal)
        except NoUri:
            resolved = None

        if resolved:
            return resolved

    return target_text
1ac4cfb2 | 235 | |
#
# Turn a mention of a documentation file (something ending in .rst or .txt,
# optionally starting with Documentation/) into a cross reference to that
# page.
#
def markup_doc_ref(docname, app, match):
    """Return a 'doc' cross-reference for *match*, or its plain text.

    :param docname: name of the document being processed.
    :param app: the Sphinx application object.
    :param match: regex match; group(1) is set when the path started with
                  "Documentation/", group(2) is the path without extension.
    :returns: the node produced by the std domain when the page resolves,
              otherwise a ``nodes.Text`` holding the matched text.
    """
    stddom = app.env.domains['std']
    #
    # A path that started with "Documentation/" gets a leading slash;
    # anything else is passed to the lookup as it was written.
    #
    target = match.group(2)
    if match.group(1):
        target = "/" + target

    #
    # Go through the dance of getting an xref out of the std domain
    #
    pending = addnodes.pending_xref('', refdomain='std', reftype='doc',
                                    reftarget=target, modname=None,
                                    classname=None, refexplicit=False)
    #
    # XXX The Latex builder will throw NoUri exceptions here,
    # work around that by ignoring them.
    #
    try:
        resolved = stddom.resolve_xref(app.env, docname, app.builder, 'doc',
                                       target, pending, None)
    except NoUri:
        resolved = None

    # Fall back to the plain matched text when nothing was resolved.
    if not resolved:
        return nodes.Text(match.group(0))
    return resolved
269 | ||
def get_c_namespace(app, docname):
    """Return the C namespace declared in the source file of *docname*.

    The file is read line by line and the name from the first line that
    matches RE_namespace is returned; reading stops there.

    :param app: the Sphinx application object; its environment maps
                *docname* to a file path.
    :param docname: name of the document to inspect.
    :returns: the namespace name, or '' when no line matches.
    """
    with open(app.env.doc2path(docname)) as source_file:
        hits = (RE_namespace.search(line) for line in source_file)
        # next() pulls lines lazily, so nothing past the first hit is read.
        found = next((hit for hit in hits if hit), None)
    return found.group(1) if found else ''
278 | ||
def auto_markup(app, doctree, name):
    """Replace plain text in paragraphs of *doctree* with cross-references.

    Connected to the 'doctree-resolved' event by setup().

    :param app: the Sphinx application object.
    :param doctree: the document tree to modify in place.
    :param name: name of the document the tree belongs to.
    """
    # The markup functions read the namespace from this module-level global.
    global c_namespace
    c_namespace = get_c_namespace(app, name)

    def text_but_not_a_reference(node):
        """Accept text nodes that are not literal text or part of a link."""
        if not isinstance(node, nodes.Text):
            return False
        #
        # Text directly inside a literal node is ``literal text``; skipping
        # it avoids adding cross-references to functions that have been
        # explicitly marked with :c:func:.
        #
        if isinstance(node.parent, nodes.literal):
            return False

        # Reject text that has a reference node anywhere above it.
        ancestor = node.parent
        while ancestor:
            if isinstance(ancestor, nodes.Referential):
                return False
            ancestor = ancestor.parent
        return True

    #
    # This loop could eventually be improved on.  Someday maybe we
    # want a proper tree traversal with a lot of awareness of which
    # kinds of nodes to prune.  But this works well for now.
    #
    for para in doctree.traverse(nodes.paragraph):
        for node in para.traverse(condition=text_but_not_a_reference):
            node.parent.replace(node, markup_refs(name, app, node))
d74b0d31 JC |
306 | |
def setup(app):
    """Sphinx extension entry point.

    Hooks auto_markup() to the 'doctree-resolved' event and returns the
    extension metadata, which declares the extension safe for parallel
    reading and writing.
    """
    app.connect('doctree-resolved', auto_markup)
    metadata = dict(parallel_read_safe=True, parallel_write_safe=True)
    return metadata