* [PATCH v3] qapi: Add documentation format validation @ 2025-10-29 17:30 Vladimir Sementsov-Ogievskiy 2025-10-30 14:01 ` Markus Armbruster 0 siblings, 1 reply; 5+ messages in thread From: Vladimir Sementsov-Ogievskiy @ 2025-10-29 17:30 UTC (permalink / raw) To: armbru; +Cc: michael.roth, qemu-devel, vsementsov Add explicit validation for QAPI documentation formatting rules: 1. Lines must not exceed 70 columns in width (including '# ' prefix) 2. Sentences must be separated by two spaces Example sections are excluded, we don't require them to be <= 70, that would be too restrictive. Example sections share common 80-columns recommendations (not requirements). Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> --- Hi all! This substitutes my previous attempt "[PATCH v2 00/33] qapi: docs: width=70 and two spaces between sentences" Supersedes: <20251011140441.297246-1-vsementsov@yandex-team.ru> v3: 01: ignore example sections other commits: dropped :) Of course, this _does not_ build on top of current master. v3 is to be based on top of coming soon doc-cleanup series by Markus. scripts/qapi/parser.py | 46 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py index 9fbf80a541..b9d76fff39 100644 --- a/scripts/qapi/parser.py +++ b/scripts/qapi/parser.py @@ -108,6 +108,9 @@ def __init__(self, self.exprs: List[QAPIExpression] = [] self.docs: List[QAPIDoc] = [] + # State for tracking qmp-example blocks + self._in_qmp_example = False + # Showtime! self._parse() @@ -423,12 +426,53 @@ def get_doc_line(self) -> Optional[str]: if self.val != '##': raise QAPIParseError( self, "junk after '##' at end of documentation comment") + self._in_qmp_example = False return None if self.val == '#': return '' if self.val[1] != ' ': raise QAPIParseError(self, "missing space after #") - return self.val[2:].rstrip() + + line = self.val[2:].rstrip() + + if line.startswith('.. qmp-example::'): + self._in_qmp_example = True + + if not self._in_qmp_example: + self._validate_doc_line_format(line) + + return line + + def _validate_doc_line_format(self, line: str) -> None: + """ + Validate documentation format rules for a single line: + 1. Lines should not exceed 70 columns + 2. Sentences should be separated by two spaces + """ + full_line_length = len(line) + 2 # "# " = 2 characters + if full_line_length > 70: + # Skip URL lines - they can't be broken + stripped_line = line.strip() + if (stripped_line.startswith(('http://', 'https://', 'ftp://')) and + ' ' not in stripped_line): + pass + else: + raise QAPIParseError( + self, f"documentation line exceeds 70 columns " + f"({full_line_length} columns): {line[:50]}..." + ) + + single_space_pattern = r'[.!?] [A-Z0-9]' + for m in list(re.finditer(single_space_pattern, line)): + left = line[0:m.start() + 1] + # Ignore abbreviations and numbered lists + if left.endswith('e.g.') or re.fullmatch(r' *\d\.', left): + continue + raise QAPIParseError( + self, f"documentation has single space after sentence " + f"ending. Use two spaces between sentences: " + f"...{line[m.start()-5:m.end()+5]}..." + ) @staticmethod def _match_at_name_colon(string: str) -> Optional[Match[str]]: -- 2.48.1 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH v3] qapi: Add documentation format validation 2025-10-29 17:30 [PATCH v3] qapi: Add documentation format validation Vladimir Sementsov-Ogievskiy @ 2025-10-30 14:01 ` Markus Armbruster 2025-10-30 18:11 ` Markus Armbruster ` (2 more replies) 0 siblings, 3 replies; 5+ messages in thread From: Markus Armbruster @ 2025-10-30 14:01 UTC (permalink / raw) To: Vladimir Sementsov-Ogievskiy; +Cc: michael.roth, qemu-devel Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes: > Add explicit validation for QAPI documentation formatting rules: > > 1. Lines must not exceed 70 columns in width (including '# ' prefix) > 2. Sentences must be separated by two spaces > > Example sections are excluded, we don't require them to be <= 70, > that would be too restrictive. > > Example sections share common 80-columns recommendations (not > requirements). > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> > --- > > Hi all! > > This substitutes my previous attempt > "[PATCH v2 00/33] qapi: docs: width=70 and two spaces between sentences" > Supersedes: <20251011140441.297246-1-vsementsov@yandex-team.ru> > > v3: > 01: ignore example sections > other commits: dropped :) > > Of course, this _does not_ build on top of current master. v3 is > to be based on top of coming soon doc-cleanup series by Markus. I'll post this today. > scripts/qapi/parser.py | 46 +++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 45 insertions(+), 1 deletion(-) > > diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py > index 9fbf80a541..b9d76fff39 100644 > --- a/scripts/qapi/parser.py > +++ b/scripts/qapi/parser.py > @@ -108,6 +108,9 @@ def __init__(self, > self.exprs: List[QAPIExpression] = [] > self.docs: List[QAPIDoc] = [] > > + # State for tracking qmp-example blocks > + self._in_qmp_example = False > + > # Showtime! > self._parse() > > @@ -423,12 +426,53 @@ def get_doc_line(self) -> Optional[str]: > if self.val != '##': > raise QAPIParseError( > self, "junk after '##' at end of documentation comment") > + self._in_qmp_example = False > return None > if self.val == '#': > return '' > if self.val[1] != ' ': > raise QAPIParseError(self, "missing space after #") > - return self.val[2:].rstrip() > + > + line = self.val[2:].rstrip() > + > + if line.startswith('.. qmp-example::'): This matches how we spell the directive, but ReST appears to accept additional spaces. Let's use re.match(r'\.\. +qmp-example *::', line) > + self._in_qmp_example = True > + > + if not self._in_qmp_example: > + self._validate_doc_line_format(line) > + > + return line self._in_qmp_example is initialized to False at the beginning of a doc comment block, becomes True at the first '.. qmp-example::' line, then remains True until the end of the doc comment block. This isn't quite right. Consider qapi/control.json: ## # @qmp_capabilities: # # Enable QMP capabilities. # # @enable: An optional list of `QMPCapability` values to enable. The # client must not enable any capability that is not mentioned in # the QMP greeting message. If the field is not provided, it # means no QMP capabilities will be enabled. (since 2.12) # # .. qmp-example:: # # -> { "execute": "qmp_capabilities", # "arguments": { "enable": [ "oob" ] } } # <- { "return": {} } # # .. note:: This command is valid exactly when first connecting: it # must be issued before any other command will be accepted, and # will fail once the monitor is accepting other commands. (see # :doc:`/interop/qmp-spec`) # # .. note:: The QMP client needs to explicitly enable QMP # capabilities, otherwise all the QMP capabilities will be turned # off by default. # # Since: 0.13 ## ReST directives like '.. qmp-example::' and '.. note::' stop at the first non-blank non-indented line. We need to change self._in_qmp_example from True to False at such a line. > + > + def _validate_doc_line_format(self, line: str) -> None: > + """ > + Validate documentation format rules for a single line: > + 1. Lines should not exceed 70 columns > + 2. Sentences should be separated by two spaces > + """ > + full_line_length = len(line) + 2 # "# " = 2 characters > + if full_line_length > 70: > + # Skip URL lines - they can't be broken > + stripped_line = line.strip() > + if (stripped_line.startswith(('http://', 'https://', 'ftp://')) and > + ' ' not in stripped_line): Avoidable long line: if (stripped_line.startswith(('http://', 'https://', 'ftp://')) and ' ' not in stripped_line): But I'd prefer if re.match(r' *(https?|ftp)://[^ ]*$', line): instead. > + pass > + else: > + raise QAPIParseError( > + self, f"documentation line exceeds 70 columns " > + f"({full_line_length} columns): {line[:50]}..." The resulting error message is rather long, the meaning of the parenthesis is not immediately obvious, and quoting the tail of the line doesn't feel worthwhile. I think "documentation line exceeds 70 columns" or "documentation line longer than 70 characters" suffices. > + ) > + > + single_space_pattern = r'[.!?] [A-Z0-9]' This pattern matches possible sentence ends that lack a second space: sentence-ending punctuation, single space, capital letter or digit. The pattern avoids common false positives in the middle of a sentence, such as "i.e." here: # Describes a block export, i.e. how single node should be exported on ~~~~~ Good. There's still a risk of false positives, though: a capital letter need not be the start of a sentence, it could also be a proper noun, or the pronoun "I". I figure the latter is vanishingly unlikely to occur in technical documentation. Example of the former: # @format: Extent type (e.g. FLAT or SPARSE) You filter these out below. Digits are even more ambiguous than capital letters: they can occur in the middle of a sentence as much as at the beginning. Do they occur? $ git-grep '\. [0-9]' \*.json docs/interop/firmware.json:# of SMRAM. 48MB should suffice for 4TB of guest-phys Yes, but only in a QAPI schema we don't actually parse. We should probably update these to conform to conventions. Not today. Let's keep the digits in the pattern for now. The pattern misses sentence ends like this one: # @children: Information about child block nodes. (since: 10.1) ~~~~~~~ As far as I can tell, all misses are before "(". Let's simply add "(" to the pattern: [A-Z0-9(]. > + for m in list(re.finditer(single_space_pattern, line)): > + left = line[0:m.start() + 1] @left is the line left of the match plus the first character of the match, i.e. the punctuation character. > + # Ignore abbreviations and numbered lists > + if left.endswith('e.g.') or re.fullmatch(r' *\d\.', left): > + continue Skip if we matched "e.g. " or a numbered list item. We saw above why we need the former: "e.g. FLAT". We need the latter for # 1. The guest may be in a catastrophic state or can have # corrupted memory, which cannot be trusted Whenever I see regexp match followed by string split followed by more matching, I wonder whether a single match would do. Here's my try: single_space_pattern = r'(\be\.g\.|^ *\d\.|([.!?])) [A-Z0-9(]' for m in list(re.finditer(single_space_pattern, line)): if not m.group(2): continue This uses a common regexp trick: match exception | ... | (wanted), then check whether the group containing wanted matched. > + raise QAPIParseError( > + self, f"documentation has single space after sentence " > + f"ending. Use two spaces between sentences: " > + f"...{line[m.start()-5:m.end()+5]}..." Again, the error message is rather long: ../qapi/block-core.json:162:1: documentation has single space after sentence ending. Use two spaces between sentences: ...ormat. If e... The error message consists of two parts: the complaint, and a hint on how to fix it. We can separate the two by embedding a newline, like expr.py's check_keys() does: raise QAPIParseError( self, "documentation has single space after sentence ending" "\nUse two spaces between sentences: " f"...{line[m.start(2)-5:m.end(2)+5]}...") This results in ../qapi/block-core.json:162:1: documentation has single space after sentence ending Use two spaces between sentences: ...ormat. If e... Better, but the "...ormat. If e..." part is still odd. Ideally, the error message location would point right to the trouble spot, like so: ../qapi/block-core.json:162:47: bla bla Sadly, it points to the beginning of the line instead: "162:1:" instead "162:47". This is because the error is reported for the parser's current position, and the current position is the beginning of the token, i.e. the '#' starting the comment. I can offer a disgusting hack: # HACK so the error message points to the the offending spot self.pos = self.line_pos + 2 + m.start(2) + 1 raise QAPIParseError( self, "Use two spaces between sentences") This results in ../qapi/block-core.json:162:47: Use two spaces between sentences Emacs then takes me right to the offending single space. > + ) > > @staticmethod > def _match_at_name_colon(string: str) -> Optional[Match[str]]: ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v3] qapi: Add documentation format validation 2025-10-30 14:01 ` Markus Armbruster @ 2025-10-30 18:11 ` Markus Armbruster 2025-10-31 9:42 ` Vladimir Sementsov-Ogievskiy 2025-10-31 10:00 ` Markus Armbruster 2 siblings, 0 replies; 5+ messages in thread From: Markus Armbruster @ 2025-10-30 18:11 UTC (permalink / raw) To: Vladimir Sementsov-Ogievskiy; +Cc: michael.roth, qemu-devel Markus Armbruster <armbru@redhat.com> writes: > Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes: > >> Add explicit validation for QAPI documentation formatting rules: >> >> 1. Lines must not exceed 70 columns in width (including '# ' prefix) >> 2. Sentences must be separated by two spaces >> >> Example sections are excluded, we don't require them to be <= 70, >> that would be too restrictive. >> >> Example sections share common 80-columns recommendations (not >> requirements). >> >> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> >> --- >> >> Hi all! >> >> This substitutes my previous attempt >> "[PATCH v2 00/33] qapi: docs: width=70 and two spaces between sentences" >> Supersedes: <20251011140441.297246-1-vsementsov@yandex-team.ru> >> >> v3: >> 01: ignore example sections >> other commits: dropped :) >> >> Of course, this _does not_ build on top of current master. v3 is >> to be based on top of coming soon doc-cleanup series by Markus. > > I'll post this today. Make that tomorrow: docs/interop/firmware.json needs cleanup, or else "make check" fails. Going to throw in docs/interop/vhost-user.json for good measure. [...] ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v3] qapi: Add documentation format validation 2025-10-30 14:01 ` Markus Armbruster 2025-10-30 18:11 ` Markus Armbruster @ 2025-10-31 9:42 ` Vladimir Sementsov-Ogievskiy 2025-10-31 10:00 ` Markus Armbruster 2 siblings, 0 replies; 5+ messages in thread From: Vladimir Sementsov-Ogievskiy @ 2025-10-31 9:42 UTC (permalink / raw) To: Markus Armbruster; +Cc: michael.roth, qemu-devel On 30.10.25 17:01, Markus Armbruster wrote: > Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes: > >> Add explicit validation for QAPI documentation formatting rules: >> >> 1. Lines must not exceed 70 columns in width (including '# ' prefix) >> 2. Sentences must be separated by two spaces >> >> Example sections are excluded, we don't require them to be <= 70, >> that would be too restrictive. >> >> Example sections share common 80-columns recommendations (not >> requirements). >> >> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> >> --- >> >> Hi all! >> >> This substitutes my previous attempt >> "[PATCH v2 00/33] qapi: docs: width=70 and two spaces between sentences" >> Supersedes: <20251011140441.297246-1-vsementsov@yandex-team.ru> >> >> v3: >> 01: ignore example sections >> other commits: dropped :) >> >> Of course, this _does not_ build on top of current master. v3 is >> to be based on top of coming soon doc-cleanup series by Markus. > > I'll post this today. > >> scripts/qapi/parser.py | 46 +++++++++++++++++++++++++++++++++++++++++- >> 1 file changed, 45 insertions(+), 1 deletion(-) >> >> diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py >> index 9fbf80a541..b9d76fff39 100644 >> --- a/scripts/qapi/parser.py >> +++ b/scripts/qapi/parser.py [..] > > single_space_pattern = r'(\be\.g\.|^ *\d\.|([.!?])) [A-Z0-9(]' > for m in list(re.finditer(single_space_pattern, line)): > if not m.group(2): > continue > > This uses a common regexp trick: match exception | ... | (wanted), then > check whether the group containing wanted matched. Cool, I didn't know this! > >> + raise QAPIParseError( >> + self, f"documentation has single space after sentence " >> + f"ending. Use two spaces between sentences: " >> + f"...{line[m.start()-5:m.end()+5]}..." > [..] > > I can offer a disgusting hack: > > # HACK so the error message points to the the offending spot > self.pos = self.line_pos + 2 + m.start(2) + 1 > raise QAPIParseError( > self, "Use two spaces between sentences") > > This results in > > ../qapi/block-core.json:162:47: Use two spaces between sentences > > Emacs then takes me right to the offending single space. > Great! >> + ) >> >> @staticmethod >> def _match_at_name_colon(string: str) -> Optional[Match[str]]: > Thanks for careful review, will resend soon. -- Best regards, Vladimir ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v3] qapi: Add documentation format validation 2025-10-30 14:01 ` Markus Armbruster 2025-10-30 18:11 ` Markus Armbruster 2025-10-31 9:42 ` Vladimir Sementsov-Ogievskiy @ 2025-10-31 10:00 ` Markus Armbruster 2 siblings, 0 replies; 5+ messages in thread From: Markus Armbruster @ 2025-10-31 10:00 UTC (permalink / raw) To: Vladimir Sementsov-Ogievskiy; +Cc: michael.roth, qemu-devel Markus Armbruster <armbru@redhat.com> writes: > Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes: > >> Add explicit validation for QAPI documentation formatting rules: >> >> 1. Lines must not exceed 70 columns in width (including '# ' prefix) >> 2. Sentences must be separated by two spaces >> >> Example sections are excluded, we don't require them to be <= 70, >> that would be too restrictive. >> >> Example sections share common 80-columns recommendations (not >> requirements). >> >> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> >> --- >> >> Hi all! >> >> This substitutes my previous attempt >> "[PATCH v2 00/33] qapi: docs: width=70 and two spaces between sentences" >> Supersedes: <20251011140441.297246-1-vsementsov@yandex-team.ru> >> >> v3: >> 01: ignore example sections >> other commits: dropped :) >> >> Of course, this _does not_ build on top of current master. v3 is >> to be based on top of coming soon doc-cleanup series by Markus. [...] >> + single_space_pattern = r'[.!?] [A-Z0-9]' > > This pattern matches possible sentence ends that lack a second space: > sentence-ending punctuation, single space, capital letter or digit. > > The pattern avoids common false positives in the middle of a sentence, > such as "i.e." here: > > # Describes a block export, i.e. how single node should be exported on > ~~~~~ > > Good. There's still a risk of false positives, though: a capital letter > need not be the start of a sentence, it could also be a proper noun, or > the pronoun "I". I figure the latter is vanishingly unlikely to occur > in technical documentation. Example of the former: > > # @format: Extent type (e.g. FLAT or SPARSE) > > You filter these out below. > > Digits are even more ambiguous than capital letters: they can occur in > the middle of a sentence as much as at the beginning. Do they occur? > > $ git-grep '\. [0-9]' \*.json > docs/interop/firmware.json:# of SMRAM. 48MB should suffice for 4TB of guest-phys > > Yes, but only in a QAPI schema we don't actually parse. We should > probably update these to conform to conventions. Not today. Actually, we do parse it, but only in "make check". See docs/meson.build. The cleanup series I just sent covers it. Unfortunately, this adds another case for you. In master: # @executable: Identifies the firmware executable. The @mode # indicates whether there will be an associated # NVRAM template present. The preferred # corresponding QEMU command line options are # -drive if=none,id=pflash0,readonly=on,file=@executable.@filename,format=@executable.@format # -machine pflash0=pflash0 # or equivalent -blockdev instead of -drive. When # @mode is @combined the executable must be # cloned before use and configured with readonly=off. # With QEMU versions older than 4.0, you have to use # -drive if=pflash,unit=0,readonly=on,file=@executable.@filename,format=@executable.@format My series cleans this up to # @executable: Identifies the firmware executable. The @mode # indicates whether there will be an associated NVRAM template # present. The preferred corresponding QEMU command line options # are # # :: # # -drive if=none,id=pflash0,readonly=on,file=@executable.@filename,format=@executable.@format # -machine pflash0=pflash0 # # or equivalent -blockdev instead of -drive. When @mode is # @combined the executable must be cloned before use and # configured with readonly=off. With QEMU versions older than # 4.0, you have to use # # :: # # -drive if=pflash,unit=0,readonly=on,file=@executable.@filename,format=@executable.@format This uses ReST markup for literal blocks. There are two forms. 1. A paragraph containing just "::" starts a literal block. 2. A paragraph ending with "::" also starts one. In either case, the block's contents is indented. See https://docutils.sourceforge.io/docs/user/rst/quickref.html#literal-blocks for more. I think you need to switch literal mode on when you detect a qmp-example directive or a literal block, and record the line's indentation. switch it off at the first line that is no more indented than the recorded indentation. [...] ^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2025-10-31 10:01 UTC | newest] Thread overview: 5+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2025-10-29 17:30 [PATCH v3] qapi: Add documentation format validation Vladimir Sementsov-Ogievskiy 2025-10-30 14:01 ` Markus Armbruster 2025-10-30 18:11 ` Markus Armbruster 2025-10-31 9:42 ` Vladimir Sementsov-Ogievskiy 2025-10-31 10:00 ` Markus Armbruster
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.