Skip to content

Commit 1a097f7

Browse files
authored
Add documentation to a variety of things in html5ever (#721)
Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>
1 parent d36824a commit 1a097f7

6 files changed

Lines changed: 95 additions & 7 deletions

File tree

html5ever/src/tokenizer/interface.rs

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,18 @@ pub use self::Token::{CharacterTokens, CommentToken, DoctypeToken, TagToken};
2020
pub use self::Token::{EOFToken, NullCharacterToken, ParseError};
2121

2222
/// A `DOCTYPE` token.
23-
// FIXME: already exists in Servo DOM
2423
#[derive(PartialEq, Eq, Clone, Debug, Default)]
2524
pub struct Doctype {
2625
pub name: Option<StrTendril>,
2726
pub public_id: Option<StrTendril>,
2827
pub system_id: Option<StrTendril>,
28+
/// Indicates if this DOCTYPE token should put the document in [quirks mode].
29+
///
30+
/// [quirks mode]: https://dom.spec.whatwg.org/#concept-document-quirks
2931
pub force_quirks: bool,
3032
}
3133

34+
/// Whether the tag is a start or an end tag.
3235
#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)]
3336
pub enum TagKind {
3437
StartTag,
@@ -38,8 +41,12 @@ pub enum TagKind {
3841
/// A tag token.
3942
#[derive(PartialEq, Eq, Clone, Debug)]
4043
pub struct Tag {
44+
/// Whether the tag is a start or an end tag.
4145
pub kind: TagKind,
4246
pub name: LocalName,
47+
/// Whether the tag closes itself.
48+
///
49+
/// An example of a self-closing tag is `<foo />`.
4350
pub self_closing: bool,
4451
pub attrs: Vec<Attribute>,
4552
}
@@ -70,21 +77,32 @@ impl Tag {
7077

7178
#[derive(PartialEq, Eq, Debug)]
7279
pub enum Token {
80+
/// A DOCTYPE declaration like `<!DOCTYPE html>`.
7381
DoctypeToken(Doctype),
82+
/// An opening or closing tag, like `<foo>` or `</bar>`.
7483
TagToken(Tag),
84+
/// A comment like `<!-- foo -->`.
7585
CommentToken(StrTendril),
86+
/// A sequence of characters.
7687
CharacterTokens(StrTendril),
88+
/// A `U+0000 NULL` character in the input.
7789
NullCharacterToken,
7890
EOFToken,
7991
ParseError(Cow<'static, str>),
8092
}
8193

94+
/// The result of a [TokenSink] consuming a single token.
8295
#[derive(Debug, PartialEq)]
8396
#[must_use]
8497
pub enum TokenSinkResult<Handle> {
98+
/// The tokenizer can continue parsing the input as usual.
8599
Continue,
100+
/// The token sink has completed parsing a `<script>` tag, blocking the tokenizer
101+
/// until the script is executed.
86102
Script(Handle),
103+
/// The tokenizer should set its state to the [PLAINTEXT state](https://html.spec.whatwg.org/#plaintext-state).
87104
Plaintext,
105+
/// The tokenizer should set its state to the given rawdata state.
88106
RawData(states::RawKind),
89107
/// The document indicated that the given encoding should be used to parse it.
90108
///
@@ -99,18 +117,20 @@ pub enum TokenSinkResult<Handle> {
99117

100118
/// Types which can receive tokens from the tokenizer.
101119
pub trait TokenSink {
120+
/// The type of a DOM node.
102121
type Handle;
103122

104123
/// Process a token.
105124
fn process_token(&self, token: Token, line_number: u64) -> TokenSinkResult<Self::Handle>;
106125

107-
// Signal sink that tokenization reached the end.
126+
/// Signal that tokenization reached the end of the document.
108127
fn end(&self) {}
109128

110-
/// Used in the markup declaration open state. By default, this always
129+
/// Used in the [markup declaration open state]. By default, this always
111130
/// returns false and thus all CDATA sections are tokenized as bogus
112131
/// comments.
113-
/// <https://html.spec.whatwg.org/multipage/#markup-declaration-open-state>
132+
///
133+
/// [markup declaration open state]: https://html.spec.whatwg.org/multipage/#markup-declaration-open-state
114134
fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool {
115135
false
116136
}

html5ever/src/tokenizer/mod.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,23 @@ mod char_ref;
3939
mod interface;
4040
pub mod states;
4141

42+
/// The result of invoking the tokenizer once.
4243
pub enum ProcessResult<Handle> {
44+
/// The tokenizer should be re-invoked immediately.
4345
Continue,
46+
/// The tokenizer has not finished, but it needs to wait for more
47+
/// input to arrive before it can continue.
4448
Suspend,
49+
/// The tokenizer was blocked by a `<script>`.
50+
///
51+
/// This `<script>` needs to be executed before tokenization
52+
/// can continue, as it might invoke `document.write`.
4553
Script(Handle),
54+
/// The tokenizer was blocked because it found a `<meta charset>` tag.
55+
///
56+
/// Such tags may force the user agent to re-parse the document with the new
57+
/// encoding, but non-conformant implementations can reasonably treat
58+
/// this as [Self::Continue].
4659
EncodingIndicator(StrTendril),
4760
}
4861

html5ever/src/tokenizer/states.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,51 +47,85 @@ pub enum AttrValueKind {
4747

4848
#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
4949
pub enum State {
50+
/// <https://html.spec.whatwg.org/#data-state>
5051
Data,
52+
/// <https://html.spec.whatwg.org/#plaintext-state>
5153
Plaintext,
54+
/// <https://html.spec.whatwg.org/#tag-open-state>
5255
TagOpen,
56+
/// <https://html.spec.whatwg.org/#end-tag-open-state>
5357
EndTagOpen,
58+
/// <https://html.spec.whatwg.org/#tag-name-state>
5459
TagName,
5560
RawData(RawKind),
5661
RawLessThanSign(RawKind),
5762
RawEndTagOpen(RawKind),
5863
RawEndTagName(RawKind),
5964
ScriptDataEscapeStart(ScriptEscapeKind),
65+
/// <https://html.spec.whatwg.org/#script-data-escape-start-dash-state>
6066
ScriptDataEscapeStartDash,
6167
ScriptDataEscapedDash(ScriptEscapeKind),
6268
ScriptDataEscapedDashDash(ScriptEscapeKind),
69+
/// <https://html.spec.whatwg.org/#script-data-double-escape-end-state>
6370
ScriptDataDoubleEscapeEnd,
71+
/// <https://html.spec.whatwg.org/#before-attribute-name-state>
6472
BeforeAttributeName,
73+
/// <https://html.spec.whatwg.org/#attribute-name-state>
6574
AttributeName,
75+
/// <https://html.spec.whatwg.org/#after-attribute-name-state>
6676
AfterAttributeName,
77+
/// <https://html.spec.whatwg.org/#before-attribute-value-state>
6778
BeforeAttributeValue,
6879
AttributeValue(AttrValueKind),
80+
/// <https://html.spec.whatwg.org/#after-attribute-value-(quoted)-state>
6981
AfterAttributeValueQuoted,
82+
/// <https://html.spec.whatwg.org/#self-closing-start-tag-state>
7083
SelfClosingStartTag,
84+
/// <https://html.spec.whatwg.org/#bogus-comment-state>
7185
BogusComment,
86+
/// <https://html.spec.whatwg.org/#markup-declaration-open-state>
7287
MarkupDeclarationOpen,
88+
/// <https://html.spec.whatwg.org/#comment-start-state>
7389
CommentStart,
90+
/// <https://html.spec.whatwg.org/#comment-start-dash-state>
7491
CommentStartDash,
92+
/// <https://html.spec.whatwg.org/#comment-state>
7593
Comment,
94+
/// <https://html.spec.whatwg.org/#comment-less-than-sign-state>
7695
CommentLessThanSign,
96+
/// <https://html.spec.whatwg.org/#comment-less-than-sign-bang-state>
7797
CommentLessThanSignBang,
98+
/// <https://html.spec.whatwg.org/#comment-less-than-sign-bang-dash-state>
7899
CommentLessThanSignBangDash,
100+
/// <https://html.spec.whatwg.org/#comment-less-than-sign-bang-dash-dash-state>
79101
CommentLessThanSignBangDashDash,
102+
/// <https://html.spec.whatwg.org/#comment-end-dash-state>
80103
CommentEndDash,
104+
/// <https://html.spec.whatwg.org/#comment-end-state>
81105
CommentEnd,
106+
/// <https://html.spec.whatwg.org/#comment-end-bang-state>
82107
CommentEndBang,
108+
/// <https://html.spec.whatwg.org/#doctype-state>
83109
Doctype,
110+
/// <https://html.spec.whatwg.org/#before-doctype-name-state>
84111
BeforeDoctypeName,
112+
/// <https://html.spec.whatwg.org/#doctype-name-state>
85113
DoctypeName,
114+
/// <https://html.spec.whatwg.org/#after-doctype-name-state>
86115
AfterDoctypeName,
87116
AfterDoctypeKeyword(DoctypeIdKind),
88117
BeforeDoctypeIdentifier(DoctypeIdKind),
89118
DoctypeIdentifierDoubleQuoted(DoctypeIdKind),
90119
DoctypeIdentifierSingleQuoted(DoctypeIdKind),
91120
AfterDoctypeIdentifier(DoctypeIdKind),
121+
/// <https://html.spec.whatwg.org/#between-doctype-public-and-system-identifiers-state>
92122
BetweenDoctypePublicAndSystemIdentifiers,
123+
/// <https://html.spec.whatwg.org/#bogus-doctype-state>
93124
BogusDoctype,
125+
/// <https://html.spec.whatwg.org/#cdata-section-state>
94126
CdataSection,
127+
/// <https://html.spec.whatwg.org/#cdata-section-bracket-state>
95128
CdataSectionBracket,
129+
/// <https://html.spec.whatwg.org/#cdata-section-end-state>
96130
CdataSectionEnd,
97131
}

html5ever/src/tree_builder/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ where
269269
}
270270

271271
/// Call the `Tracer`'s `trace_handle` method on every `Handle` in the tree builder's
272-
/// internal state. This is intended to support garbage-collected DOMs.
272+
/// internal state. This is intended to support garbage-collected DOMs.
273273
pub fn trace_handles(&self, tracer: &dyn Tracer<Handle = Handle>) {
274274
tracer.trace_handle(&self.doc_handle);
275275
for e in &*self.open_elems.borrow() {

html5ever/src/tree_builder/rules.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,9 @@ where
8989
Handle: Clone,
9090
Sink: TreeSink<Handle = Handle>,
9191
{
92-
/// Process an HTML content token
92+
/// Process an HTML token.
9393
///
94-
/// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhtml
94+
/// <https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhtml>
9595
pub(crate) fn step(&self, mode: InsertionMode, token: Token) -> ProcessResult<Handle> {
9696
self.debug_step(mode, &token);
9797

html5ever/src/tree_builder/types.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,47 @@ use crate::tendril::StrTendril;
1616

1717
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
1818
pub(crate) enum InsertionMode {
19+
/// <https://html.spec.whatwg.org/#the-initial-insertion-mode>
1920
Initial,
21+
/// <https://html.spec.whatwg.org/#the-before-html-insertion-mode>
2022
BeforeHtml,
23+
/// <https://html.spec.whatwg.org/#the-before-head-insertion-mode>
2124
BeforeHead,
25+
/// <https://html.spec.whatwg.org/#parsing-main-inhead>
2226
InHead,
27+
/// <https://html.spec.whatwg.org/#parsing-main-inheadnoscript>
2328
InHeadNoscript,
29+
/// <https://html.spec.whatwg.org/#the-after-head-insertion-mode>
2430
AfterHead,
31+
/// <https://html.spec.whatwg.org/#parsing-main-inbody>
2532
InBody,
33+
/// <https://html.spec.whatwg.org/#parsing-main-incdata>
2634
Text,
35+
/// <https://html.spec.whatwg.org/#parsing-main-intable>
2736
InTable,
37+
/// <https://html.spec.whatwg.org/#parsing-main-intabletext>
2838
InTableText,
39+
/// <https://html.spec.whatwg.org/#parsing-main-incaption>
2940
InCaption,
41+
/// <https://html.spec.whatwg.org/#parsing-main-incolgroup>
3042
InColumnGroup,
43+
/// <https://html.spec.whatwg.org/#parsing-main-intbody>
3144
InTableBody,
45+
/// <https://html.spec.whatwg.org/#parsing-main-intr>
3246
InRow,
47+
/// <https://html.spec.whatwg.org/#parsing-main-intd>
3348
InCell,
49+
/// <https://html.spec.whatwg.org/#parsing-main-intemplate>
3450
InTemplate,
51+
/// <https://html.spec.whatwg.org/#parsing-main-afterbody>
3552
AfterBody,
53+
/// <https://html.spec.whatwg.org/#parsing-main-inframeset>
3654
InFrameset,
55+
/// <https://html.spec.whatwg.org/#parsing-main-afterframeset>
3756
AfterFrameset,
57+
/// <https://html.spec.whatwg.org/#the-after-after-body-insertion-mode>
3858
AfterAfterBody,
59+
/// <https://html.spec.whatwg.org/#the-after-after-frameset-insertion-mode>
3960
AfterAfterFrameset,
4061
}
4162

0 commit comments

Comments
 (0)