xml/reader/parser/
inside_processing_instruction.rs

1use common::{
2    is_name_start_char, is_name_char,
3};
4
5use reader::events::XmlEvent;
6use reader::lexer::Token;
7
8use super::{Result, PullParser, State, ProcessingInstructionSubstate, DeclarationSubstate};
9
10impl PullParser {
11    pub fn inside_processing_instruction(&mut self, t: Token, s: ProcessingInstructionSubstate) -> Option<Result> {
12        match s {
13            ProcessingInstructionSubstate::PIInsideName => match t {
14                Token::Character(c) if !self.buf_has_data() && is_name_start_char(c) ||
15                                 self.buf_has_data() && is_name_char(c) => self.append_char_continue(c),
16
17                Token::ProcessingInstructionEnd => {
18                    // self.buf contains PI name
19                    let name = self.take_buf();
20
21                    // Don't need to check for declaration because it has mandatory attributes
22                    // but there is none
23                    match &name[..] {
24                        // Name is empty, it is an error
25                        "" => Some(self_error!(self; "Encountered processing instruction without name")),
26
27                        // Found <?xml-like PI not at the beginning of a document,
28                        // it is an error - see section 2.6 of XML 1.1 spec
29                        "xml"|"xmL"|"xMl"|"xML"|"Xml"|"XmL"|"XMl"|"XML" =>
30                            Some(self_error!(self; "Invalid processing instruction: <?{}", name)),
31
32                        // All is ok, emitting event
33                        _ => {
34                            self.into_state_emit(
35                                State::OutsideTag,
36                                Ok(XmlEvent::ProcessingInstruction {
37                                    name: name,
38                                    data: None
39                                })
40                            )
41                        }
42                    }
43                }
44
45                Token::Whitespace(_) => {
46                    // self.buf contains PI name
47                    let name = self.take_buf();
48
49                    match &name[..] {
50                        // We have not ever encountered an element and have not parsed XML declaration
51                        "xml" if !self.encountered_element && !self.parsed_declaration =>
52                            self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeVersion)),
53
54                        // Found <?xml-like PI after the beginning of a document,
55                        // it is an error - see section 2.6 of XML 1.1 spec
56                        "xml"|"xmL"|"xMl"|"xML"|"Xml"|"XmL"|"XMl"|"XML"
57                            if self.encountered_element || self.parsed_declaration =>
58                            Some(self_error!(self; "Invalid processing instruction: <?{}", name)),
59
60                        // All is ok, starting parsing PI data
61                        _ => {
62                            self.lexer.disable_errors();  // data is arbitrary, so disable errors
63                            self.data.name = name;
64                            self.into_state_continue(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideData))
65                        }
66
67                    }
68                }
69
70                _ => Some(self_error!(self; "Unexpected token: <?{}{}", self.buf, t))
71            },
72
73            ProcessingInstructionSubstate::PIInsideData => match t {
74                Token::ProcessingInstructionEnd => {
75                    self.lexer.enable_errors();
76                    let name = self.data.take_name();
77                    let data = self.take_buf();
78                    self.into_state_emit(
79                        State::OutsideTag,
80                        Ok(XmlEvent::ProcessingInstruction {
81                            name: name,
82                            data: Some(data)
83                        })
84                    )
85                },
86
87                // Any other token should be treated as plain characters
88                _ => {
89                    t.push_to_string(&mut self.buf);
90                    None
91                }
92            },
93        }
94    }
95
96}