ciborium_ll/
seg.rs

1use super::*;
2
3use ciborium_io::Read;
4
5use core::marker::PhantomData;
6
/// A parser for incoming segments
///
/// A parser turns the raw bytes of a segment chunk into an `Item`. It may
/// hold back a few bytes from one call to the next (see [`Parser::saved`]).
pub trait Parser: Default {
    /// The type of item that is parsed
    type Item: ?Sized;

    /// The parsing error that may occur
    type Error;

    /// The main parsing function
    ///
    /// This function processes the incoming bytes and returns the item. The
    /// returned item borrows from `bytes`, not from the parser.
    ///
    /// One important detail that **MUST NOT** be overlooked is that the
    /// parser may save data from a previous parsing attempt. The number of
    /// bytes saved is indicated by the `Parser::saved()` function. The saved
    /// bytes will be copied into the beginning of the `bytes` slice before
    /// processing. Therefore, two requirements should be met.
    ///
    /// First, the incoming byte slice should be larger than the saved bytes.
    ///
    /// Second, the incoming byte slice should contain new bytes only after
    /// the saved byte prefix.
    ///
    /// If both criteria are met, this allows the parser to prepend its saved
    /// bytes without any additional allocation.
    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error>;

    /// Indicates the number of saved bytes in the parser
    ///
    /// The default implementation reports `0`, which is correct for any
    /// parser that never carries bytes over between calls.
    fn saved(&self) -> usize {
        0
    }
}
39
40/// A bytes parser
41///
42/// No actual processing is performed and the input bytes are directly
43/// returned. This implies that this parser never saves any bytes internally.
44#[derive(Default)]
45pub struct Bytes(());
46
47impl Parser for Bytes {
48    type Item = [u8];
49    type Error = core::convert::Infallible;
50
51    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error> {
52        Ok(bytes)
53    }
54}
55
56/// A text parser
57///
58/// This parser converts the input bytes to a `str`. This parser preserves
59/// trailing invalid UTF-8 sequences in the case that chunking fell in the
60/// middle of a valid UTF-8 character.
61#[derive(Default)]
62pub struct Text {
63    stored: usize,
64    buffer: [u8; 3],
65}
66
67impl Parser for Text {
68    type Item = str;
69    type Error = core::str::Utf8Error;
70
71    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error> {
72        // If we cannot advance, return nothing.
73        if bytes.len() <= self.stored {
74            return Ok("");
75        }
76
77        // Copy previously invalid data into place.
78        bytes[..self.stored].clone_from_slice(&self.buffer[..self.stored]);
79
80        Ok(match core::str::from_utf8(bytes) {
81            Ok(s) => {
82                self.stored = 0;
83                s
84            }
85            Err(e) => {
86                let valid_len = e.valid_up_to();
87                let invalid_len = bytes.len() - valid_len;
88
89                // If the size of the invalid UTF-8 is large enough to hold
90                // all valid UTF-8 characters, we have a syntax error.
91                if invalid_len > self.buffer.len() {
92                    return Err(e);
93                }
94
95                // Otherwise, store the invalid bytes for the next read cycle.
96                self.buffer[..invalid_len].clone_from_slice(&bytes[valid_len..]);
97                self.stored = invalid_len;
98
99                // Decode the valid part of the string.
100                core::str::from_utf8(&bytes[..valid_len]).unwrap()
101            }
102        })
103    }
104
105    fn saved(&self) -> usize {
106        self.stored
107    }
108}
109
110/// A CBOR segment
111///
112/// This type represents a single bytes or text segment on the wire. It can be
113/// read out in parsed chunks based on the size of the input scratch buffer.
114pub struct Segment<'r, R: Read, P: Parser> {
115    reader: &'r mut Decoder<R>,
116    unread: usize,
117    offset: usize,
118    parser: P,
119}
120
121impl<'r, R: Read, P: Parser> Segment<'r, R, P> {
122    /// Gets the number of unprocessed bytes
123    #[inline]
124    pub fn left(&self) -> usize {
125        self.unread + self.parser.saved()
126    }
127
128    /// Gets the next parsed chunk within the segment
129    ///
130    /// Returns `Ok(None)` when all chunks have been read.
131    #[inline]
132    pub fn pull<'a>(
133        &mut self,
134        buffer: &'a mut [u8],
135    ) -> Result<Option<&'a P::Item>, Error<R::Error>> {
136        use core::cmp::min;
137
138        let prev = self.parser.saved();
139        match self.unread {
140            0 if prev == 0 => return Ok(None),
141            0 => return Err(Error::Syntax(self.offset)),
142            _ => (),
143        }
144
145        // Determine how many bytes to read.
146        let size = min(buffer.len(), prev + self.unread);
147        let full = &mut buffer[..size];
148        let next = &mut full[min(size, prev)..];
149
150        // Read additional bytes.
151        self.reader.read_exact(next)?;
152        self.unread -= next.len();
153
154        self.parser
155            .parse(full)
156            .or(Err(Error::Syntax(self.offset)))
157            .map(Some)
158    }
159}
160
161/// A sequence of CBOR segments
162///
163/// CBOR allows for bytes or text items to be segmented. This type represents
164/// the state of that segmented input stream.
165pub struct Segments<'r, R: Read, P: Parser> {
166    reader: &'r mut Decoder<R>,
167    finish: bool,
168    nested: usize,
169    parser: PhantomData<P>,
170    unwrap: fn(Header) -> Result<Option<usize>, ()>,
171}
172
173impl<'r, R: Read, P: Parser> Segments<'r, R, P> {
174    #[inline]
175    pub(crate) fn new(
176        decoder: &'r mut Decoder<R>,
177        unwrap: fn(Header) -> Result<Option<usize>, ()>,
178    ) -> Self {
179        Self {
180            reader: decoder,
181            finish: false,
182            nested: 0,
183            parser: PhantomData,
184            unwrap,
185        }
186    }
187
188    /// Gets the next segment in the stream
189    ///
190    /// Returns `Ok(None)` at the conclusion of the stream.
191    #[inline]
192    pub fn pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>> {
193        while !self.finish {
194            let offset = self.reader.offset();
195            match self.reader.pull()? {
196                Header::Break if self.nested == 1 => return Ok(None),
197                Header::Break if self.nested > 1 => self.nested -= 1,
198                header => match (self.unwrap)(header) {
199                    Err(..) => return Err(Error::Syntax(offset)),
200                    Ok(None) => self.nested += 1,
201                    Ok(Some(len)) => {
202                        self.finish = self.nested == 0;
203                        return Ok(Some(Segment {
204                            reader: self.reader,
205                            unread: len,
206                            offset,
207                            parser: P::default(),
208                        }));
209                    }
210                },
211            }
212        }
213
214        Ok(None)
215    }
216}