run_test_suite_lib/output/
line.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::output::{
6    ArtifactType, DirectoryArtifactType, DynArtifact, DynDirectoryArtifact, EntityId, EntityInfo,
7    ReportedOutcome, Reporter, Timestamp,
8};
9use std::io::{Error, Write};
10use vte::{Params, Parser, Perform};
11
12/// A reporter that composes an inner reporter. Filters out ANSI in artifact output.
13pub(crate) struct AnsiFilterReporter<R: Reporter> {
14    inner: R,
15}
16
17impl<R: Reporter> AnsiFilterReporter<R> {
18    pub(crate) fn new(inner: R) -> Self {
19        Self { inner }
20    }
21}
22
23impl<R: Reporter> Reporter for AnsiFilterReporter<R> {
24    fn new_entity(&self, entity: &EntityId, name: &str) -> Result<(), Error> {
25        self.inner.new_entity(entity, name)
26    }
27
28    fn set_entity_info(&self, entity: &EntityId, info: &EntityInfo) {
29        self.inner.set_entity_info(entity, info)
30    }
31
32    fn entity_started(&self, entity: &EntityId, timestamp: Timestamp) -> Result<(), Error> {
33        self.inner.entity_started(entity, timestamp)
34    }
35
36    fn entity_stopped(
37        &self,
38        entity: &EntityId,
39        outcome: &ReportedOutcome,
40        timestamp: Timestamp,
41    ) -> Result<(), Error> {
42        self.inner.entity_stopped(entity, outcome, timestamp)
43    }
44
45    fn entity_finished(&self, entity: &EntityId) -> Result<(), Error> {
46        self.inner.entity_finished(entity)
47    }
48
49    fn new_artifact(
50        &self,
51        entity: &EntityId,
52        artifact_type: &ArtifactType,
53    ) -> Result<Box<DynArtifact>, Error> {
54        let inner_artifact = self.inner.new_artifact(entity, artifact_type)?;
55        match artifact_type {
56            // All the artifact types are enumerated here as we expect future artifacts
57            // should not be filtered.
58            ArtifactType::Stdout
59            | ArtifactType::Stderr
60            | ArtifactType::Syslog
61            | ArtifactType::RestrictedLog => {
62                Ok(Box::new(AnsiFilterWriter::new(inner_artifact)) as Box<DynArtifact>)
63            }
64        }
65    }
66
67    fn new_directory_artifact(
68        &self,
69        entity: &EntityId,
70        artifact_type: &DirectoryArtifactType,
71        component_moniker: Option<String>,
72    ) -> Result<Box<DynDirectoryArtifact>, Error> {
73        self.inner.new_directory_artifact(entity, artifact_type, component_moniker)
74    }
75}
76
77/// A wrapper around a `Write` that filters out ANSI escape sequences before writing to the
78/// wrapped object.
79/// AnsiFilterWriter assumes the bytes are valid UTF8, and clears its state on newline in an
80/// attempt to recover from malformed inputs.
81struct AnsiFilterWriter<W: Write> {
82    inner: W,
83    parser: Parser,
84}
85
86impl<W: Write> AnsiFilterWriter<W> {
87    pub fn new(inner: W) -> Self {
88        Self { inner, parser: Parser::new() }
89    }
90}
91
92impl<W: Write> Write for AnsiFilterWriter<W> {
93    fn write(&mut self, bytes: &[u8]) -> Result<usize, Error> {
94        // Per Rust docs write does not need to consume all the bytes, and
95        // each call to write should represent at most a single attempt to write.
96        // To be as close as possible to "a single attempt to write" we write only
97        // the first chunk of writable bytes.
98        let mut printable_range: Option<(usize, usize)> = None;
99
100        for (idx, byte) in bytes.iter().enumerate() {
101            let mut found = FoundChars::Nothing;
102            self.parser.advance(&mut found, &[*byte]);
103            if let &FoundChars::PrintableChars('\n') = &found {
104                self.parser = Parser::new();
105            }
106
107            match found {
108                FoundChars::Nothing => (),
109                FoundChars::PrintableChars(char::REPLACEMENT_CHARACTER) => {
110                    // replacement character needs to be handled specially since the invalid
111                    // bytes could be a different length than REPLACEMENT_CHARACTER.
112                    match printable_range {
113                        None => {
114                            self.inner.write_all("�".as_bytes())?;
115                            return Ok(idx + 1);
116                        }
117                        Some(range) => {
118                            // in this case, we don't know where the last "good" processed
119                            // byte after the writable range is, so write the known good range.
120                            // Return the index after the good range so that anything after is
121                            // reprocessed and hits the None case on subsequent write.
122                            self.inner.write_all(&bytes[range.0..range.1])?;
123                            return Ok(range.1);
124                        }
125                    }
126                }
127                FoundChars::PrintableChars(character) => {
128                    let character_len = character.len_utf8();
129                    match printable_range.as_mut() {
130                        // Character length could exceed the number of bytes we've processed in
131                        // this write if part of a multibyte UTF8 character was processed in a
132                        // previous call to write. In this case, we have to regenerate the
133                        // chacter in memory so write only it and return immediately.
134                        None if character_len > idx + 1 => {
135                            let mut buf = [0u8; 4];
136                            character.encode_utf8(&mut buf);
137                            self.inner.write_all(&buf[..character_len])?;
138                            return Ok(idx + 1);
139                        }
140                        None => {
141                            printable_range = Some((idx + 1 - character_len, idx + 1));
142                        }
143                        Some(range) if range.1 == idx + 1 - character_len => {
144                            range.1 = idx + 1;
145                        }
146                        // We've passed over a section of non-printable characters and found a new
147                        // section of printable characters. We'll write the first printable range,
148                        // and return the number of bytes until just before the new set of
149                        // printable characters. The next write will essentially process the new
150                        // printable character again. Since we already know the new printable
151                        // character is a valid UTF8 character, reprocessing it should be fine.
152                        Some(range) => {
153                            self.inner.write_all(&bytes[range.0..range.1])?;
154                            return Ok(idx + 1 - character_len);
155                        }
156                    }
157                }
158            }
159        }
160        if let Some(range) = printable_range {
161            self.inner.write_all(&bytes[range.0..range.1])?;
162        }
163        // If we reach this far, we have processed all the bytes.
164        Ok(bytes.len())
165    }
166
167    fn flush(&mut self) -> Result<(), Error> {
168        self.inner.flush()
169    }
170}
171
/// An implementation of |Perform| that reports the characters found by the parser.
///
/// A value starts out as `Nothing` and is overwritten by the `Perform` callbacks when the
/// parser reports a character that should be passed through.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FoundChars {
    /// No printable character has been reported.
    Nothing,
    /// The most recently reported printable character.
    PrintableChars(char),
}
177
/// Control codes that are treated as printable output rather than being dropped:
/// carriage return, horizontal tab and line feed.
const PRINTABLE_COMMAND_CHARS: [u8; 3] = [b'\r', b'\t', b'\n'];
179
180impl Perform for FoundChars {
181    fn print(&mut self, c: char) {
182        *self = Self::PrintableChars(c);
183    }
184
185    fn execute(&mut self, code: u8) {
186        if PRINTABLE_COMMAND_CHARS.contains(&code) {
187            *self = Self::PrintableChars(code.into());
188        }
189    }
190    fn hook(&mut self, _: &Params, _: &[u8], _: bool, _: char) {}
191    fn put(&mut self, _: u8) {}
192    fn unhook(&mut self) {}
193    fn osc_dispatch(&mut self, _: &[&[u8]], _: bool) {}
194    fn csi_dispatch(&mut self, _: &Params, _: &[u8], _: bool, _: char) {}
195    fn esc_dispatch(&mut self, _: &[u8], _: bool, _: u8) {}
196}
197
#[cfg(test)]
mod test {
    use super::*;
    use ansi_term::{Color, Style};

    /// Input that contains no escape sequences must pass through unchanged.
    #[test]
    fn no_ansi_unaffected() {
        let cases = vec![
            "simple_case",
            "\twhitespace\ncase\r",
            "[INFO]: some log () <>",
            "1",
            "こんにちは",
            "מבחן 15 מבחן 20",
        ];

        for case in cases {
            let mut output: Vec<u8> = vec![];
            let mut filter_writer = AnsiFilterWriter::new(&mut output);
            filter_writer.write_all(case.as_bytes()).expect("write_all failed");
            drop(filter_writer);

            assert_eq!(case, String::from_utf8(output).expect("Failed to parse UTF8"));
        }
    }

    /// Styled strings must come out with only their text content left.
    #[test]
    fn ansi_filtered() {
        let cases = vec![
            (format!("{}", Color::Blue.paint("blue string")), "blue string"),
            (format!("{}", Color::Blue.bold().paint("newline\nstr")), "newline\nstr"),
            (format!("{}", Color::Blue.bold().paint("tab\tstr")), "tab\tstr"),
            (format!("{}", Style::new().bold().paint("bold")), "bold"),
            (
                format!(
                    "{} {}",
                    Style::new().bold().paint("bold"),
                    Style::new().bold().paint("bold-2")
                ),
                "bold bold-2",
            ),
            (format!("{}", Style::new().bold().paint("")), ""),
            (format!("no format, {}", Color::Blue.paint("format")), "no format, format"),
        ];

        for (case, expected) in cases {
            let mut output: Vec<u8> = vec![];
            let mut filter_writer = AnsiFilterWriter::new(&mut output);
            write!(filter_writer, "{}", case).expect("write failed");

            drop(filter_writer);
            assert_eq!(expected, String::from_utf8(output).expect("Couldn't parse utf8"));
        }
    }

    /// Verify that if a multibyte UTF8 character gets split across two writes, the character
    /// is passed through.
    #[test]
    fn ansi_partial_utf8_write() {
        // Each character width is exercised alone, surrounded, leading and trailing.
        let cases = vec![
            "ß", // 2 byte character
            "beforeßafter",
            "ßafter",
            "beforeß",
            "€", // 3 byte character
            "before€after",
            "€after",
            "before€",
            "💝", // 4 byte character
            "before💝after",
            "💝after",
            "before💝",
        ];

        for case in cases {
            let bytes = case.as_bytes();
            // Try every possible split position, including ones inside a character.
            for split_point in 1..bytes.len() {
                let mut output: Vec<u8> = vec![];
                let mut filter_writer = AnsiFilterWriter::new(&mut output);

                filter_writer.write_all(&bytes[..split_point]).expect("write slice");
                filter_writer.write_all(&bytes[split_point..]).expect("write slice");
                assert_eq!(
                    output, bytes,
                    "Failed on case {} split on byte {:?}",
                    case, split_point
                );
            }
        }
    }

    /// Invalid UTF8 must be turned into replacement characters instead of failing the write.
    #[test]
    fn ansi_handle_invalid_utf8() {
        // invalid bytes constructed according to rules in
        // https://en.wikipedia.org/wiki/UTF-8#Encoding
        const TWO_BYTE: [u8; 2] = [0xC2u8, 0xC2u8];
        const THREE_BYTE: [u8; 3] = [0xE0u8, 0xA0u8, 0xC2u8];
        const FOUR_BYTE: [u8; 4] = [0xF0u8, 0xA0u8, 0x82u8, 0xC2u8];

        let cases = vec![
            ([b"string".as_slice(), TWO_BYTE.as_slice()].concat(), "string��"),
            ([b"string".as_slice(), THREE_BYTE.as_slice()].concat(), "string��"),
            ([b"string".as_slice(), FOUR_BYTE.as_slice()].concat(), "string��"),
            ([TWO_BYTE.as_slice(), b"string".as_slice()].concat(), "�string"),
            ([THREE_BYTE.as_slice(), b"string".as_slice()].concat(), "�string"),
            ([FOUR_BYTE.as_slice(), b"string".as_slice()].concat(), "�string"),
        ];

        for (bytes, expected) in cases {
            let mut output: Vec<u8> = vec![];
            let mut filter_writer = AnsiFilterWriter::new(&mut output);

            filter_writer.write_all(&bytes).expect("write slice");
            assert_eq!(
                output,
                expected.as_bytes(),
                "Failed on case {:?}, expected string {}",
                bytes,
                expected
            );
        }
    }

    /// Ensure ansi escapes passed through multiple lines still filtered.
    #[test]
    fn ansi_multiline_filtered() {
        let multiline = format!("{}", Color::Blue.paint("multiline\nstring"));
        let split = multiline.split_ascii_whitespace().collect::<Vec<_>>();
        assert_eq!(split.len(), 2);

        let mut output: Vec<u8> = vec![];
        let mut filter_writer = AnsiFilterWriter::new(&mut output);

        for s in split {
            writeln!(filter_writer, "{}", s).expect("write failed");
        }

        drop(filter_writer);
        assert_eq!("multiline\nstring\n", String::from_utf8(output).expect("Couldn't parse utf8"));
    }

    /// Ensure malformed ansi is contained to a single line.
    #[test]
    fn malformed_ansi_contained() {
        let malformed = "\u{1b}[31mmalformed\u{1b}\n";
        let okay = format!("{}\n", Color::Blue.paint("okay"));

        let mut output: Vec<u8> = vec![];
        let mut filter_writer = AnsiFilterWriter::new(&mut output);

        filter_writer.write_all(malformed.as_bytes()).expect("write_all failed");
        filter_writer.write_all(okay.as_bytes()).expect("write_all failed");
        drop(filter_writer);
        assert_eq!("malformed\nokay\n", String::from_utf8(output).expect("Couldn't parse utf8"));
    }

    /// A |Write| implementation that only partially writes a buffer on write().
    struct PartialWriter<W: Write>(W);
    /// Upper bound on the number of bytes `PartialWriter` accepts per `write` call.
    const PARTIAL_WRITE_BYTES: usize = 3;

    impl<W: Write> Write for PartialWriter<W> {
        fn write(&mut self, bytes: &[u8]) -> Result<usize, Error> {
            let slice_to_write = if bytes.len() < PARTIAL_WRITE_BYTES {
                bytes
            } else {
                &bytes[..PARTIAL_WRITE_BYTES]
            };
            self.0.write_all(slice_to_write)?;
            Ok(slice_to_write.len())
        }

        fn flush(&mut self) -> Result<(), Error> {
            Ok(())
        }
    }

    /// The filter must still deliver all output when the wrapped writer accepts only a few
    /// bytes per call.
    #[test]
    fn ansi_filter_inner_partial_write() {
        let cases = vec![
            (format!("{}", Color::Blue.paint("multiline\nstring")), "multiline\nstring"),
            ("simple no ansi".to_string(), "simple no ansi"),
            ("a\nb\nc\nd".to_string(), "a\nb\nc\nd"),
        ];

        for (unfiltered, filtered) in cases.iter() {
            let mut output: Vec<u8> = vec![];
            let mut filter_writer = AnsiFilterWriter::new(PartialWriter(&mut output));
            filter_writer.write_all(unfiltered.as_bytes()).expect("write all");
            drop(filter_writer);
            assert_eq!(&String::from_utf8(output).expect("couldn't parse UTF8"), filtered);
        }
    }
}