bstr/
ext_slice.rs

1use core::{iter, slice, str};
2
3#[cfg(all(feature = "alloc", feature = "unicode"))]
4use alloc::vec;
5#[cfg(feature = "alloc")]
6use alloc::{borrow::Cow, string::String, vec::Vec};
7
8#[cfg(feature = "std")]
9use std::{ffi::OsStr, path::Path};
10
11use memchr::{memchr, memmem, memrchr};
12
13use crate::escape_bytes::EscapeBytes;
14#[cfg(feature = "alloc")]
15use crate::ext_vec::ByteVec;
16#[cfg(feature = "unicode")]
17use crate::unicode::{
18    whitespace_len_fwd, whitespace_len_rev, GraphemeIndices, Graphemes,
19    SentenceIndices, Sentences, WordIndices, Words, WordsWithBreakIndices,
20    WordsWithBreaks,
21};
22use crate::{
23    ascii,
24    bstr::BStr,
25    byteset,
26    utf8::{self, CharIndices, Chars, Utf8Chunks, Utf8Error},
27};
28
/// A short-hand constructor for building a `&[u8]`.
///
/// The main purpose of this deliberately unusual constructor is to let byte
/// string literals be written in a uniform way. Byte string literals have
/// array types, and the array length is part of the type, so literals of
/// different lengths cannot be mixed directly. For example, this code does
/// not compile:
///
/// ```ignore
/// let strs = vec![b"a", b"xy"];
/// ```
///
/// Here `b"a"` has type `&'static [u8; 1]` while `b"xy"` has type
/// `&'static [u8; 2]`, and a single `Vec` cannot hold both. (Ordinary string
/// literals do not have this problem: `"a"` and `"xy"` both have type
/// `&'static str`.)
///
/// Borrowing the literals does not help either:
///
/// ```ignore
/// let strs = vec![&b"a", &b"xy"];
/// ```
///
/// That merely produces values of type `& &'static [u8; 1]` and
/// `& &'static [u8; 2]`. The literals have to be turned into slices
/// explicitly:
///
/// ```
/// let strs = vec![&b"a"[..], &b"xy"[..]];
/// // or
/// let strs = vec![b"a".as_ref(), b"xy".as_ref()];
/// ```
///
/// Neither spelling is convenient for something as common as a string
/// literal, so this constructor allows the following instead:
///
/// ```
/// use bstr::B;
///
/// let strs = vec![B("a"), B(b"xy")];
/// ```
///
/// As the example shows, string literals and byte string literals can be
/// mixed freely, which is often handy.
#[allow(non_snake_case)]
#[inline]
pub fn B<'a, B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> &'a [u8] {
    // Name the target type explicitly so the conversion is unambiguous.
    AsRef::<[u8]>::as_ref(bytes)
}
78
// `ByteSlice` for plain byte slices: the value already is the raw bytes, so
// both accessors return `self` unchanged.
impl ByteSlice for [u8] {
    #[inline]
    fn as_bytes(&self) -> &[u8] {
        self
    }

    #[inline]
    fn as_bytes_mut(&mut self) -> &mut [u8] {
        self
    }
}
90
// `ByteSlice` for fixed-size byte arrays of any length `N`: both accessors
// return `self`, which is coerced from an array reference to a slice
// reference by the declared return type.
impl<const N: usize> ByteSlice for [u8; N] {
    #[inline]
    fn as_bytes(&self) -> &[u8] {
        self
    }

    #[inline]
    fn as_bytes_mut(&mut self) -> &mut [u8] {
        self
    }
}
102
/// Ensure that callers cannot implement `ByteSlice` by making an
/// unimplementable trait its super trait.
mod private {
    // The trait itself is `pub`, but the enclosing module is not, so the
    // trait can only be named from within this crate.
    pub trait Sealed {}
}
// The `Sealed` impls: byte slices and fixed-size byte arrays.
impl private::Sealed for [u8] {}
impl<const N: usize> private::Sealed for [u8; N] {}
110
111/// A trait that extends `&[u8]` with string oriented methods.
112///
113/// This trait is sealed and cannot be implemented outside of `bstr`.
114pub trait ByteSlice: private::Sealed {
    /// A method for accessing the raw bytes of this type. This is always a
    /// no-op and callers shouldn't care about it. This only exists for making
    /// the extension trait work: the provided methods of this trait reach the
    /// underlying bytes through it.
    #[doc(hidden)]
    fn as_bytes(&self) -> &[u8];
120
    /// A method for accessing the raw bytes of this type, mutably. This is
    /// always a no-op and callers shouldn't care about it. This only exists
    /// for making the extension trait work: the provided methods of this
    /// trait that need mutable access reach the underlying bytes through it.
    #[doc(hidden)]
    fn as_bytes_mut(&mut self) -> &mut [u8];
126
127    /// Return this byte slice as a `&BStr`.
128    ///
129    /// Use `&BStr` is useful because of its `fmt::Debug` representation
130    /// and various other trait implementations (such as `PartialEq` and
131    /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
132    /// shows its bytes as a normal string. For invalid UTF-8, hex escape
133    /// sequences are used.
134    ///
135    /// # Examples
136    ///
137    /// Basic usage:
138    ///
139    /// ```
140    /// use bstr::ByteSlice;
141    ///
142    /// println!("{:?}", b"foo\xFFbar".as_bstr());
143    /// ```
144    #[inline]
145    fn as_bstr(&self) -> &BStr {
146        BStr::new(self.as_bytes())
147    }
148
149    /// Return this byte slice as a `&mut BStr`.
150    ///
151    /// Use `&mut BStr` is useful because of its `fmt::Debug` representation
152    /// and various other trait implementations (such as `PartialEq` and
153    /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
154    /// shows its bytes as a normal string. For invalid UTF-8, hex escape
155    /// sequences are used.
156    ///
157    /// # Examples
158    ///
159    /// Basic usage:
160    ///
161    /// ```
162    /// use bstr::ByteSlice;
163    ///
164    /// let mut bytes = *b"foo\xFFbar";
165    /// println!("{:?}", &mut bytes.as_bstr_mut());
166    /// ```
167    #[inline]
168    fn as_bstr_mut(&mut self) -> &mut BStr {
169        BStr::new_mut(self.as_bytes_mut())
170    }
171
172    /// Create an immutable byte string from an OS string slice.
173    ///
174    /// When the underlying bytes of OS strings are accessible, then this
175    /// always succeeds and is zero cost. Otherwise, this returns `None` if the
176    /// given OS string is not valid UTF-8. (For example, when the underlying
177    /// bytes are inaccessible on Windows, file paths are allowed to be a
178    /// sequence of arbitrary 16-bit integers. Not all such sequences can be
179    /// transcoded to valid UTF-8.)
180    ///
181    /// # Examples
182    ///
183    /// Basic usage:
184    ///
185    /// ```
186    /// use std::ffi::OsStr;
187    ///
188    /// use bstr::{B, ByteSlice};
189    ///
190    /// let os_str = OsStr::new("foo");
191    /// let bs = <[u8]>::from_os_str(os_str).expect("should be valid UTF-8");
192    /// assert_eq!(bs, B("foo"));
193    /// ```
194    #[cfg(feature = "std")]
195    #[inline]
196    fn from_os_str(os_str: &OsStr) -> Option<&[u8]> {
197        #[cfg(unix)]
198        #[inline]
199        fn imp(os_str: &OsStr) -> Option<&[u8]> {
200            use std::os::unix::ffi::OsStrExt;
201
202            Some(os_str.as_bytes())
203        }
204
205        #[cfg(not(unix))]
206        #[inline]
207        fn imp(os_str: &OsStr) -> Option<&[u8]> {
208            os_str.to_str().map(|s| s.as_bytes())
209        }
210
211        imp(os_str)
212    }
213
214    /// Create an immutable byte string from a file path.
215    ///
216    /// When the underlying bytes of paths are accessible, then this always
217    /// succeeds and is zero cost. Otherwise, this returns `None` if the given
218    /// path is not valid UTF-8. (For example, when the underlying bytes are
219    /// inaccessible on Windows, file paths are allowed to be a sequence of
220    /// arbitrary 16-bit integers. Not all such sequences can be transcoded to
221    /// valid UTF-8.)
222    ///
223    /// # Examples
224    ///
225    /// Basic usage:
226    ///
227    /// ```
228    /// use std::path::Path;
229    ///
230    /// use bstr::{B, ByteSlice};
231    ///
232    /// let path = Path::new("foo");
233    /// let bs = <[u8]>::from_path(path).expect("should be valid UTF-8");
234    /// assert_eq!(bs, B("foo"));
235    /// ```
236    #[cfg(feature = "std")]
237    #[inline]
238    fn from_path(path: &Path) -> Option<&[u8]> {
239        Self::from_os_str(path.as_os_str())
240    }
241
242    /// Safely convert this byte string into a `&str` if it's valid UTF-8.
243    ///
244    /// If this byte string is not valid UTF-8, then an error is returned. The
245    /// error returned indicates the first invalid byte found and the length
246    /// of the error.
247    ///
248    /// In cases where a lossy conversion to `&str` is acceptable, then use one
249    /// of the [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) or
250    /// [`to_str_lossy_into`](trait.ByteSlice.html#method.to_str_lossy_into)
251    /// methods.
252    ///
253    /// # Examples
254    ///
255    /// Basic usage:
256    ///
257    /// ```
258    /// # #[cfg(feature = "alloc")] {
259    /// use bstr::{B, ByteSlice, ByteVec};
260    ///
261    /// # fn example() -> Result<(), bstr::Utf8Error> {
262    /// let s = B("☃βツ").to_str()?;
263    /// assert_eq!("☃βツ", s);
264    ///
265    /// let mut bstring = <Vec<u8>>::from("☃βツ");
266    /// bstring.push(b'\xFF');
267    /// let err = bstring.to_str().unwrap_err();
268    /// assert_eq!(8, err.valid_up_to());
269    /// # Ok(()) }; example().unwrap()
270    /// # }
271    /// ```
272    #[inline]
273    fn to_str(&self) -> Result<&str, Utf8Error> {
274        utf8::validate(self.as_bytes()).map(|_| {
275            // SAFETY: This is safe because of the guarantees provided by
276            // utf8::validate.
277            unsafe { str::from_utf8_unchecked(self.as_bytes()) }
278        })
279    }
280
281    /// Unsafely convert this byte string into a `&str`, without checking for
282    /// valid UTF-8.
283    ///
284    /// # Safety
285    ///
286    /// Callers *must* ensure that this byte string is valid UTF-8 before
287    /// calling this method. Converting a byte string into a `&str` that is
288    /// not valid UTF-8 is considered undefined behavior.
289    ///
290    /// This routine is useful in performance sensitive contexts where the
291    /// UTF-8 validity of the byte string is already known and it is
292    /// undesirable to pay the cost of an additional UTF-8 validation check
293    /// that [`to_str`](trait.ByteSlice.html#method.to_str) performs.
294    ///
295    /// # Examples
296    ///
297    /// Basic usage:
298    ///
299    /// ```
300    /// use bstr::{B, ByteSlice};
301    ///
302    /// // SAFETY: This is safe because string literals are guaranteed to be
303    /// // valid UTF-8 by the Rust compiler.
304    /// let s = unsafe { B("☃βツ").to_str_unchecked() };
305    /// assert_eq!("☃βツ", s);
306    /// ```
307    #[inline]
308    unsafe fn to_str_unchecked(&self) -> &str {
309        str::from_utf8_unchecked(self.as_bytes())
310    }
311
312    /// Convert this byte string to a valid UTF-8 string by replacing invalid
313    /// UTF-8 bytes with the Unicode replacement codepoint (`U+FFFD`).
314    ///
315    /// If the byte string is already valid UTF-8, then no copying or
316    /// allocation is performed and a borrrowed string slice is returned. If
317    /// the byte string is not valid UTF-8, then an owned string buffer is
318    /// returned with invalid bytes replaced by the replacement codepoint.
319    ///
320    /// This method uses the "substitution of maximal subparts" (Unicode
321    /// Standard, Chapter 3, Section 9) strategy for inserting the replacement
322    /// codepoint. Specifically, a replacement codepoint is inserted whenever a
323    /// byte is found that cannot possibly lead to a valid code unit sequence.
324    /// If there were previous bytes that represented a prefix of a well-formed
325    /// code unit sequence, then all of those bytes are substituted with a
326    /// single replacement codepoint. The "substitution of maximal subparts"
327    /// strategy is the same strategy used by
328    /// [W3C's Encoding standard](https://www.w3.org/TR/encoding/).
329    /// For a more precise description of the maximal subpart strategy, see
330    /// the Unicode Standard, Chapter 3, Section 9. See also
331    /// [Public Review Issue #121](https://www.unicode.org/review/pr-121.html).
332    ///
333    /// N.B. Rust's standard library also appears to use the same strategy,
334    /// but it does not appear to be an API guarantee.
335    ///
336    /// # Examples
337    ///
338    /// Basic usage:
339    ///
340    /// ```
341    /// use std::borrow::Cow;
342    ///
343    /// use bstr::ByteSlice;
344    ///
345    /// let mut bstring = <Vec<u8>>::from("☃βツ");
346    /// assert_eq!(Cow::Borrowed("☃βツ"), bstring.to_str_lossy());
347    ///
348    /// // Add a byte that makes the sequence invalid.
349    /// bstring.push(b'\xFF');
350    /// assert_eq!(Cow::Borrowed("☃βツ\u{FFFD}"), bstring.to_str_lossy());
351    /// ```
352    ///
353    /// This demonstrates the "maximal subpart" substitution logic.
354    ///
355    /// ```
356    /// use bstr::{B, ByteSlice};
357    ///
358    /// // \x61 is the ASCII codepoint for 'a'.
359    /// // \xF1\x80\x80 is a valid 3-byte code unit prefix.
360    /// // \xE1\x80 is a valid 2-byte code unit prefix.
361    /// // \xC2 is a valid 1-byte code unit prefix.
362    /// // \x62 is the ASCII codepoint for 'b'.
363    /// //
364    /// // In sum, each of the prefixes is replaced by a single replacement
365    /// // codepoint since none of the prefixes are properly completed. This
366    /// // is in contrast to other strategies that might insert a replacement
367    /// // codepoint for every single byte.
368    /// let bs = B(b"\x61\xF1\x80\x80\xE1\x80\xC2\x62");
369    /// assert_eq!("a\u{FFFD}\u{FFFD}\u{FFFD}b", bs.to_str_lossy());
370    /// ```
371    #[cfg(feature = "alloc")]
372    #[inline]
373    fn to_str_lossy(&self) -> Cow<'_, str> {
374        match utf8::validate(self.as_bytes()) {
375            Ok(()) => {
376                // SAFETY: This is safe because of the guarantees provided by
377                // utf8::validate.
378                unsafe {
379                    Cow::Borrowed(str::from_utf8_unchecked(self.as_bytes()))
380                }
381            }
382            Err(err) => {
383                let mut lossy = String::with_capacity(self.as_bytes().len());
384                let (valid, after) =
385                    self.as_bytes().split_at(err.valid_up_to());
386                // SAFETY: This is safe because utf8::validate guarantees
387                // that all of `valid` is valid UTF-8.
388                lossy.push_str(unsafe { str::from_utf8_unchecked(valid) });
389                lossy.push_str("\u{FFFD}");
390                if let Some(len) = err.error_len() {
391                    after[len..].to_str_lossy_into(&mut lossy);
392                }
393                Cow::Owned(lossy)
394            }
395        }
396    }
397
398    /// Copy the contents of this byte string into the given owned string
399    /// buffer, while replacing invalid UTF-8 code unit sequences with the
400    /// Unicode replacement codepoint (`U+FFFD`).
401    ///
402    /// This method uses the same "substitution of maximal subparts" strategy
403    /// for inserting the replacement codepoint as the
404    /// [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) method.
405    ///
406    /// This routine is useful for amortizing allocation. However, unlike
407    /// `to_str_lossy`, this routine will _always_ copy the contents of this
408    /// byte string into the destination buffer, even if this byte string is
409    /// valid UTF-8.
410    ///
411    /// # Examples
412    ///
413    /// Basic usage:
414    ///
415    /// ```
416    /// use std::borrow::Cow;
417    ///
418    /// use bstr::ByteSlice;
419    ///
420    /// let mut bstring = <Vec<u8>>::from("☃βツ");
421    /// // Add a byte that makes the sequence invalid.
422    /// bstring.push(b'\xFF');
423    ///
424    /// let mut dest = String::new();
425    /// bstring.to_str_lossy_into(&mut dest);
426    /// assert_eq!("☃βツ\u{FFFD}", dest);
427    /// ```
428    #[cfg(feature = "alloc")]
429    #[inline]
430    fn to_str_lossy_into(&self, dest: &mut String) {
431        let mut bytes = self.as_bytes();
432        dest.reserve(bytes.len());
433        loop {
434            match utf8::validate(bytes) {
435                Ok(()) => {
436                    // SAFETY: This is safe because utf8::validate guarantees
437                    // that all of `bytes` is valid UTF-8.
438                    dest.push_str(unsafe { str::from_utf8_unchecked(bytes) });
439                    break;
440                }
441                Err(err) => {
442                    let (valid, after) = bytes.split_at(err.valid_up_to());
443                    // SAFETY: This is safe because utf8::validate guarantees
444                    // that all of `valid` is valid UTF-8.
445                    dest.push_str(unsafe { str::from_utf8_unchecked(valid) });
446                    dest.push_str("\u{FFFD}");
447                    match err.error_len() {
448                        None => break,
449                        Some(len) => bytes = &after[len..],
450                    }
451                }
452            }
453        }
454    }
455
456    /// Create an OS string slice from this byte string.
457    ///
458    /// When OS strings can be constructed from arbitrary byte sequences, this
459    /// always succeeds and is zero cost. Otherwise, this returns a UTF-8
460    /// decoding error if this byte string is not valid UTF-8. (For example,
461    /// assuming the representation of `OsStr` is opaque on Windows, file paths
462    /// are allowed to be a sequence of arbitrary 16-bit integers. There is
463    /// no obvious mapping from an arbitrary sequence of 8-bit integers to an
464    /// arbitrary sequence of 16-bit integers. If the representation of `OsStr`
465    /// is even opened up, then this will convert any sequence of bytes to an
466    /// `OsStr` without cost.)
467    ///
468    /// # Examples
469    ///
470    /// Basic usage:
471    ///
472    /// ```
473    /// use bstr::{B, ByteSlice};
474    ///
475    /// let os_str = b"foo".to_os_str().expect("should be valid UTF-8");
476    /// assert_eq!(os_str, "foo");
477    /// ```
478    #[cfg(feature = "std")]
479    #[inline]
480    fn to_os_str(&self) -> Result<&OsStr, Utf8Error> {
481        #[cfg(unix)]
482        #[inline]
483        fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
484            use std::os::unix::ffi::OsStrExt;
485
486            Ok(OsStr::from_bytes(bytes))
487        }
488
489        #[cfg(not(unix))]
490        #[inline]
491        fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
492            bytes.to_str().map(OsStr::new)
493        }
494
495        imp(self.as_bytes())
496    }
497
498    /// Lossily create an OS string slice from this byte string.
499    ///
500    /// When OS strings can be constructed from arbitrary byte sequences, this
501    /// is zero cost and always returns a slice. Otherwise, this will perform a
502    /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
503    /// the Unicode replacement codepoint.
504    ///
505    /// Note that this can prevent the correct roundtripping of file paths when
506    /// the representation of `OsStr` is opaque.
507    ///
508    /// # Examples
509    ///
510    /// Basic usage:
511    ///
512    /// ```
513    /// use bstr::ByteSlice;
514    ///
515    /// let os_str = b"foo\xFFbar".to_os_str_lossy();
516    /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
517    /// ```
518    #[cfg(feature = "std")]
519    #[inline]
520    fn to_os_str_lossy(&self) -> Cow<'_, OsStr> {
521        #[cfg(unix)]
522        #[inline]
523        fn imp(bytes: &[u8]) -> Cow<'_, OsStr> {
524            use std::os::unix::ffi::OsStrExt;
525
526            Cow::Borrowed(OsStr::from_bytes(bytes))
527        }
528
529        #[cfg(not(unix))]
530        #[inline]
531        fn imp(bytes: &[u8]) -> Cow<OsStr> {
532            use std::ffi::OsString;
533
534            match bytes.to_str_lossy() {
535                Cow::Borrowed(x) => Cow::Borrowed(OsStr::new(x)),
536                Cow::Owned(x) => Cow::Owned(OsString::from(x)),
537            }
538        }
539
540        imp(self.as_bytes())
541    }
542
543    /// Create a path slice from this byte string.
544    ///
545    /// When paths can be constructed from arbitrary byte sequences, this
546    /// always succeeds and is zero cost. Otherwise, this returns a UTF-8
547    /// decoding error if this byte string is not valid UTF-8. (For example,
548    /// assuming the representation of `Path` is opaque on Windows, file paths
549    /// are allowed to be a sequence of arbitrary 16-bit integers. There is
550    /// no obvious mapping from an arbitrary sequence of 8-bit integers to an
551    /// arbitrary sequence of 16-bit integers. If the representation of `Path`
552    /// is even opened up, then this will convert any sequence of bytes to an
553    /// `Path` without cost.)
554    ///
555    /// # Examples
556    ///
557    /// Basic usage:
558    ///
559    /// ```
560    /// use bstr::ByteSlice;
561    ///
562    /// let path = b"foo".to_path().expect("should be valid UTF-8");
563    /// assert_eq!(path.as_os_str(), "foo");
564    /// ```
565    #[cfg(feature = "std")]
566    #[inline]
567    fn to_path(&self) -> Result<&Path, Utf8Error> {
568        self.to_os_str().map(Path::new)
569    }
570
571    /// Lossily create a path slice from this byte string.
572    ///
573    /// When paths can be constructed from arbitrary byte sequences, this is
574    /// zero cost and always returns a slice. Otherwise, this will perform a
575    /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
576    /// the Unicode replacement codepoint.
577    ///
578    /// Note that this can prevent the correct roundtripping of file paths when
579    /// the representation of `Path` is opaque.
580    ///
581    /// # Examples
582    ///
583    /// Basic usage:
584    ///
585    /// ```
586    /// use bstr::ByteSlice;
587    ///
588    /// let bs = b"foo\xFFbar";
589    /// let path = bs.to_path_lossy();
590    /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
591    /// ```
592    #[cfg(feature = "std")]
593    #[inline]
594    fn to_path_lossy(&self) -> Cow<'_, Path> {
595        use std::path::PathBuf;
596
597        match self.to_os_str_lossy() {
598            Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
599            Cow::Owned(x) => Cow::Owned(PathBuf::from(x)),
600        }
601    }
602
603    /// Create a new byte string by repeating this byte string `n` times.
604    ///
605    /// # Panics
606    ///
607    /// This function panics if the capacity of the new byte string would
608    /// overflow.
609    ///
610    /// # Examples
611    ///
612    /// Basic usage:
613    ///
614    /// ```
615    /// use bstr::{B, ByteSlice};
616    ///
617    /// assert_eq!(b"foo".repeatn(4), B("foofoofoofoo"));
618    /// assert_eq!(b"foo".repeatn(0), B(""));
619    /// ```
620    #[cfg(feature = "alloc")]
621    #[inline]
622    fn repeatn(&self, n: usize) -> Vec<u8> {
623        self.as_bytes().repeat(n)
624    }
625
626    /// Returns true if and only if this byte string contains the given needle.
627    ///
628    /// # Examples
629    ///
630    /// Basic usage:
631    ///
632    /// ```
633    /// use bstr::ByteSlice;
634    ///
635    /// assert!(b"foo bar".contains_str("foo"));
636    /// assert!(b"foo bar".contains_str("bar"));
637    /// assert!(!b"foo".contains_str("foobar"));
638    /// ```
639    #[inline]
640    fn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool {
641        self.find(needle).is_some()
642    }
643
644    /// Returns true if and only if this byte string has the given prefix.
645    ///
646    /// # Examples
647    ///
648    /// Basic usage:
649    ///
650    /// ```
651    /// use bstr::ByteSlice;
652    ///
653    /// assert!(b"foo bar".starts_with_str("foo"));
654    /// assert!(!b"foo bar".starts_with_str("bar"));
655    /// assert!(!b"foo".starts_with_str("foobar"));
656    /// ```
657    #[inline]
658    fn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool {
659        self.as_bytes().starts_with(prefix.as_ref())
660    }
661
662    /// Returns true if and only if this byte string has the given suffix.
663    ///
664    /// # Examples
665    ///
666    /// Basic usage:
667    ///
668    /// ```
669    /// use bstr::ByteSlice;
670    ///
671    /// assert!(b"foo bar".ends_with_str("bar"));
672    /// assert!(!b"foo bar".ends_with_str("foo"));
673    /// assert!(!b"bar".ends_with_str("foobar"));
674    /// ```
675    #[inline]
676    fn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool {
677        self.as_bytes().ends_with(suffix.as_ref())
678    }
679
680    /// Returns the index of the first occurrence of the given needle.
681    ///
682    /// The needle may be any type that can be cheaply converted into a
683    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
684    ///
685    /// Note that if you're are searching for the same needle in many
686    /// different small haystacks, it may be faster to initialize a
687    /// [`Finder`](struct.Finder.html) once, and reuse it for each search.
688    ///
689    /// # Complexity
690    ///
691    /// This routine is guaranteed to have worst case linear time complexity
692    /// with respect to both the needle and the haystack. That is, this runs
693    /// in `O(needle.len() + haystack.len())` time.
694    ///
695    /// This routine is also guaranteed to have worst case constant space
696    /// complexity.
697    ///
698    /// # Examples
699    ///
700    /// Basic usage:
701    ///
702    /// ```
703    /// use bstr::ByteSlice;
704    ///
705    /// let s = b"foo bar baz";
706    /// assert_eq!(Some(0), s.find("foo"));
707    /// assert_eq!(Some(4), s.find("bar"));
708    /// assert_eq!(None, s.find("quux"));
709    /// ```
710    #[inline]
711    fn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
712        Finder::new(needle.as_ref()).find(self.as_bytes())
713    }
714
715    /// Returns the index of the last occurrence of the given needle.
716    ///
717    /// The needle may be any type that can be cheaply converted into a
718    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
719    ///
720    /// Note that if you're are searching for the same needle in many
721    /// different small haystacks, it may be faster to initialize a
722    /// [`FinderReverse`](struct.FinderReverse.html) once, and reuse it for
723    /// each search.
724    ///
725    /// # Complexity
726    ///
727    /// This routine is guaranteed to have worst case linear time complexity
728    /// with respect to both the needle and the haystack. That is, this runs
729    /// in `O(needle.len() + haystack.len())` time.
730    ///
731    /// This routine is also guaranteed to have worst case constant space
732    /// complexity.
733    ///
734    /// # Examples
735    ///
736    /// Basic usage:
737    ///
738    /// ```
739    /// use bstr::ByteSlice;
740    ///
741    /// let s = b"foo bar baz";
742    /// assert_eq!(Some(0), s.rfind("foo"));
743    /// assert_eq!(Some(4), s.rfind("bar"));
744    /// assert_eq!(Some(8), s.rfind("ba"));
745    /// assert_eq!(None, s.rfind("quux"));
746    /// ```
747    #[inline]
748    fn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
749        FinderReverse::new(needle.as_ref()).rfind(self.as_bytes())
750    }
751
752    /// Returns an iterator of the non-overlapping occurrences of the given
753    /// needle. The iterator yields byte offset positions indicating the start
754    /// of each match.
755    ///
756    /// # Complexity
757    ///
758    /// This routine is guaranteed to have worst case linear time complexity
759    /// with respect to both the needle and the haystack. That is, this runs
760    /// in `O(needle.len() + haystack.len())` time.
761    ///
762    /// This routine is also guaranteed to have worst case constant space
763    /// complexity.
764    ///
765    /// # Examples
766    ///
767    /// Basic usage:
768    ///
769    /// ```
770    /// use bstr::ByteSlice;
771    ///
772    /// let s = b"foo bar foo foo quux foo";
773    /// let matches: Vec<usize> = s.find_iter("foo").collect();
774    /// assert_eq!(matches, vec![0, 8, 12, 21]);
775    /// ```
776    ///
777    /// An empty string matches at every position, including the position
778    /// immediately following the last byte:
779    ///
780    /// ```
781    /// use bstr::ByteSlice;
782    ///
783    /// let matches: Vec<usize> = b"foo".find_iter("").collect();
784    /// assert_eq!(matches, vec![0, 1, 2, 3]);
785    ///
786    /// let matches: Vec<usize> = b"".find_iter("").collect();
787    /// assert_eq!(matches, vec![0]);
788    /// ```
789    #[inline]
790    fn find_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
791        &'h self,
792        needle: &'n B,
793    ) -> Find<'h, 'n> {
794        Find::new(self.as_bytes(), needle.as_ref())
795    }
796
797    /// Returns an iterator of the non-overlapping occurrences of the given
798    /// needle in reverse. The iterator yields byte offset positions indicating
799    /// the start of each match.
800    ///
801    /// # Complexity
802    ///
803    /// This routine is guaranteed to have worst case linear time complexity
804    /// with respect to both the needle and the haystack. That is, this runs
805    /// in `O(needle.len() + haystack.len())` time.
806    ///
807    /// This routine is also guaranteed to have worst case constant space
808    /// complexity.
809    ///
810    /// # Examples
811    ///
812    /// Basic usage:
813    ///
814    /// ```
815    /// use bstr::ByteSlice;
816    ///
817    /// let s = b"foo bar foo foo quux foo";
818    /// let matches: Vec<usize> = s.rfind_iter("foo").collect();
819    /// assert_eq!(matches, vec![21, 12, 8, 0]);
820    /// ```
821    ///
822    /// An empty string matches at every position, including the position
823    /// immediately following the last byte:
824    ///
825    /// ```
826    /// use bstr::ByteSlice;
827    ///
828    /// let matches: Vec<usize> = b"foo".rfind_iter("").collect();
829    /// assert_eq!(matches, vec![3, 2, 1, 0]);
830    ///
831    /// let matches: Vec<usize> = b"".rfind_iter("").collect();
832    /// assert_eq!(matches, vec![0]);
833    /// ```
834    #[inline]
835    fn rfind_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
836        &'h self,
837        needle: &'n B,
838    ) -> FindReverse<'h, 'n> {
839        FindReverse::new(self.as_bytes(), needle.as_ref())
840    }
841
842    /// Returns the index of the first occurrence of the given byte. If the
843    /// byte does not occur in this byte string, then `None` is returned.
844    ///
845    /// # Examples
846    ///
847    /// Basic usage:
848    ///
849    /// ```
850    /// use bstr::ByteSlice;
851    ///
852    /// assert_eq!(Some(10), b"foo bar baz".find_byte(b'z'));
853    /// assert_eq!(None, b"foo bar baz".find_byte(b'y'));
854    /// ```
855    #[inline]
856    fn find_byte(&self, byte: u8) -> Option<usize> {
857        memchr(byte, self.as_bytes())
858    }
859
860    /// Returns the index of the last occurrence of the given byte. If the
861    /// byte does not occur in this byte string, then `None` is returned.
862    ///
863    /// # Examples
864    ///
865    /// Basic usage:
866    ///
867    /// ```
868    /// use bstr::ByteSlice;
869    ///
870    /// assert_eq!(Some(10), b"foo bar baz".rfind_byte(b'z'));
871    /// assert_eq!(None, b"foo bar baz".rfind_byte(b'y'));
872    /// ```
873    #[inline]
874    fn rfind_byte(&self, byte: u8) -> Option<usize> {
875        memrchr(byte, self.as_bytes())
876    }
877
878    /// Returns the index of the first occurrence of the given codepoint.
879    /// If the codepoint does not occur in this byte string, then `None` is
880    /// returned.
881    ///
882    /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
883    /// then only explicit occurrences of that encoding will be found. Invalid
884    /// UTF-8 sequences will not be matched.
885    ///
886    /// # Examples
887    ///
888    /// Basic usage:
889    ///
890    /// ```
891    /// use bstr::{B, ByteSlice};
892    ///
893    /// assert_eq!(Some(10), b"foo bar baz".find_char('z'));
894    /// assert_eq!(Some(4), B("αβγγδ").find_char('γ'));
895    /// assert_eq!(None, b"foo bar baz".find_char('y'));
896    /// ```
897    #[inline]
898    fn find_char(&self, ch: char) -> Option<usize> {
899        self.find(ch.encode_utf8(&mut [0; 4]))
900    }
901
902    /// Returns the index of the last occurrence of the given codepoint.
903    /// If the codepoint does not occur in this byte string, then `None` is
904    /// returned.
905    ///
906    /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
907    /// then only explicit occurrences of that encoding will be found. Invalid
908    /// UTF-8 sequences will not be matched.
909    ///
910    /// # Examples
911    ///
912    /// Basic usage:
913    ///
914    /// ```
915    /// use bstr::{B, ByteSlice};
916    ///
917    /// assert_eq!(Some(10), b"foo bar baz".rfind_char('z'));
918    /// assert_eq!(Some(6), B("αβγγδ").rfind_char('γ'));
919    /// assert_eq!(None, b"foo bar baz".rfind_char('y'));
920    /// ```
921    #[inline]
922    fn rfind_char(&self, ch: char) -> Option<usize> {
923        self.rfind(ch.encode_utf8(&mut [0; 4]))
924    }
925
926    /// Returns the index of the first occurrence of any of the bytes in the
927    /// provided set.
928    ///
929    /// The `byteset` may be any type that can be cheaply converted into a
930    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
931    /// note that passing a `&str` which contains multibyte characters may not
932    /// behave as you expect: each byte in the `&str` is treated as an
933    /// individual member of the byte set.
934    ///
935    /// Note that order is irrelevant for the `byteset` parameter, and
936    /// duplicate bytes present in its body are ignored.
937    ///
938    /// # Complexity
939    ///
940    /// This routine is guaranteed to have worst case linear time complexity
941    /// with respect to both the set of bytes and the haystack. That is, this
942    /// runs in `O(byteset.len() + haystack.len())` time.
943    ///
944    /// This routine is also guaranteed to have worst case constant space
945    /// complexity.
946    ///
947    /// # Examples
948    ///
949    /// Basic usage:
950    ///
951    /// ```
952    /// use bstr::ByteSlice;
953    ///
954    /// assert_eq!(b"foo bar baz".find_byteset(b"zr"), Some(6));
955    /// assert_eq!(b"foo baz bar".find_byteset(b"bzr"), Some(4));
956    /// assert_eq!(None, b"foo baz bar".find_byteset(b"\t\n"));
957    /// // The empty byteset never matches.
958    /// assert_eq!(None, b"abc".find_byteset(b""));
959    /// assert_eq!(None, b"".find_byteset(b""));
960    /// ```
961    #[inline]
962    fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
963        byteset::find(self.as_bytes(), byteset.as_ref())
964    }
965
966    /// Returns the index of the first occurrence of a byte that is not a
967    /// member of the provided set.
968    ///
969    /// The `byteset` may be any type that can be cheaply converted into a
970    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
971    /// note that passing a `&str` which contains multibyte characters may not
972    /// behave as you expect: each byte in the `&str` is treated as an
973    /// individual member of the byte set.
974    ///
975    /// Note that order is irrelevant for the `byteset` parameter, and
976    /// duplicate bytes present in its body are ignored.
977    ///
978    /// # Complexity
979    ///
980    /// This routine is guaranteed to have worst case linear time complexity
981    /// with respect to both the set of bytes and the haystack. That is, this
982    /// runs in `O(byteset.len() + haystack.len())` time.
983    ///
984    /// This routine is also guaranteed to have worst case constant space
985    /// complexity.
986    ///
987    /// # Examples
988    ///
989    /// Basic usage:
990    ///
991    /// ```
992    /// use bstr::ByteSlice;
993    ///
994    /// assert_eq!(b"foo bar baz".find_not_byteset(b"fo "), Some(4));
995    /// assert_eq!(b"\t\tbaz bar".find_not_byteset(b" \t\r\n"), Some(2));
996    /// assert_eq!(b"foo\nbaz\tbar".find_not_byteset(b"\t\n"), Some(0));
997    /// // The negation of the empty byteset matches everything.
998    /// assert_eq!(Some(0), b"abc".find_not_byteset(b""));
999    /// // But an empty string never contains anything.
1000    /// assert_eq!(None, b"".find_not_byteset(b""));
1001    /// ```
1002    #[inline]
1003    fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1004        byteset::find_not(self.as_bytes(), byteset.as_ref())
1005    }
1006
1007    /// Returns the index of the last occurrence of any of the bytes in the
1008    /// provided set.
1009    ///
1010    /// The `byteset` may be any type that can be cheaply converted into a
1011    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
1012    /// note that passing a `&str` which contains multibyte characters may not
1013    /// behave as you expect: each byte in the `&str` is treated as an
1014    /// individual member of the byte set.
1015    ///
1016    /// Note that order is irrelevant for the `byteset` parameter, and duplicate
1017    /// bytes present in its body are ignored.
1018    ///
1019    /// # Complexity
1020    ///
1021    /// This routine is guaranteed to have worst case linear time complexity
1022    /// with respect to both the set of bytes and the haystack. That is, this
1023    /// runs in `O(byteset.len() + haystack.len())` time.
1024    ///
1025    /// This routine is also guaranteed to have worst case constant space
1026    /// complexity.
1027    ///
1028    /// # Examples
1029    ///
1030    /// Basic usage:
1031    ///
1032    /// ```
1033    /// use bstr::ByteSlice;
1034    ///
1035    /// assert_eq!(b"foo bar baz".rfind_byteset(b"agb"), Some(9));
1036    /// assert_eq!(b"foo baz bar".rfind_byteset(b"rabz "), Some(10));
1037    /// assert_eq!(b"foo baz bar".rfind_byteset(b"\n123"), None);
1038    /// ```
1039    #[inline]
1040    fn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1041        byteset::rfind(self.as_bytes(), byteset.as_ref())
1042    }
1043
1044    /// Returns the index of the last occurrence of a byte that is not a member
1045    /// of the provided set.
1046    ///
1047    /// The `byteset` may be any type that can be cheaply converted into a
1048    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
1049    /// note that passing a `&str` which contains multibyte characters may not
1050    /// behave as you expect: each byte in the `&str` is treated as an
1051    /// individual member of the byte set.
1052    ///
1053    /// Note that order is irrelevant for the `byteset` parameter, and
1054    /// duplicate bytes present in its body are ignored.
1055    ///
1056    /// # Complexity
1057    ///
1058    /// This routine is guaranteed to have worst case linear time complexity
1059    /// with respect to both the set of bytes and the haystack. That is, this
1060    /// runs in `O(byteset.len() + haystack.len())` time.
1061    ///
1062    /// This routine is also guaranteed to have worst case constant space
1063    /// complexity.
1064    ///
1065    /// # Examples
1066    ///
1067    /// Basic usage:
1068    ///
1069    /// ```
1070    /// use bstr::ByteSlice;
1071    ///
1072    /// assert_eq!(b"foo bar baz,\t".rfind_not_byteset(b",\t"), Some(10));
1073    /// assert_eq!(b"foo baz bar".rfind_not_byteset(b"rabz "), Some(2));
1074    /// assert_eq!(None, b"foo baz bar".rfind_not_byteset(b"barfoz "));
1075    /// ```
1076    #[inline]
1077    fn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1078        byteset::rfind_not(self.as_bytes(), byteset.as_ref())
1079    }
1080
1081    /// Returns an iterator over the fields in a byte string, separated
1082    /// by contiguous whitespace (according to the Unicode property
1083    /// `White_Space`).
1084    ///
1085    /// # Example
1086    ///
1087    /// Basic usage:
1088    ///
1089    /// ```
1090    /// use bstr::{B, ByteSlice};
1091    ///
1092    /// let s = B("  foo\tbar\t\u{2003}\nquux   \n");
1093    /// let fields: Vec<&[u8]> = s.fields().collect();
1094    /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
1095    /// ```
1096    ///
1097    /// A byte string consisting of just whitespace yields no elements:
1098    ///
1099    /// ```
1100    /// use bstr::{B, ByteSlice};
1101    ///
1102    /// assert_eq!(0, B("  \n\t\u{2003}\n  \t").fields().count());
1103    /// ```
1104    #[cfg(feature = "unicode")]
1105    #[inline]
1106    fn fields(&self) -> Fields<'_> {
1107        Fields::new(self.as_bytes())
1108    }
1109
1110    /// Returns an iterator over the fields in a byte string, separated by
1111    /// contiguous codepoints satisfying the given predicate.
1112    ///
1113    /// If this byte string is not valid UTF-8, then the given closure will
1114    /// be called with a Unicode replacement codepoint when invalid UTF-8
1115    /// bytes are seen.
1116    ///
1117    /// # Example
1118    ///
1119    /// Basic usage:
1120    ///
1121    /// ```
1122    /// use bstr::{B, ByteSlice};
1123    ///
1124    /// let s = b"123foo999999bar1quux123456";
1125    /// let fields: Vec<&[u8]> = s.fields_with(|c| c.is_numeric()).collect();
1126    /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
1127    /// ```
1128    ///
1129    /// A byte string consisting of all codepoints satisfying the predicate
1130    /// yields no elements:
1131    ///
1132    /// ```
1133    /// use bstr::ByteSlice;
1134    ///
1135    /// assert_eq!(0, b"1911354563".fields_with(|c| c.is_numeric()).count());
1136    /// ```
1137    #[inline]
1138    fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<'_, F> {
1139        FieldsWith::new(self.as_bytes(), f)
1140    }
1141
1142    /// Returns an iterator over substrings of this byte string, separated
1143    /// by the given byte string. Each element yielded is guaranteed not to
1144    /// include the splitter substring.
1145    ///
1146    /// The splitter may be any type that can be cheaply converted into a
1147    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1148    ///
1149    /// # Examples
1150    ///
1151    /// Basic usage:
1152    ///
1153    /// ```
1154    /// use bstr::{B, ByteSlice};
1155    ///
1156    /// let x: Vec<&[u8]> = b"Mary had a little lamb".split_str(" ").collect();
1157    /// assert_eq!(x, vec![
1158    ///     B("Mary"), B("had"), B("a"), B("little"), B("lamb"),
1159    /// ]);
1160    ///
1161    /// let x: Vec<&[u8]> = b"".split_str("X").collect();
1162    /// assert_eq!(x, vec![b""]);
1163    ///
1164    /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".split_str("X").collect();
1165    /// assert_eq!(x, vec![B("lion"), B(""), B("tiger"), B("leopard")]);
1166    ///
1167    /// let x: Vec<&[u8]> = b"lion::tiger::leopard".split_str("::").collect();
1168    /// assert_eq!(x, vec![B("lion"), B("tiger"), B("leopard")]);
1169    /// ```
1170    ///
1171    /// If a string contains multiple contiguous separators, you will end up
1172    /// with empty strings yielded by the iterator:
1173    ///
1174    /// ```
1175    /// use bstr::{B, ByteSlice};
1176    ///
1177    /// let x: Vec<&[u8]> = b"||||a||b|c".split_str("|").collect();
1178    /// assert_eq!(x, vec![
1179    ///     B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
1180    /// ]);
1181    ///
1182    /// let x: Vec<&[u8]> = b"(///)".split_str("/").collect();
1183    /// assert_eq!(x, vec![B("("), B(""), B(""), B(")")]);
1184    /// ```
1185    ///
1186    /// Separators at the start or end of a string are neighbored by empty
1187    /// strings.
1188    ///
1189    /// ```
1190    /// use bstr::{B, ByteSlice};
1191    ///
1192    /// let x: Vec<&[u8]> = b"010".split_str("0").collect();
1193    /// assert_eq!(x, vec![B(""), B("1"), B("")]);
1194    /// ```
1195    ///
1196    /// When the empty string is used as a separator, it splits every **byte**
1197    /// in the byte string, along with the beginning and end of the byte
1198    /// string.
1199    ///
1200    /// ```
1201    /// use bstr::{B, ByteSlice};
1202    ///
1203    /// let x: Vec<&[u8]> = b"rust".split_str("").collect();
1204    /// assert_eq!(x, vec![
1205    ///     B(""), B("r"), B("u"), B("s"), B("t"), B(""),
1206    /// ]);
1207    ///
1208    /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1209    /// // may not be valid UTF-8!
1210    /// let x: Vec<&[u8]> = B("☃").split_str("").collect();
1211    /// assert_eq!(x, vec![
1212    ///     B(""), B(b"\xE2"), B(b"\x98"), B(b"\x83"), B(""),
1213    /// ]);
1214    /// ```
1215    ///
1216    /// Contiguous separators, especially whitespace, can lead to possibly
1217    /// surprising behavior. For example, this code is correct:
1218    ///
1219    /// ```
1220    /// use bstr::{B, ByteSlice};
1221    ///
1222    /// let x: Vec<&[u8]> = b"    a  b c".split_str(" ").collect();
1223    /// assert_eq!(x, vec![
1224    ///     B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
1225    /// ]);
1226    /// ```
1227    ///
1228    /// It does *not* give you `["a", "b", "c"]`. For that behavior, use
1229    /// [`fields`](#method.fields) instead.
1230    #[inline]
1231    fn split_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
1232        &'h self,
1233        splitter: &'s B,
1234    ) -> Split<'h, 's> {
1235        Split::new(self.as_bytes(), splitter.as_ref())
1236    }
1237
1238    /// Returns an iterator over substrings of this byte string, separated by
1239    /// the given byte string, in reverse. Each element yielded is guaranteed
1240    /// not to include the splitter substring.
1241    ///
1242    /// The splitter may be any type that can be cheaply converted into a
1243    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1244    ///
1245    /// # Examples
1246    ///
1247    /// Basic usage:
1248    ///
1249    /// ```
1250    /// use bstr::{B, ByteSlice};
1251    ///
1252    /// let x: Vec<&[u8]> =
1253    ///     b"Mary had a little lamb".rsplit_str(" ").collect();
1254    /// assert_eq!(x, vec![
1255    ///     B("lamb"), B("little"), B("a"), B("had"), B("Mary"),
1256    /// ]);
1257    ///
1258    /// let x: Vec<&[u8]> = b"".rsplit_str("X").collect();
1259    /// assert_eq!(x, vec![b""]);
1260    ///
1261    /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".rsplit_str("X").collect();
1262    /// assert_eq!(x, vec![B("leopard"), B("tiger"), B(""), B("lion")]);
1263    ///
1264    /// let x: Vec<&[u8]> = b"lion::tiger::leopard".rsplit_str("::").collect();
1265    /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lion")]);
1266    /// ```
1267    ///
1268    /// If a string contains multiple contiguous separators, you will end up
1269    /// with empty strings yielded by the iterator:
1270    ///
1271    /// ```
1272    /// use bstr::{B, ByteSlice};
1273    ///
1274    /// let x: Vec<&[u8]> = b"||||a||b|c".rsplit_str("|").collect();
1275    /// assert_eq!(x, vec![
1276    ///     B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
1277    /// ]);
1278    ///
1279    /// let x: Vec<&[u8]> = b"(///)".rsplit_str("/").collect();
1280    /// assert_eq!(x, vec![B(")"), B(""), B(""), B("(")]);
1281    /// ```
1282    ///
1283    /// Separators at the start or end of a string are neighbored by empty
1284    /// strings.
1285    ///
1286    /// ```
1287    /// use bstr::{B, ByteSlice};
1288    ///
1289    /// let x: Vec<&[u8]> = b"010".rsplit_str("0").collect();
1290    /// assert_eq!(x, vec![B(""), B("1"), B("")]);
1291    /// ```
1292    ///
1293    /// When the empty string is used as a separator, it splits every **byte**
1294    /// in the byte string, along with the beginning and end of the byte
1295    /// string.
1296    ///
1297    /// ```
1298    /// use bstr::{B, ByteSlice};
1299    ///
1300    /// let x: Vec<&[u8]> = b"rust".rsplit_str("").collect();
1301    /// assert_eq!(x, vec![
1302    ///     B(""), B("t"), B("s"), B("u"), B("r"), B(""),
1303    /// ]);
1304    ///
1305    /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1306    /// // may not be valid UTF-8!
1307    /// let x: Vec<&[u8]> = B("☃").rsplit_str("").collect();
1308    /// assert_eq!(x, vec![B(""), B(b"\x83"), B(b"\x98"), B(b"\xE2"), B("")]);
1309    /// ```
1310    ///
1311    /// Contiguous separators, especially whitespace, can lead to possibly
1312    /// surprising behavior. For example, this code is correct:
1313    ///
1314    /// ```
1315    /// use bstr::{B, ByteSlice};
1316    ///
1317    /// let x: Vec<&[u8]> = b"    a  b c".rsplit_str(" ").collect();
1318    /// assert_eq!(x, vec![
1319    ///     B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
1320    /// ]);
1321    /// ```
1322    ///
1323    /// It does *not* give you `["a", "b", "c"]`.
1324    #[inline]
1325    fn rsplit_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
1326        &'h self,
1327        splitter: &'s B,
1328    ) -> SplitReverse<'h, 's> {
1329        SplitReverse::new(self.as_bytes(), splitter.as_ref())
1330    }
1331
1332    /// Split this byte string at the first occurrence of `splitter`.
1333    ///
1334    /// If the `splitter` is found in the byte string, returns a tuple
1335    /// containing the parts of the string before and after the first occurrence
1336    /// of `splitter` respectively. Otherwise, if there are no occurrences of
1337    /// `splitter` in the byte string, returns `None`.
1338    ///
1339    /// The splitter may be any type that can be cheaply converted into a
1340    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1341    ///
1342    /// If you need to split on the *last* instance of a delimiter instead, see
    /// the [`ByteSlice::rsplit_once_str`](#method.rsplit_once_str) method.
1344    ///
1345    /// # Examples
1346    ///
1347    /// Basic usage:
1348    ///
1349    /// ```
1350    /// use bstr::{B, ByteSlice};
1351    ///
1352    /// assert_eq!(
1353    ///     B("foo,bar").split_once_str(","),
1354    ///     Some((B("foo"), B("bar"))),
1355    /// );
1356    /// assert_eq!(
1357    ///     B("foo,bar,baz").split_once_str(","),
1358    ///     Some((B("foo"), B("bar,baz"))),
1359    /// );
1360    /// assert_eq!(B("foo").split_once_str(","), None);
1361    /// assert_eq!(B("foo,").split_once_str(b","), Some((B("foo"), B(""))));
1362    /// assert_eq!(B(",foo").split_once_str(b","), Some((B(""), B("foo"))));
1363    /// ```
1364    #[inline]
1365    fn split_once_str<'a, B: ?Sized + AsRef<[u8]>>(
1366        &'a self,
1367        splitter: &B,
1368    ) -> Option<(&'a [u8], &'a [u8])> {
1369        let bytes = self.as_bytes();
1370        let splitter = splitter.as_ref();
1371        let start = Finder::new(splitter).find(bytes)?;
1372        let end = start + splitter.len();
1373        Some((&bytes[..start], &bytes[end..]))
1374    }
1375
1376    /// Split this byte string at the last occurrence of `splitter`.
1377    ///
1378    /// If the `splitter` is found in the byte string, returns a tuple
1379    /// containing the parts of the string before and after the last occurrence
1380    /// of `splitter`, respectively. Otherwise, if there are no occurrences of
1381    /// `splitter` in the byte string, returns `None`.
1382    ///
1383    /// The splitter may be any type that can be cheaply converted into a
1384    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1385    ///
1386    /// If you need to split on the *first* instance of a delimiter instead, see
1387    /// the [`ByteSlice::split_once_str`](#method.split_once_str) method.
1388    ///
1389    /// # Examples
1390    ///
1391    /// Basic usage:
1392    ///
1393    /// ```
1394    /// use bstr::{B, ByteSlice};
1395    ///
1396    /// assert_eq!(
1397    ///     B("foo,bar").rsplit_once_str(","),
1398    ///     Some((B("foo"), B("bar"))),
1399    /// );
1400    /// assert_eq!(
1401    ///     B("foo,bar,baz").rsplit_once_str(","),
1402    ///     Some((B("foo,bar"), B("baz"))),
1403    /// );
1404    /// assert_eq!(B("foo").rsplit_once_str(","), None);
1405    /// assert_eq!(B("foo,").rsplit_once_str(b","), Some((B("foo"), B(""))));
1406    /// assert_eq!(B(",foo").rsplit_once_str(b","), Some((B(""), B("foo"))));
1407    /// ```
1408    #[inline]
1409    fn rsplit_once_str<'a, B: ?Sized + AsRef<[u8]>>(
1410        &'a self,
1411        splitter: &B,
1412    ) -> Option<(&'a [u8], &'a [u8])> {
1413        let bytes = self.as_bytes();
1414        let splitter = splitter.as_ref();
1415        let start = FinderReverse::new(splitter).rfind(bytes)?;
1416        let end = start + splitter.len();
1417        Some((&bytes[..start], &bytes[end..]))
1418    }
1419
1420    /// Returns an iterator of at most `limit` substrings of this byte string,
1421    /// separated by the given byte string. If `limit` substrings are yielded,
1422    /// then the last substring will contain the remainder of this byte string.
1423    ///
    /// The splitter may be any type that can be cheaply converted into a
1425    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1426    ///
1427    /// # Examples
1428    ///
1429    /// Basic usage:
1430    ///
1431    /// ```
1432    /// use bstr::{B, ByteSlice};
1433    ///
1434    /// let x: Vec<_> = b"Mary had a little lamb".splitn_str(3, " ").collect();
1435    /// assert_eq!(x, vec![B("Mary"), B("had"), B("a little lamb")]);
1436    ///
1437    /// let x: Vec<_> = b"".splitn_str(3, "X").collect();
1438    /// assert_eq!(x, vec![b""]);
1439    ///
1440    /// let x: Vec<_> = b"lionXXtigerXleopard".splitn_str(3, "X").collect();
1441    /// assert_eq!(x, vec![B("lion"), B(""), B("tigerXleopard")]);
1442    ///
1443    /// let x: Vec<_> = b"lion::tiger::leopard".splitn_str(2, "::").collect();
1444    /// assert_eq!(x, vec![B("lion"), B("tiger::leopard")]);
1445    ///
1446    /// let x: Vec<_> = b"abcXdef".splitn_str(1, "X").collect();
1447    /// assert_eq!(x, vec![B("abcXdef")]);
1448    ///
1449    /// let x: Vec<_> = b"abcdef".splitn_str(2, "X").collect();
1450    /// assert_eq!(x, vec![B("abcdef")]);
1451    ///
1452    /// let x: Vec<_> = b"abcXdef".splitn_str(0, "X").collect();
1453    /// assert!(x.is_empty());
1454    /// ```
1455    #[inline]
1456    fn splitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
1457        &'h self,
1458        limit: usize,
1459        splitter: &'s B,
1460    ) -> SplitN<'h, 's> {
1461        SplitN::new(self.as_bytes(), splitter.as_ref(), limit)
1462    }
1463
1464    /// Returns an iterator of at most `limit` substrings of this byte string,
1465    /// separated by the given byte string, in reverse. If `limit` substrings
1466    /// are yielded, then the last substring will contain the remainder of this
1467    /// byte string.
1468    ///
    /// The splitter may be any type that can be cheaply converted into a
1470    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1471    ///
1472    /// # Examples
1473    ///
1474    /// Basic usage:
1475    ///
1476    /// ```
1477    /// use bstr::{B, ByteSlice};
1478    ///
1479    /// let x: Vec<_> =
1480    ///     b"Mary had a little lamb".rsplitn_str(3, " ").collect();
1481    /// assert_eq!(x, vec![B("lamb"), B("little"), B("Mary had a")]);
1482    ///
1483    /// let x: Vec<_> = b"".rsplitn_str(3, "X").collect();
1484    /// assert_eq!(x, vec![b""]);
1485    ///
1486    /// let x: Vec<_> = b"lionXXtigerXleopard".rsplitn_str(3, "X").collect();
1487    /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lionX")]);
1488    ///
1489    /// let x: Vec<_> = b"lion::tiger::leopard".rsplitn_str(2, "::").collect();
1490    /// assert_eq!(x, vec![B("leopard"), B("lion::tiger")]);
1491    ///
1492    /// let x: Vec<_> = b"abcXdef".rsplitn_str(1, "X").collect();
1493    /// assert_eq!(x, vec![B("abcXdef")]);
1494    ///
1495    /// let x: Vec<_> = b"abcdef".rsplitn_str(2, "X").collect();
1496    /// assert_eq!(x, vec![B("abcdef")]);
1497    ///
1498    /// let x: Vec<_> = b"abcXdef".rsplitn_str(0, "X").collect();
1499    /// assert!(x.is_empty());
1500    /// ```
1501    #[inline]
1502    fn rsplitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
1503        &'h self,
1504        limit: usize,
1505        splitter: &'s B,
1506    ) -> SplitNReverse<'h, 's> {
1507        SplitNReverse::new(self.as_bytes(), splitter.as_ref(), limit)
1508    }
1509
    /// Replace all matches of the given needle with the given replacement, and
    /// return the result as a new `Vec<u8>`.
1512    ///
1513    /// This routine is useful as a convenience. If you need to reuse an
1514    /// allocation, use [`replace_into`](#method.replace_into) instead.
1515    ///
1516    /// # Examples
1517    ///
1518    /// Basic usage:
1519    ///
1520    /// ```
1521    /// use bstr::ByteSlice;
1522    ///
1523    /// let s = b"this is old".replace("old", "new");
1524    /// assert_eq!(s, "this is new".as_bytes());
1525    /// ```
1526    ///
1527    /// When the pattern doesn't match:
1528    ///
1529    /// ```
1530    /// use bstr::ByteSlice;
1531    ///
1532    /// let s = b"this is old".replace("nada nada", "limonada");
1533    /// assert_eq!(s, "this is old".as_bytes());
1534    /// ```
1535    ///
1536    /// When the needle is an empty string:
1537    ///
1538    /// ```
1539    /// use bstr::ByteSlice;
1540    ///
1541    /// let s = b"foo".replace("", "Z");
1542    /// assert_eq!(s, "ZfZoZoZ".as_bytes());
1543    /// ```
1544    #[cfg(feature = "alloc")]
1545    #[inline]
1546    fn replace<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1547        &self,
1548        needle: N,
1549        replacement: R,
1550    ) -> Vec<u8> {
1551        let mut dest = Vec::with_capacity(self.as_bytes().len());
1552        self.replace_into(needle, replacement, &mut dest);
1553        dest
1554    }
1555
    /// Replace up to `limit` matches of the given needle with the given
    /// replacement, and return the result as a new `Vec<u8>`.
1558    ///
1559    /// This routine is useful as a convenience. If you need to reuse an
1560    /// allocation, use [`replacen_into`](#method.replacen_into) instead.
1561    ///
1562    /// # Examples
1563    ///
1564    /// Basic usage:
1565    ///
1566    /// ```
1567    /// use bstr::ByteSlice;
1568    ///
1569    /// let s = b"foofoo".replacen("o", "z", 2);
1570    /// assert_eq!(s, "fzzfoo".as_bytes());
1571    /// ```
1572    ///
1573    /// When the pattern doesn't match:
1574    ///
1575    /// ```
1576    /// use bstr::ByteSlice;
1577    ///
1578    /// let s = b"foofoo".replacen("a", "z", 2);
1579    /// assert_eq!(s, "foofoo".as_bytes());
1580    /// ```
1581    ///
1582    /// When the needle is an empty string:
1583    ///
1584    /// ```
1585    /// use bstr::ByteSlice;
1586    ///
1587    /// let s = b"foo".replacen("", "Z", 2);
1588    /// assert_eq!(s, "ZfZoo".as_bytes());
1589    /// ```
1590    #[cfg(feature = "alloc")]
1591    #[inline]
1592    fn replacen<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1593        &self,
1594        needle: N,
1595        replacement: R,
1596        limit: usize,
1597    ) -> Vec<u8> {
1598        let mut dest = Vec::with_capacity(self.as_bytes().len());
1599        self.replacen_into(needle, replacement, limit, &mut dest);
1600        dest
1601    }
1602
1603    /// Replace all matches of the given needle with the given replacement,
1604    /// and write the result into the provided `Vec<u8>`.
1605    ///
1606    /// This does **not** clear `dest` before writing to it.
1607    ///
    /// This routine is useful for reusing an allocation. For a more convenient
1609    /// API, use [`replace`](#method.replace) instead.
1610    ///
1611    /// # Examples
1612    ///
1613    /// Basic usage:
1614    ///
1615    /// ```
1616    /// use bstr::ByteSlice;
1617    ///
1618    /// let s = b"this is old";
1619    ///
1620    /// let mut dest = vec![];
1621    /// s.replace_into("old", "new", &mut dest);
1622    /// assert_eq!(dest, "this is new".as_bytes());
1623    /// ```
1624    ///
1625    /// When the pattern doesn't match:
1626    ///
1627    /// ```
1628    /// use bstr::ByteSlice;
1629    ///
1630    /// let s = b"this is old";
1631    ///
1632    /// let mut dest = vec![];
1633    /// s.replace_into("nada nada", "limonada", &mut dest);
1634    /// assert_eq!(dest, "this is old".as_bytes());
1635    /// ```
1636    ///
1637    /// When the needle is an empty string:
1638    ///
1639    /// ```
1640    /// use bstr::ByteSlice;
1641    ///
1642    /// let s = b"foo";
1643    ///
1644    /// let mut dest = vec![];
1645    /// s.replace_into("", "Z", &mut dest);
1646    /// assert_eq!(dest, "ZfZoZoZ".as_bytes());
1647    /// ```
1648    #[cfg(feature = "alloc")]
1649    #[inline]
1650    fn replace_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1651        &self,
1652        needle: N,
1653        replacement: R,
1654        dest: &mut Vec<u8>,
1655    ) {
1656        let (needle, replacement) = (needle.as_ref(), replacement.as_ref());
1657
1658        let mut last = 0;
1659        for start in self.find_iter(needle) {
1660            dest.push_str(&self.as_bytes()[last..start]);
1661            dest.push_str(replacement);
1662            last = start + needle.len();
1663        }
1664        dest.push_str(&self.as_bytes()[last..]);
1665    }
1666
1667    /// Replace up to `limit` matches of the given needle with the given
1668    /// replacement, and write the result into the provided `Vec<u8>`.
1669    ///
1670    /// This does **not** clear `dest` before writing to it.
1671    ///
    /// This routine is useful for reusing an allocation. For a more convenient
1673    /// API, use [`replacen`](#method.replacen) instead.
1674    ///
1675    /// # Examples
1676    ///
1677    /// Basic usage:
1678    ///
1679    /// ```
1680    /// use bstr::ByteSlice;
1681    ///
1682    /// let s = b"foofoo";
1683    ///
1684    /// let mut dest = vec![];
1685    /// s.replacen_into("o", "z", 2, &mut dest);
1686    /// assert_eq!(dest, "fzzfoo".as_bytes());
1687    /// ```
1688    ///
1689    /// When the pattern doesn't match:
1690    ///
1691    /// ```
1692    /// use bstr::ByteSlice;
1693    ///
1694    /// let s = b"foofoo";
1695    ///
1696    /// let mut dest = vec![];
1697    /// s.replacen_into("a", "z", 2, &mut dest);
1698    /// assert_eq!(dest, "foofoo".as_bytes());
1699    /// ```
1700    ///
1701    /// When the needle is an empty string:
1702    ///
1703    /// ```
1704    /// use bstr::ByteSlice;
1705    ///
1706    /// let s = b"foo";
1707    ///
1708    /// let mut dest = vec![];
1709    /// s.replacen_into("", "Z", 2, &mut dest);
1710    /// assert_eq!(dest, "ZfZoo".as_bytes());
1711    /// ```
1712    #[cfg(feature = "alloc")]
1713    #[inline]
1714    fn replacen_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1715        &self,
1716        needle: N,
1717        replacement: R,
1718        limit: usize,
1719        dest: &mut Vec<u8>,
1720    ) {
1721        let (needle, replacement) = (needle.as_ref(), replacement.as_ref());
1722
1723        let mut last = 0;
1724        for start in self.find_iter(needle).take(limit) {
1725            dest.push_str(&self.as_bytes()[last..start]);
1726            dest.push_str(replacement);
1727            last = start + needle.len();
1728        }
1729        dest.push_str(&self.as_bytes()[last..]);
1730    }
1731
1732    /// Returns an iterator over the bytes in this byte string.
1733    ///
1734    /// # Examples
1735    ///
1736    /// Basic usage:
1737    ///
1738    /// ```
1739    /// use bstr::ByteSlice;
1740    ///
1741    /// let bs = b"foobar";
1742    /// let bytes: Vec<u8> = bs.bytes().collect();
1743    /// assert_eq!(bytes, bs);
1744    /// ```
1745    #[inline]
1746    fn bytes(&self) -> Bytes<'_> {
1747        Bytes { it: self.as_bytes().iter() }
1748    }
1749
1750    /// Returns an iterator over the Unicode scalar values in this byte string.
1751    /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1752    /// is yielded instead.
1753    ///
1754    /// # Examples
1755    ///
1756    /// Basic usage:
1757    ///
1758    /// ```
1759    /// use bstr::ByteSlice;
1760    ///
1761    /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1762    /// let chars: Vec<char> = bs.chars().collect();
1763    /// assert_eq!(vec!['☃', '\u{FFFD}', '𝞃', '\u{FFFD}', 'a'], chars);
1764    /// ```
1765    ///
1766    /// Codepoints can also be iterated over in reverse:
1767    ///
1768    /// ```
1769    /// use bstr::ByteSlice;
1770    ///
1771    /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1772    /// let chars: Vec<char> = bs.chars().rev().collect();
1773    /// assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars);
1774    /// ```
1775    #[inline]
1776    fn chars(&self) -> Chars<'_> {
1777        Chars::new(self.as_bytes())
1778    }
1779
1780    /// Returns an iterator over the Unicode scalar values in this byte string
1781    /// along with their starting and ending byte index positions. If invalid
1782    /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1783    /// instead.
1784    ///
1785    /// Note that this is slightly different from the `CharIndices` iterator
1786    /// provided by the standard library. Aside from working on possibly
1787    /// invalid UTF-8, this iterator provides both the corresponding starting
1788    /// and ending byte indices of each codepoint yielded. The ending position
1789    /// is necessary to slice the original byte string when invalid UTF-8 bytes
1790    /// are converted into a Unicode replacement codepoint, since a single
1791    /// replacement codepoint can substitute anywhere from 1 to 3 invalid bytes
1792    /// (inclusive).
1793    ///
1794    /// # Examples
1795    ///
1796    /// Basic usage:
1797    ///
1798    /// ```
1799    /// use bstr::ByteSlice;
1800    ///
1801    /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1802    /// let chars: Vec<(usize, usize, char)> = bs.char_indices().collect();
1803    /// assert_eq!(chars, vec![
1804    ///     (0, 3, '☃'),
1805    ///     (3, 4, '\u{FFFD}'),
1806    ///     (4, 8, '𝞃'),
1807    ///     (8, 10, '\u{FFFD}'),
1808    ///     (10, 11, 'a'),
1809    /// ]);
1810    /// ```
1811    ///
1812    /// Codepoints can also be iterated over in reverse:
1813    ///
1814    /// ```
1815    /// use bstr::ByteSlice;
1816    ///
1817    /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1818    /// let chars: Vec<(usize, usize, char)> = bs
1819    ///     .char_indices()
1820    ///     .rev()
1821    ///     .collect();
1822    /// assert_eq!(chars, vec![
1823    ///     (10, 11, 'a'),
1824    ///     (8, 10, '\u{FFFD}'),
1825    ///     (4, 8, '𝞃'),
1826    ///     (3, 4, '\u{FFFD}'),
1827    ///     (0, 3, '☃'),
1828    /// ]);
1829    /// ```
1830    #[inline]
1831    fn char_indices(&self) -> CharIndices<'_> {
1832        CharIndices::new(self.as_bytes())
1833    }
1834
1835    /// Iterate over chunks of valid UTF-8.
1836    ///
1837    /// The iterator returned yields chunks of valid UTF-8 separated by invalid
1838    /// UTF-8 bytes, if they exist. Invalid UTF-8 bytes are always 1-3 bytes,
1839    /// which are determined via the "substitution of maximal subparts"
1840    /// strategy described in the docs for the
1841    /// [`ByteSlice::to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy)
1842    /// method.
1843    ///
1844    /// # Examples
1845    ///
1846    /// This example shows how to gather all valid and invalid chunks from a
1847    /// byte slice:
1848    ///
1849    /// ```
1850    /// use bstr::{ByteSlice, Utf8Chunk};
1851    ///
1852    /// let bytes = b"foo\xFD\xFEbar\xFF";
1853    ///
1854    /// let (mut valid_chunks, mut invalid_chunks) = (vec![], vec![]);
1855    /// for chunk in bytes.utf8_chunks() {
1856    ///     if !chunk.valid().is_empty() {
1857    ///         valid_chunks.push(chunk.valid());
1858    ///     }
1859    ///     if !chunk.invalid().is_empty() {
1860    ///         invalid_chunks.push(chunk.invalid());
1861    ///     }
1862    /// }
1863    ///
1864    /// assert_eq!(valid_chunks, vec!["foo", "bar"]);
1865    /// assert_eq!(invalid_chunks, vec![b"\xFD", b"\xFE", b"\xFF"]);
1866    /// ```
1867    #[inline]
1868    fn utf8_chunks(&self) -> Utf8Chunks<'_> {
1869        Utf8Chunks { bytes: self.as_bytes() }
1870    }
1871
1872    /// Returns an iterator over the grapheme clusters in this byte string.
1873    /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1874    /// is yielded instead.
1875    ///
1876    /// # Examples
1877    ///
1878    /// This example shows how multiple codepoints can combine to form a
1879    /// single grapheme cluster:
1880    ///
1881    /// ```
1882    /// use bstr::ByteSlice;
1883    ///
1884    /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1885    /// let graphemes: Vec<&str> = bs.graphemes().collect();
1886    /// assert_eq!(vec!["à̖", "🇺🇸"], graphemes);
1887    /// ```
1888    ///
1889    /// This shows that graphemes can be iterated over in reverse:
1890    ///
1891    /// ```
1892    /// use bstr::ByteSlice;
1893    ///
1894    /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1895    /// let graphemes: Vec<&str> = bs.graphemes().rev().collect();
1896    /// assert_eq!(vec!["🇺🇸", "à̖"], graphemes);
1897    /// ```
1898    #[cfg(feature = "unicode")]
1899    #[inline]
1900    fn graphemes(&self) -> Graphemes<'_> {
1901        Graphemes::new(self.as_bytes())
1902    }
1903
1904    /// Returns an iterator over the grapheme clusters in this byte string
1905    /// along with their starting and ending byte index positions. If invalid
1906    /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1907    /// instead.
1908    ///
1909    /// # Examples
1910    ///
1911    /// This example shows how to get the byte offsets of each individual
1912    /// grapheme cluster:
1913    ///
1914    /// ```
1915    /// use bstr::ByteSlice;
1916    ///
1917    /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1918    /// let graphemes: Vec<(usize, usize, &str)> =
1919    ///     bs.grapheme_indices().collect();
1920    /// assert_eq!(vec![(0, 5, "à̖"), (5, 13, "🇺🇸")], graphemes);
1921    /// ```
1922    ///
1923    /// This example shows what happens when invalid UTF-8 is encountered. Note
1924    /// that the offsets are valid indices into the original string, and do
1925    /// not necessarily correspond to the length of the `&str` returned!
1926    ///
1927    /// ```
1928    /// # #[cfg(all(feature = "alloc"))] {
1929    /// use bstr::{ByteSlice, ByteVec};
1930    ///
1931    /// let mut bytes = vec![];
1932    /// bytes.push_str("a\u{0300}\u{0316}");
1933    /// bytes.push(b'\xFF');
1934    /// bytes.push_str("\u{1F1FA}\u{1F1F8}");
1935    ///
1936    /// let graphemes: Vec<(usize, usize, &str)> =
1937    ///     bytes.grapheme_indices().collect();
1938    /// assert_eq!(
1939    ///     graphemes,
1940    ///     vec![(0, 5, "à̖"), (5, 6, "\u{FFFD}"), (6, 14, "🇺🇸")]
1941    /// );
1942    /// # }
1943    /// ```
1944    #[cfg(feature = "unicode")]
1945    #[inline]
1946    fn grapheme_indices(&self) -> GraphemeIndices<'_> {
1947        GraphemeIndices::new(self.as_bytes())
1948    }
1949
1950    /// Returns an iterator over the words in this byte string. If invalid
1951    /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1952    /// instead.
1953    ///
1954    /// This is similar to
1955    /// [`words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks),
1956    /// except it only returns elements that contain a "word" character. A word
1957    /// character is defined by UTS #18 (Annex C) to be the combination of the
1958    /// `Alphabetic` and `Join_Control` properties, along with the
1959    /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
1960    /// categories.
1961    ///
1962    /// Since words are made up of one or more codepoints, this iterator
1963    /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1964    /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1965    ///
1966    /// # Examples
1967    ///
1968    /// Basic usage:
1969    ///
1970    /// ```
1971    /// use bstr::ByteSlice;
1972    ///
1973    /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
1974    /// let words: Vec<&str> = bs.words().collect();
1975    /// assert_eq!(words, vec![
1976    ///     "The", "quick", "brown", "fox", "can't",
1977    ///     "jump", "32.3", "feet", "right",
1978    /// ]);
1979    /// ```
1980    #[cfg(feature = "unicode")]
1981    #[inline]
1982    fn words(&self) -> Words<'_> {
1983        Words::new(self.as_bytes())
1984    }
1985
1986    /// Returns an iterator over the words in this byte string along with
1987    /// their starting and ending byte index positions.
1988    ///
1989    /// This is similar to
1990    /// [`words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices),
1991    /// except it only returns elements that contain a "word" character. A word
1992    /// character is defined by UTS #18 (Annex C) to be the combination of the
1993    /// `Alphabetic` and `Join_Control` properties, along with the
1994    /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
1995    /// categories.
1996    ///
1997    /// Since words are made up of one or more codepoints, this iterator
1998    /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1999    /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2000    ///
2001    /// # Examples
2002    ///
2003    /// This example shows how to get the byte offsets of each individual
2004    /// word:
2005    ///
2006    /// ```
2007    /// use bstr::ByteSlice;
2008    ///
2009    /// let bs = b"can't jump 32.3 feet";
2010    /// let words: Vec<(usize, usize, &str)> = bs.word_indices().collect();
2011    /// assert_eq!(words, vec![
2012    ///     (0, 5, "can't"),
2013    ///     (6, 10, "jump"),
2014    ///     (11, 15, "32.3"),
2015    ///     (16, 20, "feet"),
2016    /// ]);
2017    /// ```
2018    #[cfg(feature = "unicode")]
2019    #[inline]
2020    fn word_indices(&self) -> WordIndices<'_> {
2021        WordIndices::new(self.as_bytes())
2022    }
2023
2024    /// Returns an iterator over the words in this byte string, along with
2025    /// all breaks between the words. Concatenating all elements yielded by
2026    /// the iterator results in the original string (modulo Unicode replacement
2027    /// codepoint substitutions if invalid UTF-8 is encountered).
2028    ///
2029    /// Since words are made up of one or more codepoints, this iterator
2030    /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2031    /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2032    ///
2033    /// # Examples
2034    ///
2035    /// Basic usage:
2036    ///
2037    /// ```
2038    /// use bstr::ByteSlice;
2039    ///
2040    /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
2041    /// let words: Vec<&str> = bs.words_with_breaks().collect();
2042    /// assert_eq!(words, vec![
2043    ///     "The", " ", "quick", " ", "(", "\"", "brown", "\"", ")",
2044    ///     " ", "fox", " ", "can't", " ", "jump", " ", "32.3", " ", "feet",
2045    ///     ",", " ", "right", "?",
2046    /// ]);
2047    /// ```
2048    #[cfg(feature = "unicode")]
2049    #[inline]
2050    fn words_with_breaks(&self) -> WordsWithBreaks<'_> {
2051        WordsWithBreaks::new(self.as_bytes())
2052    }
2053
2054    /// Returns an iterator over the words and their byte offsets in this
2055    /// byte string, along with all breaks between the words. Concatenating
2056    /// all elements yielded by the iterator results in the original string
2057    /// (modulo Unicode replacement codepoint substitutions if invalid UTF-8 is
2058    /// encountered).
2059    ///
2060    /// Since words are made up of one or more codepoints, this iterator
2061    /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2062    /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2063    ///
2064    /// # Examples
2065    ///
2066    /// This example shows how to get the byte offsets of each individual
2067    /// word:
2068    ///
2069    /// ```
2070    /// use bstr::ByteSlice;
2071    ///
2072    /// let bs = b"can't jump 32.3 feet";
2073    /// let words: Vec<(usize, usize, &str)> =
2074    ///     bs.words_with_break_indices().collect();
2075    /// assert_eq!(words, vec![
2076    ///     (0, 5, "can't"),
2077    ///     (5, 6, " "),
2078    ///     (6, 10, "jump"),
2079    ///     (10, 11, " "),
2080    ///     (11, 15, "32.3"),
2081    ///     (15, 16, " "),
2082    ///     (16, 20, "feet"),
2083    /// ]);
2084    /// ```
2085    #[cfg(feature = "unicode")]
2086    #[inline]
2087    fn words_with_break_indices(&self) -> WordsWithBreakIndices<'_> {
2088        WordsWithBreakIndices::new(self.as_bytes())
2089    }
2090
2091    /// Returns an iterator over the sentences in this byte string.
2092    ///
2093    /// Typically, a sentence will include its trailing punctuation and
2094    /// whitespace. Concatenating all elements yielded by the iterator
2095    /// results in the original string (modulo Unicode replacement codepoint
2096    /// substitutions if invalid UTF-8 is encountered).
2097    ///
2098    /// Since sentences are made up of one or more codepoints, this iterator
2099    /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2100    /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2101    ///
2102    /// # Examples
2103    ///
2104    /// Basic usage:
2105    ///
2106    /// ```
2107    /// use bstr::ByteSlice;
2108    ///
2109    /// let bs = b"I want this. Not that. Right now.";
2110    /// let sentences: Vec<&str> = bs.sentences().collect();
2111    /// assert_eq!(sentences, vec![
2112    ///     "I want this. ",
2113    ///     "Not that. ",
2114    ///     "Right now.",
2115    /// ]);
2116    /// ```
2117    #[cfg(feature = "unicode")]
2118    #[inline]
2119    fn sentences(&self) -> Sentences<'_> {
2120        Sentences::new(self.as_bytes())
2121    }
2122
2123    /// Returns an iterator over the sentences in this byte string along with
2124    /// their starting and ending byte index positions.
2125    ///
2126    /// Typically, a sentence will include its trailing punctuation and
2127    /// whitespace. Concatenating all elements yielded by the iterator
2128    /// results in the original string (modulo Unicode replacement codepoint
2129    /// substitutions if invalid UTF-8 is encountered).
2130    ///
2131    /// Since sentences are made up of one or more codepoints, this iterator
2132    /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2133    /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2134    ///
2135    /// # Examples
2136    ///
2137    /// Basic usage:
2138    ///
2139    /// ```
2140    /// use bstr::ByteSlice;
2141    ///
2142    /// let bs = b"I want this. Not that. Right now.";
2143    /// let sentences: Vec<(usize, usize, &str)> =
2144    ///     bs.sentence_indices().collect();
2145    /// assert_eq!(sentences, vec![
2146    ///     (0, 13, "I want this. "),
2147    ///     (13, 23, "Not that. "),
2148    ///     (23, 33, "Right now."),
2149    /// ]);
2150    /// ```
2151    #[cfg(feature = "unicode")]
2152    #[inline]
2153    fn sentence_indices(&self) -> SentenceIndices<'_> {
2154        SentenceIndices::new(self.as_bytes())
2155    }
2156
2157    /// An iterator over all lines in a byte string, without their
2158    /// terminators.
2159    ///
2160    /// For this iterator, the only line terminators recognized are `\r\n` and
2161    /// `\n`.
2162    ///
2163    /// # Examples
2164    ///
2165    /// Basic usage:
2166    ///
2167    /// ```
2168    /// use bstr::{B, ByteSlice};
2169    ///
2170    /// let s = b"\
2171    /// foo
2172    ///
2173    /// bar\r
2174    /// baz
2175    ///
2176    ///
2177    /// quux";
2178    /// let lines: Vec<&[u8]> = s.lines().collect();
2179    /// assert_eq!(lines, vec![
2180    ///     B("foo"), B(""), B("bar"), B("baz"), B(""), B(""), B("quux"),
2181    /// ]);
2182    /// ```
2183    #[inline]
2184    fn lines(&self) -> Lines<'_> {
2185        Lines::new(self.as_bytes())
2186    }
2187
2188    /// An iterator over all lines in a byte string, including their
2189    /// terminators.
2190    ///
2191    /// For this iterator, the only line terminator recognized is `\n`. (Since
2192    /// line terminators are included, this also handles `\r\n` line endings.)
2193    ///
2194    /// Line terminators are only included if they are present in the original
2195    /// byte string. For example, the last line in a byte string may not end
2196    /// with a line terminator.
2197    ///
2198    /// Concatenating all elements yielded by this iterator is guaranteed to
2199    /// yield the original byte string.
2200    ///
2201    /// # Examples
2202    ///
2203    /// Basic usage:
2204    ///
2205    /// ```
2206    /// use bstr::{B, ByteSlice};
2207    ///
2208    /// let s = b"\
2209    /// foo
2210    ///
2211    /// bar\r
2212    /// baz
2213    ///
2214    ///
2215    /// quux";
2216    /// let lines: Vec<&[u8]> = s.lines_with_terminator().collect();
2217    /// assert_eq!(lines, vec![
2218    ///     B("foo\n"),
2219    ///     B("\n"),
2220    ///     B("bar\r\n"),
2221    ///     B("baz\n"),
2222    ///     B("\n"),
2223    ///     B("\n"),
2224    ///     B("quux"),
2225    /// ]);
2226    /// ```
2227    #[inline]
2228    fn lines_with_terminator(&self) -> LinesWithTerminator<'_> {
2229        LinesWithTerminator::new(self.as_bytes())
2230    }
2231
2232    /// Return a byte string slice with leading and trailing whitespace
2233    /// removed.
2234    ///
2235    /// Whitespace is defined according to the terms of the `White_Space`
2236    /// Unicode property.
2237    ///
2238    /// # Examples
2239    ///
2240    /// Basic usage:
2241    ///
2242    /// ```
2243    /// use bstr::{B, ByteSlice};
2244    ///
2245    /// let s = B(" foo\tbar\t\u{2003}\n");
2246    /// assert_eq!(s.trim(), B("foo\tbar"));
2247    /// ```
2248    #[cfg(feature = "unicode")]
2249    #[inline]
2250    fn trim(&self) -> &[u8] {
2251        self.trim_start().trim_end()
2252    }
2253
2254    /// Return a byte string slice with leading whitespace removed.
2255    ///
2256    /// Whitespace is defined according to the terms of the `White_Space`
2257    /// Unicode property.
2258    ///
2259    /// # Examples
2260    ///
2261    /// Basic usage:
2262    ///
2263    /// ```
2264    /// use bstr::{B, ByteSlice};
2265    ///
2266    /// let s = B(" foo\tbar\t\u{2003}\n");
2267    /// assert_eq!(s.trim_start(), B("foo\tbar\t\u{2003}\n"));
2268    /// ```
2269    #[cfg(feature = "unicode")]
2270    #[inline]
2271    fn trim_start(&self) -> &[u8] {
2272        let start = whitespace_len_fwd(self.as_bytes());
2273        &self.as_bytes()[start..]
2274    }
2275
2276    /// Return a byte string slice with trailing whitespace removed.
2277    ///
2278    /// Whitespace is defined according to the terms of the `White_Space`
2279    /// Unicode property.
2280    ///
2281    /// # Examples
2282    ///
2283    /// Basic usage:
2284    ///
2285    /// ```
2286    /// use bstr::{B, ByteSlice};
2287    ///
2288    /// let s = B(" foo\tbar\t\u{2003}\n");
2289    /// assert_eq!(s.trim_end(), B(" foo\tbar"));
2290    /// ```
2291    #[cfg(feature = "unicode")]
2292    #[inline]
2293    fn trim_end(&self) -> &[u8] {
2294        let end = whitespace_len_rev(self.as_bytes());
2295        &self.as_bytes()[..end]
2296    }
2297
2298    /// Return a byte string slice with leading and trailing characters
2299    /// satisfying the given predicate removed.
2300    ///
2301    /// # Examples
2302    ///
2303    /// Basic usage:
2304    ///
2305    /// ```
2306    /// use bstr::{B, ByteSlice};
2307    ///
2308    /// let s = b"123foo5bar789";
2309    /// assert_eq!(s.trim_with(|c| c.is_numeric()), B("foo5bar"));
2310    /// ```
2311    #[inline]
2312    fn trim_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2313        self.trim_start_with(&mut trim).trim_end_with(&mut trim)
2314    }
2315
2316    /// Return a byte string slice with leading characters satisfying the given
2317    /// predicate removed.
2318    ///
2319    /// # Examples
2320    ///
2321    /// Basic usage:
2322    ///
2323    /// ```
2324    /// use bstr::{B, ByteSlice};
2325    ///
2326    /// let s = b"123foo5bar789";
2327    /// assert_eq!(s.trim_start_with(|c| c.is_numeric()), B("foo5bar789"));
2328    /// ```
2329    #[inline]
2330    fn trim_start_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2331        for (s, _, ch) in self.char_indices() {
2332            if !trim(ch) {
2333                return &self.as_bytes()[s..];
2334            }
2335        }
2336        b""
2337    }
2338
2339    /// Return a byte string slice with trailing characters satisfying the
2340    /// given predicate removed.
2341    ///
2342    /// # Examples
2343    ///
2344    /// Basic usage:
2345    ///
2346    /// ```
2347    /// use bstr::{B, ByteSlice};
2348    ///
2349    /// let s = b"123foo5bar789";
2350    /// assert_eq!(s.trim_end_with(|c| c.is_numeric()), B("123foo5bar"));
2351    /// ```
2352    #[inline]
2353    fn trim_end_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2354        for (_, e, ch) in self.char_indices().rev() {
2355            if !trim(ch) {
2356                return &self.as_bytes()[..e];
2357            }
2358        }
2359        b""
2360    }
2361
2362    /// Returns a new `Vec<u8>` containing the lowercase equivalent of this
2363    /// byte string.
2364    ///
2365    /// In this case, lowercase is defined according to the `Lowercase` Unicode
2366    /// property.
2367    ///
    /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
    /// then it is copied to the returned byte string unchanged.
2370    ///
    /// Note that some characters in this byte string may expand into multiple
    /// characters when changing the case, so the number of bytes in the
    /// returned byte string may not be equal to the number of bytes in this
    /// byte string.
2375    ///
2376    /// If you'd like to reuse an allocation for performance reasons, then use
2377    /// [`to_lowercase_into`](#method.to_lowercase_into) instead.
2378    ///
2379    /// # Examples
2380    ///
2381    /// Basic usage:
2382    ///
2383    /// ```
2384    /// use bstr::{B, ByteSlice};
2385    ///
2386    /// let s = B("HELLO Β");
2387    /// assert_eq!("hello β".as_bytes(), s.to_lowercase().as_bytes());
2388    /// ```
2389    ///
2390    /// Scripts without case are not changed:
2391    ///
2392    /// ```
2393    /// use bstr::{B, ByteSlice};
2394    ///
2395    /// let s = B("农历新年");
2396    /// assert_eq!("农历新年".as_bytes(), s.to_lowercase().as_bytes());
2397    /// ```
2398    ///
2399    /// Invalid UTF-8 remains as is:
2400    ///
2401    /// ```
2402    /// use bstr::{B, ByteSlice};
2403    ///
2404    /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2405    /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), s.to_lowercase().as_bytes());
2406    /// ```
2407    #[cfg(all(feature = "alloc", feature = "unicode"))]
2408    #[inline]
2409    fn to_lowercase(&self) -> Vec<u8> {
2410        let mut buf = vec![];
2411        self.to_lowercase_into(&mut buf);
2412        buf
2413    }
2414
2415    /// Writes the lowercase equivalent of this byte string into the given
2416    /// buffer. The buffer is not cleared before written to.
2417    ///
2418    /// In this case, lowercase is defined according to the `Lowercase`
2419    /// Unicode property.
2420    ///
2421    /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2422    /// then it is written to the given buffer unchanged.
2423    ///
2424    /// Note that some characters in this byte string may expand into multiple
2425    /// characters when changing the case, so the number of bytes written to
2426    /// the given byte string may not be equivalent to the number of bytes in
2427    /// this byte string.
2428    ///
2429    /// If you don't need to amortize allocation and instead prefer
2430    /// convenience, then use [`to_lowercase`](#method.to_lowercase) instead.
2431    ///
2432    /// # Examples
2433    ///
2434    /// Basic usage:
2435    ///
2436    /// ```
2437    /// use bstr::{B, ByteSlice};
2438    ///
2439    /// let s = B("HELLO Β");
2440    ///
2441    /// let mut buf = vec![];
2442    /// s.to_lowercase_into(&mut buf);
2443    /// assert_eq!("hello β".as_bytes(), buf.as_bytes());
2444    /// ```
2445    ///
2446    /// Scripts without case are not changed:
2447    ///
2448    /// ```
2449    /// use bstr::{B, ByteSlice};
2450    ///
2451    /// let s = B("农历新年");
2452    ///
2453    /// let mut buf = vec![];
2454    /// s.to_lowercase_into(&mut buf);
2455    /// assert_eq!("农历新年".as_bytes(), buf.as_bytes());
2456    /// ```
2457    ///
2458    /// Invalid UTF-8 remains as is:
2459    ///
2460    /// ```
2461    /// use bstr::{B, ByteSlice};
2462    ///
2463    /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2464    ///
2465    /// let mut buf = vec![];
2466    /// s.to_lowercase_into(&mut buf);
2467    /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), buf.as_bytes());
2468    /// ```
2469    #[cfg(all(feature = "alloc", feature = "unicode"))]
2470    #[inline]
2471    fn to_lowercase_into(&self, buf: &mut Vec<u8>) {
2472        // TODO: This is the best we can do given what std exposes I think.
2473        // If we roll our own case handling, then we might be able to do this
2474        // a bit faster. We shouldn't roll our own case handling unless we
2475        // need to, e.g., for doing caseless matching or case folding.
2476
2477        // TODO(BUG): This doesn't handle any special casing rules.
2478
2479        buf.reserve(self.as_bytes().len());
2480        for (s, e, ch) in self.char_indices() {
2481            if ch == '\u{FFFD}' {
2482                buf.push_str(&self.as_bytes()[s..e]);
2483            } else if ch.is_ascii() {
2484                buf.push_char(ch.to_ascii_lowercase());
2485            } else {
2486                for upper in ch.to_lowercase() {
2487                    buf.push_char(upper);
2488                }
2489            }
2490        }
2491    }
2492
2493    /// Returns a new `Vec<u8>` containing the ASCII lowercase equivalent of
2494    /// this byte string.
2495    ///
2496    /// In this case, lowercase is only defined in ASCII letters. Namely, the
2497    /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2498    /// In particular, the length of the byte string returned is always
2499    /// equivalent to the length of this byte string.
2500    ///
2501    /// If you'd like to reuse an allocation for performance reasons, then use
2502    /// [`make_ascii_lowercase`](#method.make_ascii_lowercase) to perform
2503    /// the conversion in place.
2504    ///
2505    /// # Examples
2506    ///
2507    /// Basic usage:
2508    ///
2509    /// ```
2510    /// use bstr::{B, ByteSlice};
2511    ///
2512    /// let s = B("HELLO Β");
2513    /// assert_eq!("hello Β".as_bytes(), s.to_ascii_lowercase().as_bytes());
2514    /// ```
2515    ///
2516    /// Invalid UTF-8 remains as is:
2517    ///
2518    /// ```
2519    /// use bstr::{B, ByteSlice};
2520    ///
2521    /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2522    /// assert_eq!(s.to_ascii_lowercase(), B(b"foo\xFFbar\xE2\x98baz"));
2523    /// ```
2524    #[cfg(feature = "alloc")]
2525    #[inline]
2526    fn to_ascii_lowercase(&self) -> Vec<u8> {
2527        self.as_bytes().to_ascii_lowercase()
2528    }
2529
2530    /// Convert this byte string to its lowercase ASCII equivalent in place.
2531    ///
2532    /// In this case, lowercase is only defined in ASCII letters. Namely, the
2533    /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2534    ///
2535    /// If you don't need to do the conversion in
2536    /// place and instead prefer convenience, then use
2537    /// [`to_ascii_lowercase`](#method.to_ascii_lowercase) instead.
2538    ///
2539    /// # Examples
2540    ///
2541    /// Basic usage:
2542    ///
2543    /// ```
2544    /// use bstr::ByteSlice;
2545    ///
2546    /// let mut s = <Vec<u8>>::from("HELLO Β");
2547    /// s.make_ascii_lowercase();
2548    /// assert_eq!(s, "hello Β".as_bytes());
2549    /// ```
2550    ///
2551    /// Invalid UTF-8 remains as is:
2552    ///
2553    /// ```
2554    /// # #[cfg(feature = "alloc")] {
2555    /// use bstr::{B, ByteSlice, ByteVec};
2556    ///
2557    /// let mut s = <Vec<u8>>::from_slice(b"FOO\xFFBAR\xE2\x98BAZ");
2558    /// s.make_ascii_lowercase();
2559    /// assert_eq!(s, B(b"foo\xFFbar\xE2\x98baz"));
2560    /// # }
2561    /// ```
2562    #[inline]
2563    fn make_ascii_lowercase(&mut self) {
2564        self.as_bytes_mut().make_ascii_lowercase();
2565    }
2566
2567    /// Returns a new `Vec<u8>` containing the uppercase equivalent of this
2568    /// byte string.
2569    ///
2570    /// In this case, uppercase is defined according to the `Uppercase`
2571    /// Unicode property.
2572    ///
    /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
    /// then it is copied to the returned byte string unchanged.
2575    ///
    /// Note that some characters in this byte string may expand into multiple
    /// characters when changing the case, so the number of bytes in the
    /// returned byte string may not be equal to the number of bytes in this
    /// byte string.
2580    ///
2581    /// If you'd like to reuse an allocation for performance reasons, then use
2582    /// [`to_uppercase_into`](#method.to_uppercase_into) instead.
2583    ///
2584    /// # Examples
2585    ///
2586    /// Basic usage:
2587    ///
2588    /// ```
2589    /// use bstr::{B, ByteSlice};
2590    ///
2591    /// let s = B("hello β");
2592    /// assert_eq!(s.to_uppercase(), B("HELLO Β"));
2593    /// ```
2594    ///
2595    /// Scripts without case are not changed:
2596    ///
2597    /// ```
2598    /// use bstr::{B, ByteSlice};
2599    ///
2600    /// let s = B("农历新年");
2601    /// assert_eq!(s.to_uppercase(), B("农历新年"));
2602    /// ```
2603    ///
2604    /// Invalid UTF-8 remains as is:
2605    ///
2606    /// ```
2607    /// use bstr::{B, ByteSlice};
2608    ///
2609    /// let s = B(b"foo\xFFbar\xE2\x98baz");
2610    /// assert_eq!(s.to_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
2611    /// ```
2612    #[cfg(all(feature = "alloc", feature = "unicode"))]
2613    #[inline]
2614    fn to_uppercase(&self) -> Vec<u8> {
2615        let mut buf = vec![];
2616        self.to_uppercase_into(&mut buf);
2617        buf
2618    }
2619
2620    /// Writes the uppercase equivalent of this byte string into the given
2621    /// buffer. The buffer is not cleared before written to.
2622    ///
2623    /// In this case, uppercase is defined according to the `Uppercase`
2624    /// Unicode property.
2625    ///
2626    /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
2627    /// then it is written to the given buffer unchanged.
2628    ///
2629    /// Note that some characters in this byte string may expand into multiple
2630    /// characters when changing the case, so the number of bytes written to
2631    /// the given byte string may not be equivalent to the number of bytes in
2632    /// this byte string.
2633    ///
2634    /// If you don't need to amortize allocation and instead prefer
2635    /// convenience, then use [`to_uppercase`](#method.to_uppercase) instead.
2636    ///
2637    /// # Examples
2638    ///
2639    /// Basic usage:
2640    ///
2641    /// ```
2642    /// use bstr::{B, ByteSlice};
2643    ///
2644    /// let s = B("hello β");
2645    ///
2646    /// let mut buf = vec![];
2647    /// s.to_uppercase_into(&mut buf);
2648    /// assert_eq!(buf, B("HELLO Β"));
2649    /// ```
2650    ///
2651    /// Scripts without case are not changed:
2652    ///
2653    /// ```
2654    /// use bstr::{B, ByteSlice};
2655    ///
2656    /// let s = B("农历新年");
2657    ///
2658    /// let mut buf = vec![];
2659    /// s.to_uppercase_into(&mut buf);
2660    /// assert_eq!(buf, B("农历新年"));
2661    /// ```
2662    ///
2663    /// Invalid UTF-8 remains as is:
2664    ///
2665    /// ```
2666    /// use bstr::{B, ByteSlice};
2667    ///
2668    /// let s = B(b"foo\xFFbar\xE2\x98baz");
2669    ///
2670    /// let mut buf = vec![];
2671    /// s.to_uppercase_into(&mut buf);
2672    /// assert_eq!(buf, B(b"FOO\xFFBAR\xE2\x98BAZ"));
2673    /// ```
2674    #[cfg(all(feature = "alloc", feature = "unicode"))]
2675    #[inline]
2676    fn to_uppercase_into(&self, buf: &mut Vec<u8>) {
2677        // TODO: This is the best we can do given what std exposes I think.
2678        // If we roll our own case handling, then we might be able to do this
2679        // a bit faster. We shouldn't roll our own case handling unless we
2680        // need to, e.g., for doing caseless matching or case folding.
2681        buf.reserve(self.as_bytes().len());
2682        for (s, e, ch) in self.char_indices() {
2683            if ch == '\u{FFFD}' {
2684                buf.push_str(&self.as_bytes()[s..e]);
2685            } else if ch.is_ascii() {
2686                buf.push_char(ch.to_ascii_uppercase());
2687            } else {
2688                for upper in ch.to_uppercase() {
2689                    buf.push_char(upper);
2690                }
2691            }
2692        }
2693    }
2694
2695    /// Returns a new `Vec<u8>` containing the ASCII uppercase equivalent of
2696    /// this byte string.
2697    ///
2698    /// In this case, uppercase is only defined in ASCII letters. Namely, the
2699    /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2700    /// In particular, the length of the byte string returned is always
2701    /// equivalent to the length of this byte string.
2702    ///
2703    /// If you'd like to reuse an allocation for performance reasons, then use
2704    /// [`make_ascii_uppercase`](#method.make_ascii_uppercase) to perform
2705    /// the conversion in place.
2706    ///
2707    /// # Examples
2708    ///
2709    /// Basic usage:
2710    ///
2711    /// ```
2712    /// use bstr::{B, ByteSlice};
2713    ///
2714    /// let s = B("hello β");
2715    /// assert_eq!(s.to_ascii_uppercase(), B("HELLO β"));
2716    /// ```
2717    ///
2718    /// Invalid UTF-8 remains as is:
2719    ///
2720    /// ```
2721    /// use bstr::{B, ByteSlice};
2722    ///
2723    /// let s = B(b"foo\xFFbar\xE2\x98baz");
2724    /// assert_eq!(s.to_ascii_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
2725    /// ```
2726    #[cfg(feature = "alloc")]
2727    #[inline]
2728    fn to_ascii_uppercase(&self) -> Vec<u8> {
2729        self.as_bytes().to_ascii_uppercase()
2730    }
2731
2732    /// Convert this byte string to its uppercase ASCII equivalent in place.
2733    ///
2734    /// In this case, uppercase is only defined in ASCII letters. Namely, the
2735    /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2736    ///
2737    /// If you don't need to do the conversion in
2738    /// place and instead prefer convenience, then use
2739    /// [`to_ascii_uppercase`](#method.to_ascii_uppercase) instead.
2740    ///
2741    /// # Examples
2742    ///
2743    /// Basic usage:
2744    ///
2745    /// ```
2746    /// use bstr::{B, ByteSlice};
2747    ///
2748    /// let mut s = <Vec<u8>>::from("hello β");
2749    /// s.make_ascii_uppercase();
2750    /// assert_eq!(s, B("HELLO β"));
2751    /// ```
2752    ///
2753    /// Invalid UTF-8 remains as is:
2754    ///
2755    /// ```
2756    /// # #[cfg(feature = "alloc")] {
2757    /// use bstr::{B, ByteSlice, ByteVec};
2758    ///
2759    /// let mut s = <Vec<u8>>::from_slice(b"foo\xFFbar\xE2\x98baz");
2760    /// s.make_ascii_uppercase();
2761    /// assert_eq!(s, B(b"FOO\xFFBAR\xE2\x98BAZ"));
2762    /// # }
2763    /// ```
2764    #[inline]
2765    fn make_ascii_uppercase(&mut self) {
2766        self.as_bytes_mut().make_ascii_uppercase();
2767    }
2768
2769    /// Escapes this byte string into a sequence of `char` values.
2770    ///
2771    /// When the sequence of `char` values is concatenated into a string, the
    /// result is always valid UTF-8. Any unprintable or invalid UTF-8 in this
    /// byte string are escaped using `\xNN` notation. Moreover, the
2774    /// characters `\0`, `\r`, `\n`, `\t` and `\` are escaped as well.
2775    ///
2776    /// This is useful when one wants to get a human readable view of the raw
2777    /// bytes that is also valid UTF-8.
2778    ///
2779    /// The iterator returned implements the `Display` trait. So one can do
2780    /// `b"foo\xFFbar".escape_bytes().to_string()` to get a `String` with its
2781    /// bytes escaped.
2782    ///
2783    /// The dual of this function is [`ByteVec::unescape_bytes`].
2784    ///
2785    /// Note that this is similar to, but not equivalent to the `Debug`
2786    /// implementation on [`BStr`] and [`BString`]. The `Debug` implementations
2787    /// also use the debug representation for all Unicode codepoints. However,
2788    /// this escaping routine only escapes individual bytes. All Unicode
2789    /// codepoints above `U+007F` are passed through unchanged without any
2790    /// escaping.
2791    ///
2792    /// # Examples
2793    ///
2794    /// ```
2795    /// # #[cfg(feature = "alloc")] {
2796    /// use bstr::{B, ByteSlice};
2797    ///
2798    /// assert_eq!(r"foo\xFFbar", b"foo\xFFbar".escape_bytes().to_string());
2799    /// assert_eq!(r"foo\nbar", b"foo\nbar".escape_bytes().to_string());
2800    /// assert_eq!(r"foo\tbar", b"foo\tbar".escape_bytes().to_string());
2801    /// assert_eq!(r"foo\\bar", b"foo\\bar".escape_bytes().to_string());
2802    /// assert_eq!(r"foo☃bar", B("foo☃bar").escape_bytes().to_string());
2803    /// # }
2804    /// ```
2805    #[inline]
2806    fn escape_bytes(&self) -> EscapeBytes<'_> {
2807        EscapeBytes::new(self.as_bytes())
2808    }
2809
2810    /// Reverse the bytes in this string, in place.
2811    ///
2812    /// This is not necessarily a well formed operation! For example, if this
2813    /// byte string contains valid UTF-8 that isn't ASCII, then reversing the
2814    /// string will likely result in invalid UTF-8 and otherwise non-sensical
2815    /// content.
2816    ///
2817    /// Note that this is equivalent to the generic `[u8]::reverse` method.
2818    /// This method is provided to permit callers to explicitly differentiate
2819    /// between reversing bytes, codepoints and graphemes.
2820    ///
2821    /// # Examples
2822    ///
2823    /// Basic usage:
2824    ///
2825    /// ```
2826    /// use bstr::ByteSlice;
2827    ///
2828    /// let mut s = <Vec<u8>>::from("hello");
2829    /// s.reverse_bytes();
2830    /// assert_eq!(s, "olleh".as_bytes());
2831    /// ```
2832    #[inline]
2833    fn reverse_bytes(&mut self) {
2834        self.as_bytes_mut().reverse();
2835    }
2836
2837    /// Reverse the codepoints in this string, in place.
2838    ///
2839    /// If this byte string is valid UTF-8, then its reversal by codepoint
2840    /// is also guaranteed to be valid UTF-8.
2841    ///
2842    /// This operation is equivalent to the following, but without allocating:
2843    ///
2844    /// ```
2845    /// use bstr::ByteSlice;
2846    ///
2847    /// let mut s = <Vec<u8>>::from("foo☃bar");
2848    ///
2849    /// let mut chars: Vec<char> = s.chars().collect();
2850    /// chars.reverse();
2851    ///
2852    /// let reversed: String = chars.into_iter().collect();
2853    /// assert_eq!(reversed, "rab☃oof");
2854    /// ```
2855    ///
2856    /// Note that this is not necessarily a well formed operation. For example,
2857    /// if this byte string contains grapheme clusters with more than one
2858    /// codepoint, then those grapheme clusters will not necessarily be
2859    /// preserved. If you'd like to preserve grapheme clusters, then use
2860    /// [`reverse_graphemes`](#method.reverse_graphemes) instead.
2861    ///
2862    /// # Examples
2863    ///
2864    /// Basic usage:
2865    ///
2866    /// ```
2867    /// use bstr::ByteSlice;
2868    ///
2869    /// let mut s = <Vec<u8>>::from("foo☃bar");
2870    /// s.reverse_chars();
2871    /// assert_eq!(s, "rab☃oof".as_bytes());
2872    /// ```
2873    ///
2874    /// This example shows that not all reversals lead to a well formed string.
2875    /// For example, in this case, combining marks are used to put accents over
2876    /// some letters, and those accent marks must appear after the codepoints
2877    /// they modify.
2878    ///
2879    /// ```
2880    /// use bstr::{B, ByteSlice};
2881    ///
2882    /// let mut s = <Vec<u8>>::from("résumé");
2883    /// s.reverse_chars();
2884    /// assert_eq!(s, B(b"\xCC\x81emus\xCC\x81er"));
2885    /// ```
2886    ///
2887    /// A word of warning: the above example relies on the fact that
2888    /// `résumé` is in decomposed normal form, which means there are separate
2889    /// codepoints for the accents above `e`. If it is instead in composed
2890    /// normal form, then the example works:
2891    ///
2892    /// ```
2893    /// use bstr::{B, ByteSlice};
2894    ///
2895    /// let mut s = <Vec<u8>>::from("résumé");
2896    /// s.reverse_chars();
2897    /// assert_eq!(s, B("émusér"));
2898    /// ```
2899    ///
2900    /// The point here is to be cautious and not assume that just because
2901    /// `reverse_chars` works in one case, that it therefore works in all
2902    /// cases.
2903    #[inline]
2904    fn reverse_chars(&mut self) {
2905        let mut i = 0;
2906        loop {
2907            let (_, size) = utf8::decode(&self.as_bytes()[i..]);
2908            if size == 0 {
2909                break;
2910            }
2911            if size > 1 {
2912                self.as_bytes_mut()[i..i + size].reverse_bytes();
2913            }
2914            i += size;
2915        }
2916        self.reverse_bytes();
2917    }
2918
2919    /// Reverse the graphemes in this string, in place.
2920    ///
2921    /// If this byte string is valid UTF-8, then its reversal by grapheme
2922    /// is also guaranteed to be valid UTF-8.
2923    ///
2924    /// This operation is equivalent to the following, but without allocating:
2925    ///
2926    /// ```
2927    /// use bstr::ByteSlice;
2928    ///
2929    /// let mut s = <Vec<u8>>::from("foo☃bar");
2930    ///
2931    /// let mut graphemes: Vec<&str> = s.graphemes().collect();
2932    /// graphemes.reverse();
2933    ///
2934    /// let reversed = graphemes.concat();
2935    /// assert_eq!(reversed, "rab☃oof");
2936    /// ```
2937    ///
2938    /// # Examples
2939    ///
2940    /// Basic usage:
2941    ///
2942    /// ```
2943    /// use bstr::ByteSlice;
2944    ///
2945    /// let mut s = <Vec<u8>>::from("foo☃bar");
2946    /// s.reverse_graphemes();
2947    /// assert_eq!(s, "rab☃oof".as_bytes());
2948    /// ```
2949    ///
2950    /// This example shows how this correctly handles grapheme clusters,
2951    /// unlike `reverse_chars`.
2952    ///
2953    /// ```
2954    /// use bstr::ByteSlice;
2955    ///
2956    /// let mut s = <Vec<u8>>::from("résumé");
2957    /// s.reverse_graphemes();
2958    /// assert_eq!(s, "émusér".as_bytes());
2959    /// ```
2960    #[cfg(feature = "unicode")]
2961    #[inline]
2962    fn reverse_graphemes(&mut self) {
2963        use crate::unicode::decode_grapheme;
2964
2965        let mut i = 0;
2966        loop {
2967            let (_, size) = decode_grapheme(&self.as_bytes()[i..]);
2968            if size == 0 {
2969                break;
2970            }
2971            if size > 1 {
2972                self.as_bytes_mut()[i..i + size].reverse_bytes();
2973            }
2974            i += size;
2975        }
2976        self.reverse_bytes();
2977    }
2978
2979    /// Returns true if and only if every byte in this byte string is ASCII.
2980    ///
2981    /// ASCII is an encoding that defines 128 codepoints. A byte corresponds to
2982    /// an ASCII codepoint if and only if it is in the inclusive range
2983    /// `[0, 127]`.
2984    ///
2985    /// # Examples
2986    ///
2987    /// Basic usage:
2988    ///
2989    /// ```
2990    /// use bstr::{B, ByteSlice};
2991    ///
2992    /// assert!(B("abc").is_ascii());
2993    /// assert!(!B("☃βツ").is_ascii());
2994    /// assert!(!B(b"\xFF").is_ascii());
2995    /// ```
2996    #[inline]
2997    fn is_ascii(&self) -> bool {
2998        ascii::first_non_ascii_byte(self.as_bytes()) == self.as_bytes().len()
2999    }
3000
3001    /// Returns true if and only if the entire byte string is valid UTF-8.
3002    ///
3003    /// If you need location information about where a byte string's first
3004    /// invalid UTF-8 byte is, then use the [`to_str`](#method.to_str) method.
3005    ///
3006    /// # Examples
3007    ///
3008    /// Basic usage:
3009    ///
3010    /// ```
3011    /// use bstr::{B, ByteSlice};
3012    ///
3013    /// assert!(B("abc").is_utf8());
3014    /// assert!(B("☃βツ").is_utf8());
3015    /// // invalid bytes
3016    /// assert!(!B(b"abc\xFF").is_utf8());
3017    /// // surrogate encoding
3018    /// assert!(!B(b"\xED\xA0\x80").is_utf8());
3019    /// // incomplete sequence
3020    /// assert!(!B(b"\xF0\x9D\x9Ca").is_utf8());
3021    /// // overlong sequence
3022    /// assert!(!B(b"\xF0\x82\x82\xAC").is_utf8());
3023    /// ```
3024    #[inline]
3025    fn is_utf8(&self) -> bool {
3026        utf8::validate(self.as_bytes()).is_ok()
3027    }
3028
3029    /// Returns the last byte in this byte string, if it's non-empty. If this
3030    /// byte string is empty, this returns `None`.
3031    ///
3032    /// Note that this is like the generic `[u8]::last`, except this returns
3033    /// the byte by value instead of a reference to the byte.
3034    ///
3035    /// # Examples
3036    ///
3037    /// Basic usage:
3038    ///
3039    /// ```
3040    /// use bstr::ByteSlice;
3041    ///
3042    /// assert_eq!(Some(b'z'), b"baz".last_byte());
3043    /// assert_eq!(None, b"".last_byte());
3044    /// ```
3045    #[inline]
3046    fn last_byte(&self) -> Option<u8> {
3047        let bytes = self.as_bytes();
3048        bytes.get(bytes.len().saturating_sub(1)).map(|&b| b)
3049    }
3050
3051    /// Returns the index of the first non-ASCII byte in this byte string (if
3052    /// any such indices exist). Specifically, it returns the index of the
3053    /// first byte with a value greater than or equal to `0x80`.
3054    ///
3055    /// # Examples
3056    ///
3057    /// Basic usage:
3058    ///
3059    /// ```
3060    /// use bstr::{ByteSlice, B};
3061    ///
3062    /// assert_eq!(Some(3), b"abc\xff".find_non_ascii_byte());
3063    /// assert_eq!(None, b"abcde".find_non_ascii_byte());
3064    /// assert_eq!(Some(0), B("😀").find_non_ascii_byte());
3065    /// ```
3066    #[inline]
3067    fn find_non_ascii_byte(&self) -> Option<usize> {
3068        let index = ascii::first_non_ascii_byte(self.as_bytes());
3069        if index == self.as_bytes().len() {
3070            None
3071        } else {
3072            Some(index)
3073        }
3074    }
3075}
3076
3077/// A single substring searcher fixed to a particular needle.
3078///
3079/// The purpose of this type is to permit callers to construct a substring
3080/// searcher that can be used to search haystacks without the overhead of
3081/// constructing the searcher in the first place. This is a somewhat niche
3082/// concern when it's necessary to re-use the same needle to search multiple
3083/// different haystacks with as little overhead as possible. In general, using
3084/// [`ByteSlice::find`](trait.ByteSlice.html#method.find)
3085/// or
3086/// [`ByteSlice::find_iter`](trait.ByteSlice.html#method.find_iter)
3087/// is good enough, but `Finder` is useful when you can meaningfully observe
3088/// searcher construction time in a profile.
3089///
3090/// When the `std` feature is enabled, then this type has an `into_owned`
3091/// version which permits building a `Finder` that is not connected to the
3092/// lifetime of its needle.
3093#[derive(Clone, Debug)]
3094pub struct Finder<'a>(memmem::Finder<'a>);
3095
3096impl<'a> Finder<'a> {
3097    /// Create a new finder for the given needle.
3098    #[inline]
3099    pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a> {
3100        Finder(memmem::Finder::new(needle.as_ref()))
3101    }
3102
3103    /// Convert this finder into its owned variant, such that it no longer
3104    /// borrows the needle.
3105    ///
3106    /// If this is already an owned finder, then this is a no-op. Otherwise,
3107    /// this copies the needle.
3108    ///
3109    /// This is only available when the `std` feature is enabled.
3110    #[cfg(feature = "std")]
3111    #[inline]
3112    pub fn into_owned(self) -> Finder<'static> {
3113        Finder(self.0.into_owned())
3114    }
3115
3116    /// Returns the needle that this finder searches for.
3117    ///
3118    /// Note that the lifetime of the needle returned is tied to the lifetime
3119    /// of the finder, and may be shorter than the `'a` lifetime. Namely, a
3120    /// finder's needle can be either borrowed or owned, so the lifetime of the
3121    /// needle returned must necessarily be the shorter of the two.
3122    #[inline]
3123    pub fn needle(&self) -> &[u8] {
3124        self.0.needle()
3125    }
3126
3127    /// Returns the index of the first occurrence of this needle in the given
3128    /// haystack.
3129    ///
3130    /// The haystack may be any type that can be cheaply converted into a
3131    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
3132    ///
3133    /// # Complexity
3134    ///
3135    /// This routine is guaranteed to have worst case linear time complexity
3136    /// with respect to both the needle and the haystack. That is, this runs
3137    /// in `O(needle.len() + haystack.len())` time.
3138    ///
3139    /// This routine is also guaranteed to have worst case constant space
3140    /// complexity.
3141    ///
3142    /// # Examples
3143    ///
3144    /// Basic usage:
3145    ///
3146    /// ```
3147    /// use bstr::Finder;
3148    ///
3149    /// let haystack = "foo bar baz";
3150    /// assert_eq!(Some(0), Finder::new("foo").find(haystack));
3151    /// assert_eq!(Some(4), Finder::new("bar").find(haystack));
3152    /// assert_eq!(None, Finder::new("quux").find(haystack));
3153    /// ```
3154    #[inline]
3155    pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
3156        self.0.find(haystack.as_ref())
3157    }
3158}
3159
3160/// A single substring reverse searcher fixed to a particular needle.
3161///
3162/// The purpose of this type is to permit callers to construct a substring
3163/// searcher that can be used to search haystacks without the overhead of
3164/// constructing the searcher in the first place. This is a somewhat niche
3165/// concern when it's necessary to re-use the same needle to search multiple
3166/// different haystacks with as little overhead as possible. In general, using
3167/// [`ByteSlice::rfind`](trait.ByteSlice.html#method.rfind)
3168/// or
3169/// [`ByteSlice::rfind_iter`](trait.ByteSlice.html#method.rfind_iter)
3170/// is good enough, but `FinderReverse` is useful when you can meaningfully
3171/// observe searcher construction time in a profile.
3172///
3173/// When the `std` feature is enabled, then this type has an `into_owned`
3174/// version which permits building a `FinderReverse` that is not connected to
3175/// the lifetime of its needle.
3176#[derive(Clone, Debug)]
3177pub struct FinderReverse<'a>(memmem::FinderRev<'a>);
3178
3179impl<'a> FinderReverse<'a> {
3180    /// Create a new reverse finder for the given needle.
3181    #[inline]
3182    pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a> {
3183        FinderReverse(memmem::FinderRev::new(needle.as_ref()))
3184    }
3185
3186    /// Convert this finder into its owned variant, such that it no longer
3187    /// borrows the needle.
3188    ///
3189    /// If this is already an owned finder, then this is a no-op. Otherwise,
3190    /// this copies the needle.
3191    ///
3192    /// This is only available when the `std` feature is enabled.
3193    #[cfg(feature = "std")]
3194    #[inline]
3195    pub fn into_owned(self) -> FinderReverse<'static> {
3196        FinderReverse(self.0.into_owned())
3197    }
3198
3199    /// Returns the needle that this finder searches for.
3200    ///
3201    /// Note that the lifetime of the needle returned is tied to the lifetime
3202    /// of this finder, and may be shorter than the `'a` lifetime. Namely,
3203    /// a finder's needle can be either borrowed or owned, so the lifetime of
3204    /// the needle returned must necessarily be the shorter of the two.
3205    #[inline]
3206    pub fn needle(&self) -> &[u8] {
3207        self.0.needle()
3208    }
3209
3210    /// Returns the index of the last occurrence of this needle in the given
3211    /// haystack.
3212    ///
3213    /// The haystack may be any type that can be cheaply converted into a
3214    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
3215    ///
3216    /// # Complexity
3217    ///
3218    /// This routine is guaranteed to have worst case linear time complexity
3219    /// with respect to both the needle and the haystack. That is, this runs
3220    /// in `O(needle.len() + haystack.len())` time.
3221    ///
3222    /// This routine is also guaranteed to have worst case constant space
3223    /// complexity.
3224    ///
3225    /// # Examples
3226    ///
3227    /// Basic usage:
3228    ///
3229    /// ```
3230    /// use bstr::FinderReverse;
3231    ///
3232    /// let haystack = "foo bar baz";
3233    /// assert_eq!(Some(0), FinderReverse::new("foo").rfind(haystack));
3234    /// assert_eq!(Some(4), FinderReverse::new("bar").rfind(haystack));
3235    /// assert_eq!(None, FinderReverse::new("quux").rfind(haystack));
3236    /// ```
3237    #[inline]
3238    pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
3239        self.0.rfind(haystack.as_ref())
3240    }
3241}
3242
3243/// An iterator over non-overlapping substring matches.
3244///
3245/// Matches are reported by the byte offset at which they begin.
3246///
3247/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
3248/// needle.
3249#[derive(Debug)]
3250pub struct Find<'h, 'n> {
3251    it: memmem::FindIter<'h, 'n>,
3252    haystack: &'h [u8],
3253    needle: &'n [u8],
3254}
3255
3256impl<'h, 'n> Find<'h, 'n> {
3257    fn new(haystack: &'h [u8], needle: &'n [u8]) -> Find<'h, 'n> {
3258        Find { it: memmem::find_iter(haystack, needle), haystack, needle }
3259    }
3260}
3261
3262impl<'h, 'n> Iterator for Find<'h, 'n> {
3263    type Item = usize;
3264
3265    #[inline]
3266    fn next(&mut self) -> Option<usize> {
3267        self.it.next()
3268    }
3269}
3270
3271/// An iterator over non-overlapping substring matches in reverse.
3272///
3273/// Matches are reported by the byte offset at which they begin.
3274///
3275/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
3276/// needle.
3277#[derive(Debug)]
3278pub struct FindReverse<'h, 'n> {
3279    it: memmem::FindRevIter<'h, 'n>,
3280    haystack: &'h [u8],
3281    needle: &'n [u8],
3282}
3283
3284impl<'h, 'n> FindReverse<'h, 'n> {
3285    fn new(haystack: &'h [u8], needle: &'n [u8]) -> FindReverse<'h, 'n> {
3286        FindReverse {
3287            it: memmem::rfind_iter(haystack, needle),
3288            haystack,
3289            needle,
3290        }
3291    }
3292
3293    fn haystack(&self) -> &'h [u8] {
3294        self.haystack
3295    }
3296
3297    fn needle(&self) -> &'n [u8] {
3298        self.needle
3299    }
3300}
3301
3302impl<'h, 'n> Iterator for FindReverse<'h, 'n> {
3303    type Item = usize;
3304
3305    #[inline]
3306    fn next(&mut self) -> Option<usize> {
3307        self.it.next()
3308    }
3309}
3310
3311/// An iterator over the bytes in a byte string.
3312///
3313/// `'a` is the lifetime of the byte string being traversed.
#[derive(Clone, Debug)]
pub struct Bytes<'a> {
    // The wrapped slice iterator; this type only changes the item type
    // from `&u8` to `u8`.
    it: slice::Iter<'a, u8>,
}

impl<'a> Bytes<'a> {
    /// Views the remaining underlying data as a subslice of the original data.
    /// This has the same lifetime as the original slice,
    /// and so the iterator can continue to be used while this exists.
    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        self.it.as_slice()
    }
}

impl<'a> Iterator for Bytes<'a> {
    type Item = u8;

    #[inline]
    fn next(&mut self) -> Option<u8> {
        // Yield the byte by value rather than by reference.
        self.it.next().copied()
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for Bytes<'a> {
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        // Yield the byte by value rather than by reference.
        self.it.next_back().copied()
    }
}

impl<'a> ExactSizeIterator for Bytes<'a> {
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}

impl<'a> iter::FusedIterator for Bytes<'a> {}
3358
3359/// An iterator over the fields in a byte string, separated by whitespace.
3360///
3361/// Whitespace for this iterator is defined by the Unicode property
3362/// `White_Space`.
3363///
3364/// This iterator splits on contiguous runs of whitespace, such that the fields
3365/// in `foo\t\t\n  \nbar` are `foo` and `bar`.
3366///
3367/// `'a` is the lifetime of the byte string being split.
#[cfg(feature = "unicode")]
#[derive(Debug)]
pub struct Fields<'a> {
    // A predicate-based field iterator fixed to a plain function pointer,
    // which keeps this type free of a generic parameter.
    it: FieldsWith<'a, fn(char) -> bool>,
}

#[cfg(feature = "unicode")]
impl<'a> Fields<'a> {
    fn new(bytes: &'a [u8]) -> Fields<'a> {
        // Spell out the function pointer type so it matches the field.
        let is_space: fn(char) -> bool = char::is_whitespace;
        Fields { it: bytes.fields_with(is_space) }
    }
}

#[cfg(feature = "unicode")]
impl<'a> Iterator for Fields<'a> {
    type Item = &'a [u8];

    #[inline]
    fn next(&mut self) -> Option<&'a [u8]> {
        // All of the splitting logic lives in `FieldsWith`.
        let Fields { it } = self;
        it.next()
    }
}
3390
3391/// An iterator over fields in the byte string, separated by a predicate over
3392/// codepoints.
3393///
3394/// This iterator splits a byte string based on its predicate function such
3395/// that the elements returned are separated by contiguous runs of codepoints
3396/// for which the predicate returns true.
3397///
3398/// `'a` is the lifetime of the byte string being split, while `F` is the type
3399/// of the predicate, i.e., `FnMut(char) -> bool`.
3400#[derive(Debug)]
3401pub struct FieldsWith<'a, F> {
3402    f: F,
3403    bytes: &'a [u8],
3404    chars: CharIndices<'a>,
3405}
3406
3407impl<'a, F: FnMut(char) -> bool> FieldsWith<'a, F> {
3408    fn new(bytes: &'a [u8], f: F) -> FieldsWith<'a, F> {
3409        FieldsWith { f, bytes, chars: bytes.char_indices() }
3410    }
3411}
3412
3413impl<'a, F: FnMut(char) -> bool> Iterator for FieldsWith<'a, F> {
3414    type Item = &'a [u8];
3415
3416    #[inline]
3417    fn next(&mut self) -> Option<&'a [u8]> {
3418        let (start, mut end);
3419        loop {
3420            match self.chars.next() {
3421                None => return None,
3422                Some((s, e, ch)) => {
3423                    if !(self.f)(ch) {
3424                        start = s;
3425                        end = e;
3426                        break;
3427                    }
3428                }
3429            }
3430        }
3431        while let Some((_, e, ch)) = self.chars.next() {
3432            if (self.f)(ch) {
3433                break;
3434            }
3435            end = e;
3436        }
3437        Some(&self.bytes[start..end])
3438    }
3439}
3440
3441/// An iterator over substrings in a byte string, split by a separator.
3442///
3443/// `'h` is the lifetime of the byte string being split (the haystack), while
3444/// `'s` is the lifetime of the byte string doing the splitting.
3445#[derive(Debug)]
3446pub struct Split<'h, 's> {
3447    finder: Find<'h, 's>,
3448    /// The end position of the previous match of our splitter. The element
3449    /// we yield corresponds to the substring starting at `last` up to the
3450    /// beginning of the next match of the splitter.
3451    last: usize,
3452    /// Only set when iteration is complete. A corner case here is when a
3453    /// splitter is matched at the end of the haystack. At that point, we still
3454    /// need to yield an empty string following it.
3455    done: bool,
3456}
3457
3458impl<'h, 's> Split<'h, 's> {
3459    fn new(haystack: &'h [u8], splitter: &'s [u8]) -> Split<'h, 's> {
3460        let finder = haystack.find_iter(splitter);
3461        Split { finder, last: 0, done: false }
3462    }
3463}
3464
3465impl<'h, 's> Iterator for Split<'h, 's> {
3466    type Item = &'h [u8];
3467
3468    #[inline]
3469    fn next(&mut self) -> Option<&'h [u8]> {
3470        let haystack = self.finder.haystack;
3471        match self.finder.next() {
3472            Some(start) => {
3473                let next = &haystack[self.last..start];
3474                self.last = start + self.finder.needle.len();
3475                Some(next)
3476            }
3477            None => {
3478                if self.last >= haystack.len() {
3479                    if !self.done {
3480                        self.done = true;
3481                        Some(b"")
3482                    } else {
3483                        None
3484                    }
3485                } else {
3486                    let s = &haystack[self.last..];
3487                    self.last = haystack.len();
3488                    self.done = true;
3489                    Some(s)
3490                }
3491            }
3492        }
3493    }
3494}
3495
3496/// An iterator over substrings in a byte string, split by a separator, in
3497/// reverse.
3498///
3499/// `'h` is the lifetime of the byte string being split (the haystack), while
3500/// `'s` is the lifetime of the byte string doing the splitting.
3501#[derive(Debug)]
3502pub struct SplitReverse<'h, 's> {
3503    finder: FindReverse<'h, 's>,
3504    /// The end position of the previous match of our splitter. The element
3505    /// we yield corresponds to the substring starting at `last` up to the
3506    /// beginning of the next match of the splitter.
3507    last: usize,
3508    /// Only set when iteration is complete. A corner case here is when a
3509    /// splitter is matched at the end of the haystack. At that point, we still
3510    /// need to yield an empty string following it.
3511    done: bool,
3512}
3513
3514impl<'h, 's> SplitReverse<'h, 's> {
3515    fn new(haystack: &'h [u8], splitter: &'s [u8]) -> SplitReverse<'h, 's> {
3516        let finder = haystack.rfind_iter(splitter);
3517        SplitReverse { finder, last: haystack.len(), done: false }
3518    }
3519}
3520
3521impl<'h, 's> Iterator for SplitReverse<'h, 's> {
3522    type Item = &'h [u8];
3523
3524    #[inline]
3525    fn next(&mut self) -> Option<&'h [u8]> {
3526        let haystack = self.finder.haystack();
3527        match self.finder.next() {
3528            Some(start) => {
3529                let nlen = self.finder.needle().len();
3530                let next = &haystack[start + nlen..self.last];
3531                self.last = start;
3532                Some(next)
3533            }
3534            None => {
3535                if self.last == 0 {
3536                    if !self.done {
3537                        self.done = true;
3538                        Some(b"")
3539                    } else {
3540                        None
3541                    }
3542                } else {
3543                    let s = &haystack[..self.last];
3544                    self.last = 0;
3545                    self.done = true;
3546                    Some(s)
3547                }
3548            }
3549        }
3550    }
3551}
3552
3553/// An iterator over at most `n` substrings in a byte string, split by a
3554/// separator.
3555///
3556/// `'h` is the lifetime of the byte string being split (the haystack), while
3557/// `'s` is the lifetime of the byte string doing the splitting.
3558#[derive(Debug)]
3559pub struct SplitN<'h, 's> {
3560    split: Split<'h, 's>,
3561    limit: usize,
3562    count: usize,
3563}
3564
3565impl<'h, 's> SplitN<'h, 's> {
3566    fn new(
3567        haystack: &'h [u8],
3568        splitter: &'s [u8],
3569        limit: usize,
3570    ) -> SplitN<'h, 's> {
3571        let split = haystack.split_str(splitter);
3572        SplitN { split, limit, count: 0 }
3573    }
3574}
3575
3576impl<'h, 's> Iterator for SplitN<'h, 's> {
3577    type Item = &'h [u8];
3578
3579    #[inline]
3580    fn next(&mut self) -> Option<&'h [u8]> {
3581        self.count += 1;
3582        if self.count > self.limit || self.split.done {
3583            None
3584        } else if self.count == self.limit {
3585            Some(&self.split.finder.haystack[self.split.last..])
3586        } else {
3587            self.split.next()
3588        }
3589    }
3590}
3591
3592/// An iterator over at most `n` substrings in a byte string, split by a
3593/// separator, in reverse.
3594///
3595/// `'h` is the lifetime of the byte string being split (the haystack), while
3596/// `'s` is the lifetime of the byte string doing the splitting.
3597#[derive(Debug)]
3598pub struct SplitNReverse<'h, 's> {
3599    split: SplitReverse<'h, 's>,
3600    limit: usize,
3601    count: usize,
3602}
3603
3604impl<'h, 's> SplitNReverse<'h, 's> {
3605    fn new(
3606        haystack: &'h [u8],
3607        splitter: &'s [u8],
3608        limit: usize,
3609    ) -> SplitNReverse<'h, 's> {
3610        let split = haystack.rsplit_str(splitter);
3611        SplitNReverse { split, limit, count: 0 }
3612    }
3613}
3614
3615impl<'h, 's> Iterator for SplitNReverse<'h, 's> {
3616    type Item = &'h [u8];
3617
3618    #[inline]
3619    fn next(&mut self) -> Option<&'h [u8]> {
3620        self.count += 1;
3621        if self.count > self.limit || self.split.done {
3622            None
3623        } else if self.count == self.limit {
3624            Some(&self.split.finder.haystack()[..self.split.last])
3625        } else {
3626            self.split.next()
3627        }
3628    }
3629}
3630
3631/// An iterator over all lines in a byte string, without their terminators.
3632///
3633/// For this iterator, the only line terminators recognized are `\r\n` and
3634/// `\n`.
3635///
3636/// `'a` is the lifetime of the byte string being iterated over.
3637#[derive(Clone, Debug)]
3638pub struct Lines<'a> {
3639    it: LinesWithTerminator<'a>,
3640}
3641
3642impl<'a> Lines<'a> {
3643    fn new(bytes: &'a [u8]) -> Lines<'a> {
3644        Lines { it: LinesWithTerminator::new(bytes) }
3645    }
3646
3647    /// Return a copy of the rest of the underlying bytes without affecting the
3648    /// iterator itself.
3649    ///
3650    /// # Examples
3651    ///
3652    /// Basic usage:
3653    ///
3654    /// ```
3655    /// use bstr::{B, ByteSlice};
3656    ///
3657    /// let s = b"\
3658    /// foo
3659    /// bar\r
3660    /// baz";
3661    /// let mut lines = s.lines();
3662    /// assert_eq!(lines.next(), Some(B("foo")));
3663    /// assert_eq!(lines.as_bytes(), B("bar\r\nbaz"));
3664    /// ```
3665    pub fn as_bytes(&self) -> &'a [u8] {
3666        self.it.bytes
3667    }
3668}
3669
3670impl<'a> Iterator for Lines<'a> {
3671    type Item = &'a [u8];
3672
3673    #[inline]
3674    fn next(&mut self) -> Option<&'a [u8]> {
3675        Some(trim_last_terminator(self.it.next()?))
3676    }
3677}
3678
3679impl<'a> DoubleEndedIterator for Lines<'a> {
3680    #[inline]
3681    fn next_back(&mut self) -> Option<Self::Item> {
3682        Some(trim_last_terminator(self.it.next_back()?))
3683    }
3684}
3685
3686impl<'a> iter::FusedIterator for Lines<'a> {}
3687
/// An iterator over all lines in a byte string, including their terminators.
///
/// For this iterator, the only line terminator recognized is `\n`. (Since
/// line terminators are included, this also handles `\r\n` line endings.)
///
/// Line terminators are only included if they are present in the original
/// byte string. For example, the last line in a byte string may not end with
/// a line terminator.
///
/// Concatenating all elements yielded by this iterator is guaranteed to yield
/// the original byte string.
///
/// `'a` is the lifetime of the byte string being iterated over.
#[derive(Clone, Debug)]
pub struct LinesWithTerminator<'a> {
    /// The bytes that have not been yielded yet. Lines are carved off either
    /// end of this slice as the iterator advances.
    bytes: &'a [u8],
}

impl<'a> LinesWithTerminator<'a> {
    /// Creates a line iterator over `bytes`.
    fn new(bytes: &'a [u8]) -> LinesWithTerminator<'a> {
        Self { bytes }
    }

    /// Returns the portion of the underlying bytes that has not been yielded
    /// yet, without advancing the iterator.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use bstr::{B, ByteSlice};
    ///
    /// let s = b"\
    /// foo
    /// bar\r
    /// baz";
    /// let mut lines = s.lines_with_terminator();
    /// assert_eq!(lines.next(), Some(B("foo\n")));
    /// assert_eq!(lines.as_bytes(), B("bar\r\nbaz"));
    /// ```
    pub fn as_bytes(&self) -> &'a [u8] {
        let remaining: &'a [u8] = self.bytes;
        remaining
    }
}
3733
3734impl<'a> Iterator for LinesWithTerminator<'a> {
3735    type Item = &'a [u8];
3736
3737    #[inline]
3738    fn next(&mut self) -> Option<&'a [u8]> {
3739        match self.bytes.find_byte(b'\n') {
3740            None if self.bytes.is_empty() => None,
3741            None => {
3742                let line = self.bytes;
3743                self.bytes = b"";
3744                Some(line)
3745            }
3746            Some(end) => {
3747                let line = &self.bytes[..end + 1];
3748                self.bytes = &self.bytes[end + 1..];
3749                Some(line)
3750            }
3751        }
3752    }
3753}
3754
3755impl<'a> DoubleEndedIterator for LinesWithTerminator<'a> {
3756    #[inline]
3757    fn next_back(&mut self) -> Option<Self::Item> {
3758        let end = self.bytes.len().checked_sub(1)?;
3759        match self.bytes[..end].rfind_byte(b'\n') {
3760            None => {
3761                let line = self.bytes;
3762                self.bytes = b"";
3763                Some(line)
3764            }
3765            Some(end) => {
3766                let line = &self.bytes[end + 1..];
3767                self.bytes = &self.bytes[..end + 1];
3768                Some(line)
3769            }
3770        }
3771    }
3772}
3773
3774impl<'a> iter::FusedIterator for LinesWithTerminator<'a> {}
3775
/// Strips a single trailing line terminator from `s`.
///
/// If `s` ends with `\r\n`, both bytes are removed. Otherwise, if `s` ends
/// with `\n`, only that byte is removed. In every other case (including a
/// lone trailing `\r` or an empty slice), `s` is returned unchanged. At most
/// one terminator is removed, so `b"a\n\n"` becomes `b"a\n"`.
fn trim_last_terminator(s: &[u8]) -> &[u8] {
    match s {
        // The `\r\n` alternative is listed first so that a carriage return
        // directly before the final newline is dropped together with it.
        [line @ .., b'\r', b'\n'] | [line @ .., b'\n'] => line,
        _ => s,
    }
}
3785
#[cfg(all(test, feature = "std"))]
mod tests {
    use crate::{
        ext_slice::{ByteSlice, Lines, LinesWithTerminator, B},
        tests::LOSSY_TESTS,
    };

    /// Checks both lossy conversion entry points against the shared table of
    /// expected outputs, and confirms that std's lossy conversion agrees.
    #[test]
    fn to_str_lossy() {
        for (i, &(expected, input)) in LOSSY_TESTS.iter().enumerate() {
            let want = expected.as_bytes();

            let converted = B(input).to_str_lossy();
            assert_eq!(
                want,
                converted.as_bytes(),
                "to_str_lossy(ith: {:?}, given: {:?})",
                i,
                input,
            );

            let mut appended = String::new();
            B(input).to_str_lossy_into(&mut appended);
            assert_eq!(want, appended.as_bytes(), "to_str_lossy_into");

            let from_std = String::from_utf8_lossy(input);
            assert_eq!(want, from_std.as_bytes(), "std");
        }
    }

    /// Checks that each line iterator yields the expected lines front to
    /// back, and the same lines in the opposite order when reversed.
    #[test]
    fn lines_iteration() {
        // `track_caller` keeps a failure pointing at the offending `check`
        // call below rather than at the assertions inside this helper.
        #[track_caller]
        fn check<'a, I>(lines: I, forward: &[&'a [u8]])
        where
            I: DoubleEndedIterator<Item = &'a [u8]> + Clone,
        {
            assert_eq!(lines.clone().collect::<Vec<_>>(), forward);
            let backward: Vec<&[u8]> =
                forward.iter().rev().copied().collect();
            assert_eq!(lines.rev().collect::<Vec<_>>(), backward);
        }

        check(Lines::new(b""), &[]);
        check(LinesWithTerminator::new(b""), &[]);

        check(Lines::new(b"\n"), &[B("")]);
        check(Lines::new(b"\r\n"), &[B("")]);
        check(LinesWithTerminator::new(b"\n"), &[B("\n")]);

        check(Lines::new(b"a"), &[B("a")]);
        check(LinesWithTerminator::new(b"a"), &[B("a")]);

        check(Lines::new(b"abc"), &[B("abc")]);
        check(LinesWithTerminator::new(b"abc"), &[B("abc")]);

        check(Lines::new(b"abc\n"), &[B("abc")]);
        check(Lines::new(b"abc\r\n"), &[B("abc")]);
        check(LinesWithTerminator::new(b"abc\n"), &[B("abc\n")]);

        check(Lines::new(b"abc\n\n"), &[B("abc"), B("")]);
        check(
            LinesWithTerminator::new(b"abc\n\n"),
            &[B("abc\n"), B("\n")],
        );

        check(Lines::new(b"abc\n\ndef"), &[B("abc"), B(""), B("def")]);
        check(
            LinesWithTerminator::new(b"abc\n\ndef"),
            &[B("abc\n"), B("\n"), B("def")],
        );

        check(Lines::new(b"abc\n\ndef\n"), &[B("abc"), B(""), B("def")]);
        check(
            LinesWithTerminator::new(b"abc\n\ndef\n"),
            &[B("abc\n"), B("\n"), B("def\n")],
        );

        check(Lines::new(b"\na\nb\n"), &[B(""), B("a"), B("b")]);
        check(
            LinesWithTerminator::new(b"\na\nb\n"),
            &[B("\n"), B("a\n"), B("b\n")],
        );

        check(Lines::new(b"\n\n\n"), &[B(""), B(""), B("")]);
        check(
            LinesWithTerminator::new(b"\n\n\n"),
            &[B("\n"), B("\n"), B("\n")],
        );
    }
}