bumpalo/collections/
string.rs

1// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! A UTF-8 encoded, growable string.
12//!
13//! This module contains the [`String`] type and several error types that may
14//! result from working with [`String`]s.
15//!
16//! # Examples
17//!
18//! You can create a new [`String`] from a string literal with [`String::from_str_in`]:
19//!
20//! ```
21//! use bumpalo::{Bump, collections::String};
22//!
23//! let b = Bump::new();
24//!
25//! let s = String::from_str_in("world", &b);
26//! ```
27//!
28//! You can create a new [`String`] from an existing one by concatenating with
29//! `+`:
30//!
31//! [`String`]: struct.String.html
32//! [`String::from_str_in`]: struct.String.html#method.from_str_in
33//!
34//! ```
35//! use bumpalo::{Bump, collections::String};
36//!
//! let b = Bump::new();
//!
//! let s = String::from_str_in("Hello", &b);
//!
//! let message = s + " world!";
40//! ```
41//!
42//! If you have a vector of valid UTF-8 bytes, you can make a [`String`] out of
43//! it. You can do the reverse too.
44//!
45//! ```
46//! use bumpalo::{Bump, collections::String};
47//!
48//! let b = Bump::new();
49//!
50//! let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
51//!
52//! // We know these bytes are valid, so we'll use `unwrap()`.
53//! let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
54//!
55//! assert_eq!("💖", sparkle_heart);
56//!
57//! let bytes = sparkle_heart.into_bytes();
58//!
59//! assert_eq!(bytes, [240, 159, 146, 150]);
60//! ```
61
62use crate::collections::str::lossy;
63use crate::collections::vec::Vec;
64use crate::Bump;
65use core::char::decode_utf16;
66use core::fmt;
67use core::hash;
68use core::iter::FusedIterator;
69use core::mem;
70use core::ops::Bound::{Excluded, Included, Unbounded};
71use core::ops::{self, Add, AddAssign, Index, IndexMut, RangeBounds};
72use core::ptr;
73use core::str::{self, Chars, Utf8Error};
74use core_alloc::borrow::Cow;
75
/// Like the `format!` macro for creating `std::string::String`s but for
/// `bumpalo::collections::String`.
///
/// The invocation always starts with `in <bump>`, followed by the format
/// string and then zero or more comma-separated arguments. A trailing comma
/// is accepted.
///
/// The result of the underlying `write!` is discarded, so a formatting error
/// leaves whatever was written so far in the returned string.
///
/// # Examples
///
/// ```
/// use bumpalo::Bump;
///
/// let b = Bump::new();
///
/// let who = "World";
/// let s = bumpalo::format!(in &b, "Hello, {}!", who);
/// assert_eq!(s, "Hello, World!");
///
/// // A bare format string without arguments works as well.
/// let t = bumpalo::format!(in &b, "Hello!");
/// assert_eq!(t, "Hello!");
/// ```
#[macro_export]
macro_rules! format {
    // Format string only: forward to the main rule with an empty argument
    // list. Without this rule the invocation has no comma after the format
    // string and matches none of the rules below.
    ( in $bump:expr, $fmt:expr ) => {
        $crate::format!(in $bump, $fmt,)
    };

    // Main rule: arguments without a trailing comma.
    ( in $bump:expr, $fmt:expr, $($args:expr),* ) => {{
        // NOTE(review): this path is resolved in the caller's crate, so the
        // macro needs `std` to be available there.
        use std::fmt::Write;
        // Evaluate the bump expression exactly once.
        let bump = $bump;
        let mut s = $crate::collections::String::new_in(bump);
        // The `fmt::Result` is deliberately ignored.
        let _ = write!(&mut s, $fmt, $($args),*);
        s
    }};

    // Arguments with a trailing comma: strip it and forward to the main rule.
    ( in $bump:expr, $fmt:expr, $($args:expr,)* ) => {
        $crate::format!(in $bump, $fmt, $($args),*)
    };
}
104
105/// A UTF-8 encoded, growable string.
106///
107/// The `String` type is the most common string type that has ownership over the
108/// contents of the string. It has a close relationship with its borrowed
109/// counterpart, the primitive [`str`].
110///
111/// [`str`]: https://doc.rust-lang.org/nightly/std/primitive.str.html
112///
113/// # Examples
114///
/// You can create a `String` from a literal string with [`String::from_str_in`](#method.from_str_in):
116///
117/// ```
118/// use bumpalo::{Bump, collections::String};
119///
120/// let b = Bump::new();
121///
122/// let hello = String::from_str_in("Hello, world!", &b);
123/// ```
124///
125/// You can append a [`char`] to a `String` with the [`push`] method, and
126/// append a [`&str`] with the [`push_str`] method:
127///
128/// ```
129/// use bumpalo::{Bump, collections::String};
130///
131/// let b = Bump::new();
132///
133/// let mut hello = String::from_str_in("Hello, ", &b);
134///
135/// hello.push('w');
136/// hello.push_str("orld!");
137/// ```
138///
139/// [`String::from_iter_in`]: #method.from_iter_in
140/// [`char`]: https://doc.rust-lang.org/nightly/std/primitive.char.html
141/// [`push`]: #method.push
142/// [`push_str`]: #method.push_str
143///
144/// If you have a vector of UTF-8 bytes, you can create a `String` from it with
145/// the [`from_utf8`] method:
146///
147/// ```
148/// use bumpalo::{Bump, collections::String};
149///
150/// let b = Bump::new();
151///
152/// // some bytes, in a vector
153/// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
154///
155/// // We know these bytes are valid, so we'll use `unwrap()`.
156/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
157///
158/// assert_eq!("💖", sparkle_heart);
159/// ```
160///
161/// [`from_utf8`]: #method.from_utf8
162///
163/// # UTF-8
164///
165/// `String`s are always valid UTF-8. This has a few implications, the first of
166/// which is that if you need a non-UTF-8 string, consider [`OsString`]. It is
167/// similar, but without the UTF-8 constraint. The second implication is that
168/// you cannot index into a `String`:
169///
170/// ```compile_fail,E0277
171/// let s = "hello";
172///
173/// println!("The first letter of s is {}", s[0]); // ERROR!!!
174/// ```
175///
176/// [`OsString`]: https://doc.rust-lang.org/nightly/std/ffi/struct.OsString.html
177///
178/// Indexing is intended to be a constant-time operation, but UTF-8 encoding
179/// does not allow us to do this. Furthermore, it's not clear what sort of
180/// thing the index should return: a byte, a codepoint, or a grapheme cluster.
181/// The [`bytes`] and [`chars`] methods return iterators over the first
182/// two, respectively.
183///
184/// [`bytes`]: #method.bytes
185/// [`chars`]: #method.chars
186///
187/// # Deref
188///
189/// `String`s implement [`Deref`]`<Target=str>`, and so inherit all of [`str`]'s
190/// methods. In addition, this means that you can pass a `String` to a
191/// function which takes a [`&str`] by using an ampersand (`&`):
192///
193/// ```
194/// use bumpalo::{Bump, collections::String};
195///
196/// let b = Bump::new();
197///
198/// fn takes_str(s: &str) { }
199///
200/// let s = String::from_str_in("Hello", &b);
201///
202/// takes_str(&s);
203/// ```
204///
205/// This will create a [`&str`] from the `String` and pass it in. This
206/// conversion is very inexpensive, and so generally, functions will accept
207/// [`&str`]s as arguments unless they need a `String` for some specific
208/// reason.
209///
210/// In certain cases Rust doesn't have enough information to make this
211/// conversion, known as [`Deref`] coercion. In the following example a string
212/// slice [`&'a str`][`&str`] implements the trait `TraitExample`, and the function
213/// `example_func` takes anything that implements the trait. In this case Rust
214/// would need to make two implicit conversions, which Rust doesn't have the
215/// means to do. For that reason, the following example will not compile.
216///
217/// ```compile_fail,E0277
218/// use bumpalo::{Bump, collections::String};
219///
220/// trait TraitExample {}
221///
222/// impl<'a> TraitExample for &'a str {}
223///
224/// fn example_func<A: TraitExample>(example_arg: A) {}
225///
226/// let b = Bump::new();
227/// let example_string = String::from_str_in("example_string", &b);
228/// example_func(&example_string);
229/// ```
230///
231/// There are two options that would work instead. The first would be to
232/// change the line `example_func(&example_string);` to
233/// `example_func(example_string.as_str());`, using the method [`as_str()`]
234/// to explicitly extract the string slice containing the string. The second
235/// way changes `example_func(&example_string);` to
236/// `example_func(&*example_string);`. In this case we are dereferencing a
237/// `String` to a [`str`][`&str`], then referencing the [`str`][`&str`] back to
238/// [`&str`]. The second way is more idiomatic, however both work to do the
239/// conversion explicitly rather than relying on the implicit conversion.
240///
241/// # Representation
242///
243/// A `String` is made up of three components: a pointer to some bytes, a
244/// length, and a capacity. The pointer points to an internal buffer `String`
245/// uses to store its data. The length is the number of bytes currently stored
246/// in the buffer, and the capacity is the size of the buffer in bytes. As such,
247/// the length will always be less than or equal to the capacity.
248///
249/// This buffer is always stored on the heap.
250///
251/// You can look at these with the [`as_ptr`], [`len`], and [`capacity`]
252/// methods:
253///
254/// ```
255/// use bumpalo::{Bump, collections::String};
256/// use std::mem;
257///
258/// let b = Bump::new();
259///
260/// let story = String::from_str_in("Once upon a time...", &b);
261///
262/// let ptr = story.as_ptr();
263/// let len = story.len();
264/// let capacity = story.capacity();
265///
266/// // story has nineteen bytes
267/// assert_eq!(19, len);
268///
269/// // Now that we have our parts, we throw the story away.
270/// mem::forget(story);
271///
272/// // We can re-build a String out of ptr, len, and capacity. This is all
273/// // unsafe because we are responsible for making sure the components are
274/// // valid:
275/// let s = unsafe { String::from_raw_parts_in(ptr as *mut _, len, capacity, &b) } ;
276///
277/// assert_eq!(String::from_str_in("Once upon a time...", &b), s);
278/// ```
279///
280/// [`as_ptr`]: #method.as_ptr
281/// [`len`]: #method.len
282/// [`capacity`]: #method.capacity
283///
284/// If a `String` has enough capacity, adding elements to it will not
285/// re-allocate. For example, consider this program:
286///
287/// ```
288/// use bumpalo::{Bump, collections::String};
289///
290/// let b = Bump::new();
291///
292/// let mut s = String::new_in(&b);
293///
294/// println!("{}", s.capacity());
295///
296/// for _ in 0..5 {
297///     s.push_str("hello");
298///     println!("{}", s.capacity());
299/// }
300/// ```
301///
302/// This will output the following:
303///
304/// ```text
305/// 0
306/// 5
307/// 10
308/// 20
309/// 20
310/// 40
311/// ```
312///
313/// At first, we have no memory allocated at all, but as we append to the
314/// string, it increases its capacity appropriately. If we instead use the
315/// [`with_capacity_in`] method to allocate the correct capacity initially:
316///
317/// ```
318/// use bumpalo::{Bump, collections::String};
319///
320/// let b = Bump::new();
321///
322/// let mut s = String::with_capacity_in(25, &b);
323///
324/// println!("{}", s.capacity());
325///
326/// for _ in 0..5 {
327///     s.push_str("hello");
328///     println!("{}", s.capacity());
329/// }
330/// ```
331///
332/// [`with_capacity_in`]: #method.with_capacity_in
333///
334/// We end up with a different output:
335///
336/// ```text
337/// 25
338/// 25
339/// 25
340/// 25
341/// 25
342/// 25
343/// ```
344///
345/// Here, there's no need to allocate more memory inside the loop.
346///
347/// [`&str`]: https://doc.rust-lang.org/nightly/std/primitive.str.html
348/// [`Deref`]: https://doc.rust-lang.org/nightly/std/ops/trait.Deref.html
349/// [`as_str()`]: struct.String.html#method.as_str
// `PartialOrd`, `Eq` and `Ord` are derived, so they delegate to the single
// `vec` field below.
#[derive(PartialOrd, Eq, Ord)]
pub struct String<'bump> {
    // Backing byte buffer. The safe constructors visible in this file only
    // ever store valid UTF-8 here; `from_utf8_unchecked` and
    // `from_raw_parts_in` leave that guarantee to the caller.
    vec: Vec<'bump, u8>,
}
354
355/// A possible error value when converting a `String` from a UTF-8 byte vector.
356///
357/// This type is the error type for the [`from_utf8`] method on [`String`]. It
358/// is designed in such a way to carefully avoid reallocations: the
359/// [`into_bytes`] method will give back the byte vector that was used in the
360/// conversion attempt.
361///
362/// [`from_utf8`]: struct.String.html#method.from_utf8
363/// [`String`]: struct.String.html
364/// [`into_bytes`]: struct.FromUtf8Error.html#method.into_bytes
365///
366/// The [`Utf8Error`] type provided by [`std::str`] represents an error that may
367/// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's
368/// an analogue to `FromUtf8Error`, and you can get one from a `FromUtf8Error`
369/// through the [`utf8_error`] method.
370///
371/// [`Utf8Error`]: https://doc.rust-lang.org/nightly/std/str/struct.Utf8Error.html
372/// [`std::str`]: https://doc.rust-lang.org/nightly/std/str/index.html
373/// [`u8`]: https://doc.rust-lang.org/nightly/std/primitive.u8.html
374/// [`&str`]: https://doc.rust-lang.org/nightly/std/primitive.str.html
375/// [`utf8_error`]: #method.utf8_error
376///
377/// # Examples
378///
379/// Basic usage:
380///
381/// ```
382/// use bumpalo::{Bump, collections::String};
383///
384/// let b = Bump::new();
385///
386/// // some invalid bytes, in a vector
387/// let bytes = bumpalo::vec![in &b; 0, 159];
388///
389/// let value = String::from_utf8(bytes);
390///
391/// assert!(value.is_err());
392/// assert_eq!(bumpalo::vec![in &b; 0, 159], value.unwrap_err().into_bytes());
393/// ```
#[derive(Debug)]
pub struct FromUtf8Error<'bump> {
    // The byte vector whose conversion was attempted; `String::from_utf8`
    // moves it in here unchanged when validation fails.
    bytes: Vec<'bump, u8>,
    // The error `str::from_utf8` reported for `bytes`.
    error: Utf8Error,
}
399
400/// A possible error value when converting a `String` from a UTF-16 byte slice.
401///
/// This type is the error type for the [`from_utf16_in`] method on [`String`].
///
/// [`from_utf16_in`]: struct.String.html#method.from_utf16_in
405/// [`String`]: struct.String.html
406///
407/// # Examples
408///
409/// Basic usage:
410///
411/// ```
412/// use bumpalo::{Bump, collections::String};
413///
414/// let b = Bump::new();
415///
416/// // 𝄞mu<invalid>ic
417/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
418///           0xD800, 0x0069, 0x0063];
419///
420/// assert!(String::from_utf16_in(v, &b).is_err());
421/// ```
#[derive(Debug)]
// The single private `()` field carries no data; because it is private, the
// tuple constructor cannot be called from outside this module.
pub struct FromUtf16Error(());
424
425impl<'bump> String<'bump> {
426    /// Creates a new empty `String`.
427    ///
428    /// Given that the `String` is empty, this will not allocate any initial
429    /// buffer. While that means that this initial operation is very
430    /// inexpensive, it may cause excessive allocation later when you add
431    /// data. If you have an idea of how much data the `String` will hold,
432    /// consider the [`with_capacity_in`] method to prevent excessive
433    /// re-allocation.
434    ///
435    /// [`with_capacity_in`]: #method.with_capacity_in
436    ///
437    /// # Examples
438    ///
439    /// Basic usage:
440    ///
441    /// ```
442    /// use bumpalo::{Bump, collections::String};
443    ///
444    /// let b = Bump::new();
445    ///
446    /// let s = String::new_in(&b);
447    /// ```
448    #[inline]
449    pub fn new_in(bump: &'bump Bump) -> String<'bump> {
450        String {
451            vec: Vec::new_in(bump),
452        }
453    }
454
455    /// Creates a new empty `String` with a particular capacity.
456    ///
457    /// `String`s have an internal buffer to hold their data. The capacity is
458    /// the length of that buffer, and can be queried with the [`capacity`]
459    /// method. This method creates an empty `String`, but one with an initial
460    /// buffer that can hold `capacity` bytes. This is useful when you may be
461    /// appending a bunch of data to the `String`, reducing the number of
462    /// reallocations it needs to do.
463    ///
464    /// [`capacity`]: #method.capacity
465    ///
466    /// If the given capacity is `0`, no allocation will occur, and this method
467    /// is identical to the [`new_in`] method.
468    ///
    /// [`new_in`]: #method.new_in
470    ///
471    /// # Examples
472    ///
473    /// Basic usage:
474    ///
475    /// ```
476    /// use bumpalo::{Bump, collections::String};
477    ///
478    /// let b = Bump::new();
479    ///
480    /// let mut s = String::with_capacity_in(10, &b);
481    ///
482    /// // The String contains no chars, even though it has capacity for more
483    /// assert_eq!(s.len(), 0);
484    ///
485    /// // These are all done without reallocating...
486    /// let cap = s.capacity();
487    /// for _ in 0..10 {
488    ///     s.push('a');
489    /// }
490    ///
491    /// assert_eq!(s.capacity(), cap);
492    ///
493    /// // ...but this may make the vector reallocate
494    /// s.push('a');
495    /// ```
496    #[inline]
497    pub fn with_capacity_in(capacity: usize, bump: &'bump Bump) -> String<'bump> {
498        String {
499            vec: Vec::with_capacity_in(capacity, bump),
500        }
501    }
502
503    /// Converts a vector of bytes to a `String`.
504    ///
505    /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a vector of bytes
506    /// ([`Vec<u8>`]) is made of bytes, so this function converts between the
507    /// two. Not all byte slices are valid `String`s, however: `String`
508    /// requires that it is valid UTF-8. `from_utf8()` checks to ensure that
509    /// the bytes are valid UTF-8, and then does the conversion.
510    ///
511    /// If you are sure that the byte slice is valid UTF-8, and you don't want
512    /// to incur the overhead of the validity check, there is an unsafe version
513    /// of this function, [`from_utf8_unchecked`], which has the same behavior
514    /// but skips the check.
515    ///
516    /// This method will take care to not copy the vector, for efficiency's
517    /// sake.
518    ///
519    /// If you need a [`&str`] instead of a `String`, consider
520    /// [`str::from_utf8`].
521    ///
522    /// The inverse of this method is [`as_bytes`].
523    ///
524    /// # Errors
525    ///
526    /// Returns [`Err`] if the slice is not UTF-8 with a description as to why the
527    /// provided bytes are not UTF-8. The vector you moved in is also included.
528    ///
529    /// # Examples
530    ///
531    /// Basic usage:
532    ///
533    /// ```
534    /// use bumpalo::{Bump, collections::String};
535    ///
536    /// let b = Bump::new();
537    ///
538    /// // some bytes, in a vector
539    /// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
540    ///
541    /// // We know these bytes are valid, so we'll use `unwrap()`.
542    /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
543    ///
544    /// assert_eq!("💖", sparkle_heart);
545    /// ```
546    ///
547    /// Incorrect bytes:
548    ///
549    /// ```
550    /// use bumpalo::{Bump, collections::String};
551    ///
552    /// let b = Bump::new();
553    ///
554    /// // some invalid bytes, in a vector
555    /// let sparkle_heart = bumpalo::vec![in &b; 0, 159, 146, 150];
556    ///
557    /// assert!(String::from_utf8(sparkle_heart).is_err());
558    /// ```
559    ///
560    /// See the docs for [`FromUtf8Error`] for more details on what you can do
561    /// with this error.
562    ///
563    /// [`from_utf8_unchecked`]: struct.String.html#method.from_utf8_unchecked
564    /// [`&str`]: https://doc.rust-lang.org/nightly/std/primitive.str.html
565    /// [`u8`]: https://doc.rust-lang.org/nightly/std/primitive.u8.html
566    /// [`Vec<u8>`]: ../vec/struct.Vec.html
567    /// [`str::from_utf8`]: https://doc.rust-lang.org/nightly/std/str/fn.from_utf8.html
568    /// [`as_bytes`]: struct.String.html#method.as_bytes
569    /// [`FromUtf8Error`]: struct.FromUtf8Error.html
570    /// [`Err`]: https://doc.rust-lang.org/nightly/std/result/enum.Result.html#variant.Err
571    #[inline]
572    pub fn from_utf8(vec: Vec<'bump, u8>) -> Result<String<'bump>, FromUtf8Error<'bump>> {
573        match str::from_utf8(&vec) {
574            Ok(..) => Ok(String { vec }),
575            Err(e) => Err(FromUtf8Error {
576                bytes: vec,
577                error: e,
578            }),
579        }
580    }
581
582    /// Converts a slice of bytes to a string, including invalid characters.
583    ///
584    /// Strings are made of bytes ([`u8`]), and a slice of bytes
585    /// ([`&[u8]`][byteslice]) is made of bytes, so this function converts
586    /// between the two. Not all byte slices are valid strings, however: strings
587    /// are required to be valid UTF-8. During this conversion,
588    /// `from_utf8_lossy()` will replace any invalid UTF-8 sequences with
589    /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD], which looks like this: �
590    ///
591    /// [`u8`]: https://doc.rust-lang.org/nightly/std/primitive.u8.html
592    /// [byteslice]: https://doc.rust-lang.org/nightly/std/primitive.slice.html
    /// [U+FFFD]: https://doc.rust-lang.org/nightly/std/char/constant.REPLACEMENT_CHARACTER.html
594    ///
595    /// If you are sure that the byte slice is valid UTF-8, and you don't want
596    /// to incur the overhead of the conversion, there is an unsafe version
597    /// of this function, [`from_utf8_unchecked`], which has the same behavior
598    /// but skips the checks.
599    ///
600    /// [`from_utf8_unchecked`]: struct.String.html#method.from_utf8_unchecked
601    ///
602    /// # Examples
603    ///
604    /// Basic usage:
605    ///
606    /// ```
607    /// use bumpalo::{collections::String, Bump, vec};
608    ///
609    /// let b = Bump::new();
610    ///
611    /// // some bytes, in a vector
612    /// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
613    ///
614    /// let sparkle_heart = String::from_utf8_lossy_in(&sparkle_heart, &b);
615    ///
616    /// assert_eq!("💖", sparkle_heart);
617    /// ```
618    ///
619    /// Incorrect bytes:
620    ///
621    /// ```
622    /// use bumpalo::{collections::String, Bump, vec};
623    ///
624    /// let b = Bump::new();
625    ///
626    /// // some invalid bytes
627    /// let input = b"Hello \xF0\x90\x80World";
628    /// let output = String::from_utf8_lossy_in(input, &b);
629    ///
630    /// assert_eq!("Hello �World", output);
631    /// ```
632    pub fn from_utf8_lossy_in(v: &[u8], bump: &'bump Bump) -> String<'bump> {
633        let mut iter = lossy::Utf8Lossy::from_bytes(v).chunks();
634
635        let (first_valid, first_broken) = if let Some(chunk) = iter.next() {
636            let lossy::Utf8LossyChunk { valid, broken } = chunk;
637            if valid.len() == v.len() {
638                debug_assert!(broken.is_empty());
639                unsafe {
640                    return String::from_utf8_unchecked(Vec::from_iter_in(v.iter().cloned(), bump));
641                }
642            }
643            (valid, broken)
644        } else {
645            return String::from_str_in("", bump);
646        };
647
648        const REPLACEMENT: &str = "\u{FFFD}";
649
650        let mut res = String::with_capacity_in(v.len(), bump);
651        res.push_str(first_valid);
652        if !first_broken.is_empty() {
653            res.push_str(REPLACEMENT);
654        }
655
656        for lossy::Utf8LossyChunk { valid, broken } in iter {
657            res.push_str(valid);
658            if !broken.is_empty() {
659                res.push_str(REPLACEMENT);
660            }
661        }
662
663        res
664    }
665
    /// Decode a UTF-16 encoded slice `v` into a `String`, returning [`Err`]
    /// if `v` contains any invalid data.
668    ///
669    /// [`Err`]: https://doc.rust-lang.org/nightly/std/result/enum.Result.html#variant.Err
670    ///
671    /// # Examples
672    ///
673    /// Basic usage:
674    ///
675    /// ```
676    /// use bumpalo::{Bump, collections::String};
677    ///
678    /// let b = Bump::new();
679    ///
680    /// // 𝄞music
681    /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
682    ///           0x0073, 0x0069, 0x0063];
683    /// assert_eq!(String::from_str_in("𝄞music", &b),
684    ///            String::from_utf16_in(v, &b).unwrap());
685    ///
686    /// // 𝄞mu<invalid>ic
687    /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
688    ///           0xD800, 0x0069, 0x0063];
689    /// assert!(String::from_utf16_in(v, &b).is_err());
690    /// ```
691    pub fn from_utf16_in(v: &[u16], bump: &'bump Bump) -> Result<String<'bump>, FromUtf16Error> {
692        let mut ret = String::with_capacity_in(v.len(), bump);
693        for c in decode_utf16(v.iter().cloned()) {
694            if let Ok(c) = c {
695                ret.push(c);
696            } else {
697                return Err(FromUtf16Error(()));
698            }
699        }
700        Ok(ret)
701    }
702
    /// Construct a new `String<'bump>` from a string slice.
704    ///
705    /// # Examples
706    ///
707    /// ```
708    /// use bumpalo::{Bump, collections::String};
709    ///
710    /// let b = Bump::new();
711    ///
712    /// let s = String::from_str_in("hello", &b);
713    /// assert_eq!(s, "hello");
714    /// ```
715    pub fn from_str_in(s: &str, bump: &'bump Bump) -> String<'bump> {
716        let mut t = String::with_capacity_in(s.len(), bump);
717        t.push_str(s);
718        t
719    }
720
721    /// Construct a new `String<'bump>` from an iterator of `char`s.
722    ///
723    /// # Examples
724    ///
725    /// ```
726    /// use bumpalo::{Bump, collections::String};
727    ///
728    /// let b = Bump::new();
729    ///
    /// let s = String::from_iter_in(['h', 'e', 'l', 'l', 'o'].iter().cloned(), &b);
731    /// assert_eq!(s, "hello");
732    /// ```
733    pub fn from_iter_in<I: IntoIterator<Item = char>>(iter: I, bump: &'bump Bump) -> String<'bump> {
734        let mut s = String::new_in(bump);
735        for c in iter {
736            s.push(c);
737        }
738        s
739    }
740
741    /// Creates a new `String` from a length, capacity, and pointer.
742    ///
743    /// # Safety
744    ///
745    /// This is highly unsafe, due to the number of invariants that aren't
746    /// checked:
747    ///
    /// * The memory at `buf` needs to have been previously allocated from the
    ///   `bump` passed to this function (for example, by a `String` or `Vec`
    ///   created in that same `Bump`).
    /// * `length` needs to be less than or equal to `capacity`.
    /// * `capacity` needs to be the correct value.
    /// * The first `length` bytes at `buf` need to be valid UTF-8.
    ///
    /// Violating these may cause problems like corrupting the allocator's
    /// internal data structures.
    ///
    /// The ownership of `buf` is effectively transferred to the
    /// `String` which may then deallocate, reallocate or change the
    /// contents of memory pointed to by the pointer at will. Ensure
    /// that nothing else uses the pointer after calling this
    /// function.
761    ///
762    /// # Examples
763    ///
764    /// Basic usage:
765    ///
766    /// ```
767    /// use bumpalo::{Bump, collections::String};
768    /// use std::mem;
769    ///
770    /// let b = Bump::new();
771    ///
772    /// unsafe {
773    ///     let s = String::from_str_in("hello", &b);
774    ///     let ptr = s.as_ptr();
775    ///     let len = s.len();
776    ///     let capacity = s.capacity();
777    ///
778    ///     mem::forget(s);
779    ///
780    ///     let s = String::from_raw_parts_in(ptr as *mut _, len, capacity, &b);
781    ///
782    ///     assert_eq!(String::from_str_in("hello", &b), s);
783    /// }
784    /// ```
785    #[inline]
786    pub unsafe fn from_raw_parts_in(
787        buf: *mut u8,
788        length: usize,
789        capacity: usize,
790        bump: &'bump Bump,
791    ) -> String<'bump> {
792        String {
793            vec: Vec::from_raw_parts_in(buf, length, capacity, bump),
794        }
795    }
796
797    /// Converts a vector of bytes to a `String` without checking that the
798    /// string contains valid UTF-8.
799    ///
800    /// See the safe version, [`from_utf8`], for more details.
801    ///
802    /// [`from_utf8`]: struct.String.html#method.from_utf8
803    ///
804    /// # Safety
805    ///
806    /// This function is unsafe because it does not check that the bytes passed
807    /// to it are valid UTF-8. If this constraint is violated, it may cause
808    /// memory unsafety issues with future users of the `String`, as the rest of
809    /// the standard library assumes that `String`s are valid UTF-8.
810    ///
811    /// # Examples
812    ///
813    /// Basic usage:
814    ///
815    /// ```
816    /// use bumpalo::{Bump, collections::String};
817    ///
818    /// let b = Bump::new();
819    ///
820    /// // some bytes, in a vector
821    /// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
822    ///
823    /// let sparkle_heart = unsafe {
824    ///     String::from_utf8_unchecked(sparkle_heart)
825    /// };
826    ///
827    /// assert_eq!("💖", sparkle_heart);
828    /// ```
829    #[inline]
830    pub unsafe fn from_utf8_unchecked(bytes: Vec<'bump, u8>) -> String<'bump> {
831        String { vec: bytes }
832    }
833
834    /// Converts a `String` into a byte vector.
835    ///
836    /// This consumes the `String`, so we do not need to copy its contents.
837    ///
838    /// # Examples
839    ///
840    /// Basic usage:
841    ///
842    /// ```
843    /// use bumpalo::{Bump, collections::String};
844    ///
845    /// let b = Bump::new();
846    ///
847    /// let s = String::from_str_in("hello", &b);
848    /// let bytes = s.into_bytes();
849    ///
850    /// assert_eq!(&[104, 101, 108, 108, 111][..], &bytes[..]);
851    /// ```
852    #[inline]
853    pub fn into_bytes(self) -> Vec<'bump, u8> {
854        self.vec
855    }
856
    /// Convert this `String<'bump>` into a `&'bump str`. This is analogous to
    /// `std::string::String::into_boxed_str`.
859    ///
860    /// # Example
861    ///
862    /// ```
863    /// use bumpalo::{Bump, collections::String};
864    ///
865    /// let b = Bump::new();
866    ///
867    /// let s = String::from_str_in("foo", &b);
868    /// let t = s.into_bump_str();
869    /// assert_eq!("foo", t);
870    /// ```
871    pub fn into_bump_str(self) -> &'bump str {
872        let s = unsafe {
873            let s = self.as_str();
874            mem::transmute(s)
875        };
876        mem::forget(self);
877        s
878    }
879
880    /// Extracts a string slice containing the entire `String`.
881    ///
882    /// # Examples
883    ///
884    /// Basic usage:
885    ///
886    /// ```
887    /// use bumpalo::{Bump, collections::String};
888    ///
889    /// let b = Bump::new();
890    ///
891    /// let s = String::from_str_in("foo", &b);
892    ///
893    /// assert_eq!("foo", s.as_str());
894    /// ```
895    #[inline]
896    pub fn as_str(&self) -> &str {
897        self
898    }
899
900    /// Converts a `String` into a mutable string slice.
901    ///
902    /// # Examples
903    ///
904    /// Basic usage:
905    ///
906    /// ```
907    /// use bumpalo::{Bump, collections::String};
908    ///
909    /// let b = Bump::new();
910    ///
911    /// let mut s = String::from_str_in("foobar", &b);
912    /// let s_mut_str = s.as_mut_str();
913    ///
914    /// s_mut_str.make_ascii_uppercase();
915    ///
916    /// assert_eq!("FOOBAR", s_mut_str);
917    /// ```
918    #[inline]
919    pub fn as_mut_str(&mut self) -> &mut str {
920        self
921    }
922
923    /// Appends a given string slice onto the end of this `String`.
924    ///
925    /// # Examples
926    ///
927    /// Basic usage:
928    ///
929    /// ```
930    /// use bumpalo::{Bump, collections::String};
931    ///
932    /// let b = Bump::new();
933    ///
934    /// let mut s = String::from_str_in("foo", &b);
935    ///
936    /// s.push_str("bar");
937    ///
938    /// assert_eq!("foobar", s);
939    /// ```
940    #[inline]
941    pub fn push_str(&mut self, string: &str) {
942        self.vec.extend_from_slice(string.as_bytes())
943    }
944
945    /// Returns this `String`'s capacity, in bytes.
946    ///
947    /// # Examples
948    ///
949    /// Basic usage:
950    ///
951    /// ```
952    /// use bumpalo::{Bump, collections::String};
953    ///
954    /// let b = Bump::new();
955    ///
956    /// let s = String::with_capacity_in(10, &b);
957    ///
958    /// assert!(s.capacity() >= 10);
959    /// ```
960    #[inline]
961    pub fn capacity(&self) -> usize {
962        self.vec.capacity()
963    }
964
965    /// Ensures that this `String`'s capacity is at least `additional` bytes
966    /// larger than its length.
967    ///
968    /// The capacity may be increased by more than `additional` bytes if it
969    /// chooses, to prevent frequent reallocations.
970    ///
971    /// If you do not want this "at least" behavior, see the [`reserve_exact`]
972    /// method.
973    ///
974    /// # Panics
975    ///
976    /// Panics if the new capacity overflows [`usize`].
977    ///
978    /// [`reserve_exact`]: struct.String.html#method.reserve_exact
979    /// [`usize`]: https://doc.rust-lang.org/nightly/std/primitive.usize.html
980    ///
981    /// # Examples
982    ///
983    /// Basic usage:
984    ///
985    /// ```
986    /// use bumpalo::{Bump, collections::String};
987    ///
988    /// let b = Bump::new();
989    ///
990    /// let mut s = String::new_in(&b);
991    ///
992    /// s.reserve(10);
993    ///
994    /// assert!(s.capacity() >= 10);
995    /// ```
996    ///
997    /// This may not actually increase the capacity:
998    ///
999    /// ```
1000    /// use bumpalo::{Bump, collections::String};
1001    ///
1002    /// let b = Bump::new();
1003    ///
1004    /// let mut s = String::with_capacity_in(10, &b);
1005    /// s.push('a');
1006    /// s.push('b');
1007    ///
1008    /// // s now has a length of 2 and a capacity of 10
1009    /// assert_eq!(2, s.len());
1010    /// assert_eq!(10, s.capacity());
1011    ///
1012    /// // Since we already have an extra 8 capacity, calling this...
1013    /// s.reserve(8);
1014    ///
1015    /// // ... doesn't actually increase.
1016    /// assert_eq!(10, s.capacity());
1017    /// ```
1018    #[inline]
1019    pub fn reserve(&mut self, additional: usize) {
1020        self.vec.reserve(additional)
1021    }
1022
1023    /// Ensures that this `String`'s capacity is `additional` bytes
1024    /// larger than its length.
1025    ///
1026    /// Consider using the [`reserve`] method unless you absolutely know
1027    /// better than the allocator.
1028    ///
1029    /// [`reserve`]: #method.reserve
1030    ///
1031    /// # Panics
1032    ///
1033    /// Panics if the new capacity overflows `usize`.
1034    ///
1035    /// # Examples
1036    ///
1037    /// Basic usage:
1038    ///
1039    /// ```
1040    /// use bumpalo::{Bump, collections::String};
1041    ///
1042    /// let b = Bump::new();
1043    ///
1044    /// let mut s = String::new_in(&b);
1045    ///
1046    /// s.reserve_exact(10);
1047    ///
1048    /// assert!(s.capacity() >= 10);
1049    /// ```
1050    ///
1051    /// This may not actually increase the capacity:
1052    ///
1053    /// ```
1054    /// use bumpalo::{Bump, collections::String};
1055    ///
1056    /// let b = Bump::new();
1057    ///
1058    /// let mut s = String::with_capacity_in(10, &b);
1059    /// s.push('a');
1060    /// s.push('b');
1061    ///
1062    /// // s now has a length of 2 and a capacity of 10
1063    /// assert_eq!(2, s.len());
1064    /// assert_eq!(10, s.capacity());
1065    ///
1066    /// // Since we already have an extra 8 capacity, calling this...
1067    /// s.reserve_exact(8);
1068    ///
1069    /// // ... doesn't actually increase.
1070    /// assert_eq!(10, s.capacity());
1071    /// ```
1072    #[inline]
1073    pub fn reserve_exact(&mut self, additional: usize) {
1074        self.vec.reserve_exact(additional)
1075    }
1076
1077    /// Shrinks the capacity of this `String` to match its length.
1078    ///
1079    /// # Examples
1080    ///
1081    /// Basic usage:
1082    ///
1083    /// ```
1084    /// use bumpalo::{Bump, collections::String};
1085    ///
1086    /// let b = Bump::new();
1087    ///
1088    /// let mut s = String::from_str_in("foo", &b);
1089    ///
1090    /// s.reserve(100);
1091    /// assert!(s.capacity() >= 100);
1092    ///
1093    /// s.shrink_to_fit();
1094    /// assert_eq!(3, s.capacity());
1095    /// ```
1096    #[inline]
1097    pub fn shrink_to_fit(&mut self) {
1098        self.vec.shrink_to_fit()
1099    }
1100
1101    /// Appends the given [`char`] to the end of this `String`.
1102    ///
1103    /// [`char`]: https://doc.rust-lang.org/nightly/std/primitive.char.html
1104    ///
1105    /// # Examples
1106    ///
1107    /// Basic usage:
1108    ///
1109    /// ```
1110    /// use bumpalo::{Bump, collections::String};
1111    ///
1112    /// let b = Bump::new();
1113    ///
1114    /// let mut s = String::from_str_in("abc", &b);
1115    ///
1116    /// s.push('1');
1117    /// s.push('2');
1118    /// s.push('3');
1119    ///
1120    /// assert_eq!("abc123", s);
1121    /// ```
1122    #[inline]
1123    pub fn push(&mut self, ch: char) {
1124        match ch.len_utf8() {
1125            1 => self.vec.push(ch as u8),
1126            _ => self
1127                .vec
1128                .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()),
1129        }
1130    }
1131
1132    /// Returns a byte slice of this `String`'s contents.
1133    ///
1134    /// The inverse of this method is [`from_utf8`].
1135    ///
1136    /// [`from_utf8`]: #method.from_utf8
1137    ///
1138    /// # Examples
1139    ///
1140    /// Basic usage:
1141    ///
1142    /// ```
1143    /// use bumpalo::{Bump, collections::String};
1144    ///
1145    /// let b = Bump::new();
1146    ///
1147    /// let s = String::from_str_in("hello", &b);
1148    ///
1149    /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes());
1150    /// ```
1151    #[inline]
1152    pub fn as_bytes(&self) -> &[u8] {
1153        &self.vec
1154    }
1155
1156    /// Shortens this `String` to the specified length.
1157    ///
1158    /// If `new_len` is greater than the string's current length, this has no
1159    /// effect.
1160    ///
1161    /// Note that this method has no effect on the allocated capacity
1162    /// of the string
1163    ///
1164    /// # Panics
1165    ///
1166    /// Panics if `new_len` does not lie on a [`char`] boundary.
1167    ///
1168    /// [`char`]: https://doc.rust-lang.org/nightly/std/primitive.char.html
1169    ///
1170    /// # Examples
1171    ///
1172    /// Basic usage:
1173    ///
1174    /// ```
1175    /// use bumpalo::{Bump, collections::String};
1176    ///
1177    /// let b = Bump::new();
1178    ///
1179    /// let mut s = String::from_str_in("hello", &b);
1180    ///
1181    /// s.truncate(2);
1182    ///
1183    /// assert_eq!("he", s);
1184    /// ```
1185    #[inline]
1186    pub fn truncate(&mut self, new_len: usize) {
1187        if new_len <= self.len() {
1188            assert!(self.is_char_boundary(new_len));
1189            self.vec.truncate(new_len)
1190        }
1191    }
1192
1193    /// Removes the last character from the string buffer and returns it.
1194    ///
1195    /// Returns [`None`] if this `String` is empty.
1196    ///
1197    /// [`None`]: https://doc.rust-lang.org/nightly/std/option/enum.Option.html#variant.None
1198    ///
1199    /// # Examples
1200    ///
1201    /// Basic usage:
1202    ///
1203    /// ```
1204    /// use bumpalo::{Bump, collections::String};
1205    ///
1206    /// let b = Bump::new();
1207    ///
1208    /// let mut s = String::from_str_in("foo", &b);
1209    ///
1210    /// assert_eq!(s.pop(), Some('o'));
1211    /// assert_eq!(s.pop(), Some('o'));
1212    /// assert_eq!(s.pop(), Some('f'));
1213    ///
1214    /// assert_eq!(s.pop(), None);
1215    /// ```
1216    #[inline]
1217    pub fn pop(&mut self) -> Option<char> {
1218        let ch = self.chars().rev().next()?;
1219        let newlen = self.len() - ch.len_utf8();
1220        unsafe {
1221            self.vec.set_len(newlen);
1222        }
1223        Some(ch)
1224    }
1225
1226    /// Removes a [`char`] from this `String` at a byte position and returns it.
1227    ///
1228    /// This is an `O(n)` operation, as it requires copying every element in the
1229    /// buffer.
1230    ///
1231    /// # Panics
1232    ///
1233    /// Panics if `idx` is larger than or equal to the `String`'s length,
1234    /// or if it does not lie on a [`char`] boundary.
1235    ///
1236    /// [`char`]: https://doc.rust-lang.org/nightly/std/primitive.char.html
1237    ///
1238    /// # Examples
1239    ///
1240    /// Basic usage:
1241    ///
1242    /// ```
1243    /// use bumpalo::{Bump, collections::String};
1244    ///
1245    /// let b = Bump::new();
1246    ///
1247    /// let mut s = String::from_str_in("foo", &b);
1248    ///
1249    /// assert_eq!(s.remove(0), 'f');
1250    /// assert_eq!(s.remove(1), 'o');
1251    /// assert_eq!(s.remove(0), 'o');
1252    /// ```
1253    #[inline]
1254    pub fn remove(&mut self, idx: usize) -> char {
1255        let ch = match self[idx..].chars().next() {
1256            Some(ch) => ch,
1257            None => panic!("cannot remove a char from the end of a string"),
1258        };
1259
1260        let next = idx + ch.len_utf8();
1261        let len = self.len();
1262        unsafe {
1263            ptr::copy(
1264                self.vec.as_ptr().add(next),
1265                self.vec.as_mut_ptr().add(idx),
1266                len - next,
1267            );
1268            self.vec.set_len(len - (next - idx));
1269        }
1270        ch
1271    }
1272
1273    /// Retains only the characters specified by the predicate.
1274    ///
1275    /// In other words, remove all characters `c` such that `f(c)` returns `false`.
1276    /// This method operates in place and preserves the order of the retained
1277    /// characters.
1278    ///
1279    /// # Examples
1280    ///
1281    /// ```
1282    /// use bumpalo::{Bump, collections::String};
1283    ///
1284    /// let b = Bump::new();
1285    ///
1286    /// let mut s = String::from_str_in("f_o_ob_ar", &b);
1287    ///
1288    /// s.retain(|c| c != '_');
1289    ///
1290    /// assert_eq!(s, "foobar");
1291    /// ```
1292    #[inline]
1293    pub fn retain<F>(&mut self, mut f: F)
1294    where
1295        F: FnMut(char) -> bool,
1296    {
1297        let len = self.len();
1298        let mut del_bytes = 0;
1299        let mut idx = 0;
1300
1301        while idx < len {
1302            let ch = unsafe { self.get_unchecked(idx..len).chars().next().unwrap() };
1303            let ch_len = ch.len_utf8();
1304
1305            if !f(ch) {
1306                del_bytes += ch_len;
1307            } else if del_bytes > 0 {
1308                unsafe {
1309                    ptr::copy(
1310                        self.vec.as_ptr().add(idx),
1311                        self.vec.as_mut_ptr().add(idx - del_bytes),
1312                        ch_len,
1313                    );
1314                }
1315            }
1316
1317            // Point idx to the next char
1318            idx += ch_len;
1319        }
1320
1321        if del_bytes > 0 {
1322            unsafe {
1323                self.vec.set_len(len - del_bytes);
1324            }
1325        }
1326    }
1327
1328    /// Inserts a character into this `String` at a byte position.
1329    ///
1330    /// This is an `O(n)` operation as it requires copying every element in the
1331    /// buffer.
1332    ///
1333    /// # Panics
1334    ///
1335    /// Panics if `idx` is larger than the `String`'s length, or if it does not
1336    /// lie on a [`char`] boundary.
1337    ///
1338    /// [`char`]: https://doc.rust-lang.org/nightly/std/primitive.char.html
1339    ///
1340    /// # Examples
1341    ///
1342    /// Basic usage:
1343    ///
1344    /// ```
1345    /// use bumpalo::{Bump, collections::String};
1346    ///
1347    /// let b = Bump::new();
1348    ///
1349    /// let mut s = String::with_capacity_in(3, &b);
1350    ///
1351    /// s.insert(0, 'f');
1352    /// s.insert(1, 'o');
1353    /// s.insert(2, 'o');
1354    ///
1355    /// assert_eq!("foo", s);
1356    /// ```
1357    #[inline]
1358    pub fn insert(&mut self, idx: usize, ch: char) {
1359        assert!(self.is_char_boundary(idx));
1360        let mut bits = [0; 4];
1361        let bits = ch.encode_utf8(&mut bits).as_bytes();
1362
1363        unsafe {
1364            self.insert_bytes(idx, bits);
1365        }
1366    }
1367
1368    unsafe fn insert_bytes(&mut self, idx: usize, bytes: &[u8]) {
1369        let len = self.len();
1370        let amt = bytes.len();
1371        self.vec.reserve(amt);
1372
1373        ptr::copy(
1374            self.vec.as_ptr().add(idx),
1375            self.vec.as_mut_ptr().add(idx + amt),
1376            len - idx,
1377        );
1378        ptr::copy(bytes.as_ptr(), self.vec.as_mut_ptr().add(idx), amt);
1379        self.vec.set_len(len + amt);
1380    }
1381
1382    /// Inserts a string slice into this `String` at a byte position.
1383    ///
1384    /// This is an `O(n)` operation as it requires copying every element in the
1385    /// buffer.
1386    ///
1387    /// # Panics
1388    ///
1389    /// Panics if `idx` is larger than the `String`'s length, or if it does not
1390    /// lie on a [`char`] boundary.
1391    ///
1392    /// [`char`]: https://doc.rust-lang.org/nightly/std/primitive.char.html
1393    ///
1394    /// # Examples
1395    ///
1396    /// Basic usage:
1397    ///
1398    /// ```
1399    /// use bumpalo::{Bump, collections::String};
1400    ///
1401    /// let b = Bump::new();
1402    ///
1403    /// let mut s = String::from_str_in("bar", &b);
1404    ///
1405    /// s.insert_str(0, "foo");
1406    ///
1407    /// assert_eq!("foobar", s);
1408    /// ```
1409    #[inline]
1410    pub fn insert_str(&mut self, idx: usize, string: &str) {
1411        assert!(self.is_char_boundary(idx));
1412
1413        unsafe {
1414            self.insert_bytes(idx, string.as_bytes());
1415        }
1416    }
1417
1418    /// Returns a mutable reference to the contents of this `String`.
1419    ///
1420    /// # Safety
1421    ///
1422    /// This function is unsafe because it does not check that the bytes passed
1423    /// to it are valid UTF-8. If this constraint is violated, it may cause
1424    /// memory unsafety issues with future users of the `String`, as the rest of
1425    /// the standard library assumes that `String`s are valid UTF-8.
1426    ///
1427    /// # Examples
1428    ///
1429    /// Basic usage:
1430    ///
1431    /// ```
1432    /// use bumpalo::{Bump, collections::String};
1433    ///
1434    /// let b = Bump::new();
1435    ///
1436    /// let mut s = String::from_str_in("hello", &b);
1437    ///
1438    /// unsafe {
1439    ///     let vec = s.as_mut_vec();
1440    ///     assert_eq!(&[104, 101, 108, 108, 111][..], &vec[..]);
1441    ///
1442    ///     vec.reverse();
1443    /// }
1444    /// assert_eq!(s, "olleh");
1445    /// ```
1446    #[inline]
1447    pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<'bump, u8> {
1448        &mut self.vec
1449    }
1450
1451    /// Returns the length of this `String`, in bytes.
1452    ///
1453    /// # Examples
1454    ///
1455    /// Basic usage:
1456    ///
1457    /// ```
1458    /// use bumpalo::{Bump, collections::String};
1459    ///
1460    /// let b = Bump::new();
1461    ///
1462    /// let a = String::from_str_in("foo", &b);
1463    ///
1464    /// assert_eq!(a.len(), 3);
1465    /// ```
1466    #[inline]
1467    pub fn len(&self) -> usize {
1468        self.vec.len()
1469    }
1470
1471    /// Returns `true` if this `String` has a length of zero.
1472    ///
1473    /// Returns `false` otherwise.
1474    ///
1475    /// # Examples
1476    ///
1477    /// Basic usage:
1478    ///
1479    /// ```
1480    /// use bumpalo::{Bump, collections::String};
1481    ///
1482    /// let b = Bump::new();
1483    ///
1484    /// let mut v = String::new_in(&b);
1485    /// assert!(v.is_empty());
1486    ///
1487    /// v.push('a');
1488    /// assert!(!v.is_empty());
1489    /// ```
1490    #[inline]
1491    pub fn is_empty(&self) -> bool {
1492        self.len() == 0
1493    }
1494
1495    /// Splits the string into two at the given index.
1496    ///
1497    /// Returns a newly allocated `String`. `self` contains bytes `[0, at)`, and
1498    /// the returned `String` contains bytes `[at, len)`. `at` must be on the
1499    /// boundary of a UTF-8 code point.
1500    ///
1501    /// Note that the capacity of `self` does not change.
1502    ///
1503    /// # Panics
1504    ///
1505    /// Panics if `at` is not on a `UTF-8` code point boundary, or if it is beyond the last
1506    /// code point of the string.
1507    ///
1508    /// # Examples
1509    ///
1510    /// ```
1511    /// use bumpalo::{Bump, collections::String};
1512    ///
1513    /// let b = Bump::new();
1514    ///
1515    /// let mut hello = String::from_str_in("Hello, World!", &b);
1516    /// let world = hello.split_off(7);
1517    /// assert_eq!(hello, "Hello, ");
1518    /// assert_eq!(world, "World!");
1519    /// ```
1520    #[inline]
1521    pub fn split_off(&mut self, at: usize) -> String<'bump> {
1522        assert!(self.is_char_boundary(at));
1523        let other = self.vec.split_off(at);
1524        unsafe { String::from_utf8_unchecked(other) }
1525    }
1526
1527    /// Truncates this `String`, removing all contents.
1528    ///
1529    /// While this means the `String` will have a length of zero, it does not
1530    /// touch its capacity.
1531    ///
1532    /// # Examples
1533    ///
1534    /// Basic usage:
1535    ///
1536    /// ```
1537    /// use bumpalo::{Bump, collections::String};
1538    ///
1539    /// let b = Bump::new();
1540    ///
1541    /// let mut s = String::from_str_in("foo", &b);
1542    ///
1543    /// s.clear();
1544    ///
1545    /// assert!(s.is_empty());
1546    /// assert_eq!(0, s.len());
1547    /// assert_eq!(3, s.capacity());
1548    /// ```
1549    #[inline]
1550    pub fn clear(&mut self) {
1551        self.vec.clear()
1552    }
1553
1554    /// Creates a draining iterator that removes the specified range in the `String`
1555    /// and yields the removed `chars`.
1556    ///
1557    /// Note: The element range is removed even if the iterator is not
1558    /// consumed until the end.
1559    ///
1560    /// # Panics
1561    ///
1562    /// Panics if the starting point or end point do not lie on a [`char`]
1563    /// boundary, or if they're out of bounds.
1564    ///
1565    /// [`char`]: https://doc.rust-lang.org/nightly/std/primitive.char.html
1566    ///
1567    /// # Examples
1568    ///
1569    /// Basic usage:
1570    ///
1571    /// ```
1572    /// use bumpalo::{Bump, collections::String};
1573    ///
1574    /// let b = Bump::new();
1575    ///
1576    /// let mut s = String::from_str_in("α is alpha, β is beta", &b);
1577    /// let beta_offset = s.find('β').unwrap_or(s.len());
1578    ///
1579    /// // Remove the range up until the β from the string
1580    /// let t = String::from_iter_in(s.drain(..beta_offset), &b);
1581    /// assert_eq!(t, "α is alpha, ");
1582    /// assert_eq!(s, "β is beta");
1583    ///
1584    /// // A full range clears the string
1585    /// s.drain(..);
1586    /// assert_eq!(s, "");
1587    /// ```
1588    pub fn drain<'a, R>(&'a mut self, range: R) -> Drain<'a, 'bump>
1589    where
1590        R: RangeBounds<usize>,
1591    {
1592        // Memory safety
1593        //
1594        // The String version of Drain does not have the memory safety issues
1595        // of the vector version. The data is just plain bytes.
1596        // Because the range removal happens in Drop, if the Drain iterator is leaked,
1597        // the removal will not happen.
1598        let len = self.len();
1599        let start = match range.start_bound() {
1600            Included(&n) => n,
1601            Excluded(&n) => n + 1,
1602            Unbounded => 0,
1603        };
1604        let end = match range.end_bound() {
1605            Included(&n) => n + 1,
1606            Excluded(&n) => n,
1607            Unbounded => len,
1608        };
1609
1610        // Take out two simultaneous borrows. The &mut String won't be accessed
1611        // until iteration is over, in Drop.
1612        let self_ptr = self as *mut _;
1613        // slicing does the appropriate bounds checks
1614        let chars_iter = self[start..end].chars();
1615
1616        Drain {
1617            start,
1618            end,
1619            iter: chars_iter,
1620            string: self_ptr,
1621        }
1622    }
1623
1624    /// Removes the specified range in the string,
1625    /// and replaces it with the given string.
1626    /// The given string doesn't need to be the same length as the range.
1627    ///
1628    /// # Panics
1629    ///
1630    /// Panics if the starting point or end point do not lie on a [`char`]
1631    /// boundary, or if they're out of bounds.
1632    ///
1633    /// [`char`]: https://doc.rust-lang.org/nightly/std/primitive.char.html
1634    /// [`Vec::splice`]: ../vec/struct.Vec.html#method.splice
1635    ///
1636    /// # Examples
1637    ///
1638    /// Basic usage:
1639    ///
1640    /// ```
1641    /// use bumpalo::{Bump, collections::String};
1642    ///
1643    /// let b = Bump::new();
1644    ///
1645    /// let mut s = String::from_str_in("α is alpha, β is beta", &b);
1646    /// let beta_offset = s.find('β').unwrap_or(s.len());
1647    ///
1648    /// // Replace the range up until the β from the string
1649    /// s.replace_range(..beta_offset, "Α is capital alpha; ");
1650    /// assert_eq!(s, "Α is capital alpha; β is beta");
1651    /// ```
1652    pub fn replace_range<R>(&mut self, range: R, replace_with: &str)
1653    where
1654        R: RangeBounds<usize>,
1655    {
1656        // Memory safety
1657        //
1658        // Replace_range does not have the memory safety issues of a vector Splice.
1659        // of the vector version. The data is just plain bytes.
1660
1661        match range.start_bound() {
1662            Included(&n) => assert!(self.is_char_boundary(n)),
1663            Excluded(&n) => assert!(self.is_char_boundary(n + 1)),
1664            Unbounded => {}
1665        };
1666        match range.end_bound() {
1667            Included(&n) => assert!(self.is_char_boundary(n + 1)),
1668            Excluded(&n) => assert!(self.is_char_boundary(n)),
1669            Unbounded => {}
1670        };
1671
1672        unsafe { self.as_mut_vec() }.splice(range, replace_with.bytes());
1673    }
1674}
1675
1676impl<'bump> FromUtf8Error<'bump> {
1677    /// Returns a slice of [`u8`]s bytes that were attempted to convert to a `String`.
1678    ///
1679    /// # Examples
1680    ///
1681    /// Basic usage:
1682    ///
1683    /// ```
1684    /// use bumpalo::{Bump, collections::String};
1685    ///
1686    /// let b = Bump::new();
1687    ///
1688    /// // some invalid bytes, in a vector
1689    /// let bytes = bumpalo::vec![in &b; 0, 159];
1690    ///
1691    /// let value = String::from_utf8(bytes);
1692    ///
1693    /// assert_eq!(&[0, 159], value.unwrap_err().as_bytes());
1694    /// ```
1695    pub fn as_bytes(&self) -> &[u8] {
1696        &self.bytes[..]
1697    }
1698
1699    /// Returns the bytes that were attempted to convert to a `String`.
1700    ///
1701    /// This method is carefully constructed to avoid allocation. It will
1702    /// consume the error, moving out the bytes, so that a copy of the bytes
1703    /// does not need to be made.
1704    ///
1705    /// # Examples
1706    ///
1707    /// Basic usage:
1708    ///
1709    /// ```
1710    /// use bumpalo::{Bump, collections::String};
1711    ///
1712    /// let b = Bump::new();
1713    ///
1714    /// // some invalid bytes, in a vector
1715    /// let bytes = bumpalo::vec![in &b; 0, 159];
1716    ///
1717    /// let value = String::from_utf8(bytes);
1718    ///
1719    /// assert_eq!(bumpalo::vec![in &b; 0, 159], value.unwrap_err().into_bytes());
1720    /// ```
1721    pub fn into_bytes(self) -> Vec<'bump, u8> {
1722        self.bytes
1723    }
1724
1725    /// Fetch a `Utf8Error` to get more details about the conversion failure.
1726    ///
1727    /// The [`Utf8Error`] type provided by [`std::str`] represents an error that may
1728    /// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's
1729    /// an analogue to `FromUtf8Error`. See its documentation for more details
1730    /// on using it.
1731    ///
1732    /// [`Utf8Error`]: https://doc.rust-lang.org/nightly/std/str/struct.Utf8Error.html
1733    /// [`std::str`]: https://doc.rust-lang.org/nightly/std/str/index.html
1734    /// [`u8`]: https://doc.rust-lang.org/nightly/std/primitive.u8.html
1735    /// [`&str`]: https://doc.rust-lang.org/nightly/std/primitive.str.html
1736    ///
1737    /// # Examples
1738    ///
1739    /// Basic usage:
1740    ///
1741    /// ```
1742    /// use bumpalo::{Bump, collections::String};
1743    ///
1744    /// let b = Bump::new();
1745    ///
1746    /// // some invalid bytes, in a vector
1747    /// let bytes = bumpalo::vec![in &b; 0, 159];
1748    ///
1749    /// let error = String::from_utf8(bytes).unwrap_err().utf8_error();
1750    ///
1751    /// // the first byte is invalid here
1752    /// assert_eq!(1, error.valid_up_to());
1753    /// ```
1754    pub fn utf8_error(&self) -> Utf8Error {
1755        self.error
1756    }
1757}
1758
1759impl<'bump> fmt::Display for FromUtf8Error<'bump> {
1760    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1761        fmt::Display::fmt(&self.error, f)
1762    }
1763}
1764
1765impl fmt::Display for FromUtf16Error {
1766    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1767        fmt::Display::fmt("invalid utf-16: lone surrogate found", f)
1768    }
1769}
1770
1771impl<'bump> Clone for String<'bump> {
1772    fn clone(&self) -> Self {
1773        String {
1774            vec: self.vec.clone(),
1775        }
1776    }
1777
1778    fn clone_from(&mut self, source: &Self) {
1779        self.vec.clone_from(&source.vec);
1780    }
1781}
1782
1783impl<'bump> Extend<char> for String<'bump> {
1784    fn extend<I: IntoIterator<Item = char>>(&mut self, iter: I) {
1785        let iterator = iter.into_iter();
1786        let (lower_bound, _) = iterator.size_hint();
1787        self.reserve(lower_bound);
1788        for ch in iterator {
1789            self.push(ch)
1790        }
1791    }
1792}
1793
1794impl<'a, 'bump> Extend<&'a char> for String<'bump> {
1795    fn extend<I: IntoIterator<Item = &'a char>>(&mut self, iter: I) {
1796        self.extend(iter.into_iter().cloned());
1797    }
1798}
1799
1800impl<'a, 'bump> Extend<&'a str> for String<'bump> {
1801    fn extend<I: IntoIterator<Item = &'a str>>(&mut self, iter: I) {
1802        for s in iter {
1803            self.push_str(s)
1804        }
1805    }
1806}
1807
1808impl<'bump> Extend<String<'bump>> for String<'bump> {
1809    fn extend<I: IntoIterator<Item = String<'bump>>>(&mut self, iter: I) {
1810        for s in iter {
1811            self.push_str(&s)
1812        }
1813    }
1814}
1815
1816impl<'bump> Extend<core_alloc::string::String> for String<'bump> {
1817    fn extend<I: IntoIterator<Item = core_alloc::string::String>>(&mut self, iter: I) {
1818        for s in iter {
1819            self.push_str(&s)
1820        }
1821    }
1822}
1823
1824impl<'a, 'bump> Extend<Cow<'a, str>> for String<'bump> {
1825    fn extend<I: IntoIterator<Item = Cow<'a, str>>>(&mut self, iter: I) {
1826        for s in iter {
1827            self.push_str(&s)
1828        }
1829    }
1830}
1831
1832impl<'bump> PartialEq for String<'bump> {
1833    #[inline]
1834    fn eq(&self, other: &String) -> bool {
1835        PartialEq::eq(&self[..], &other[..])
1836    }
1837}
1838
1839macro_rules! impl_eq {
1840    ($lhs:ty, $rhs: ty) => {
1841        impl<'a, 'bump> PartialEq<$rhs> for $lhs {
1842            #[inline]
1843            fn eq(&self, other: &$rhs) -> bool {
1844                PartialEq::eq(&self[..], &other[..])
1845            }
1846        }
1847
1848        impl<'a, 'b, 'bump> PartialEq<$lhs> for $rhs {
1849            #[inline]
1850            fn eq(&self, other: &$lhs) -> bool {
1851                PartialEq::eq(&self[..], &other[..])
1852            }
1853        }
1854    };
1855}
1856
1857impl_eq! { String<'bump>, str }
1858impl_eq! { String<'bump>, &'a str }
1859impl_eq! { Cow<'a, str>, String<'bump> }
1860impl_eq! { core_alloc::string::String, String<'bump> }
1861
1862impl<'bump> fmt::Display for String<'bump> {
1863    #[inline]
1864    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1865        fmt::Display::fmt(&**self, f)
1866    }
1867}
1868
1869impl<'bump> fmt::Debug for String<'bump> {
1870    #[inline]
1871    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1872        fmt::Debug::fmt(&**self, f)
1873    }
1874}
1875
1876impl<'bump> hash::Hash for String<'bump> {
1877    #[inline]
1878    fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
1879        (**self).hash(hasher)
1880    }
1881}
1882
1883/// Implements the `+` operator for concatenating two strings.
1884///
1885/// This consumes the `String<'bump>` on the left-hand side and re-uses its buffer (growing it if
1886/// necessary). This is done to avoid allocating a new `String<'bump>` and copying the entire contents on
1887/// every operation, which would lead to `O(n^2)` running time when building an `n`-byte string by
1888/// repeated concatenation.
1889///
1890/// The string on the right-hand side is only borrowed; its contents are copied into the returned
1891/// `String<'bump>`.
1892///
1893/// # Examples
1894///
1895/// Concatenating two `String<'bump>`s takes the first by value and borrows the second:
1896///
1897/// ```
1898/// use bumpalo::{Bump, collections::String};
1899///
1900/// let bump = Bump::new();
1901///
1902/// let a = String::from_str_in("hello", &bump);
1903/// let b = String::from_str_in(" world", &bump);
1904/// let c = a + &b;
1905/// // `a` is moved and can no longer be used here.
1906/// ```
1907///
1908/// If you want to keep using the first `String`, you can clone it and append to the clone instead:
1909///
1910/// ```
1911/// use bumpalo::{Bump, collections::String};
1912///
1913/// let bump = Bump::new();
1914///
1915/// let a = String::from_str_in("hello", &bump);
1916/// let b = String::from_str_in(" world", &bump);
1917/// let c = a.clone() + &b;
1918/// // `a` is still valid here.
1919/// ```
1920///
1921/// Concatenating `&str` slices can be done by converting the first to a `String`:
1922///
1923/// ```
1924/// use bumpalo::{Bump, collections::String};
1925///
1926/// let b = Bump::new();
1927///
1928/// let a = "hello";
1929/// let b = " world";
1930/// let c = a.to_string() + b;
1931/// ```
1932impl<'a, 'bump> Add<&'a str> for String<'bump> {
1933    type Output = String<'bump>;
1934
1935    #[inline]
1936    fn add(mut self, other: &str) -> String<'bump> {
1937        self.push_str(other);
1938        self
1939    }
1940}
1941
1942/// Implements the `+=` operator for appending to a `String<'bump>`.
1943///
1944/// This has the same behavior as the [`push_str`][String::push_str] method.
1945impl<'a, 'bump> AddAssign<&'a str> for String<'bump> {
1946    #[inline]
1947    fn add_assign(&mut self, other: &str) {
1948        self.push_str(other);
1949    }
1950}
1951
1952impl<'bump> ops::Index<ops::Range<usize>> for String<'bump> {
1953    type Output = str;
1954
1955    #[inline]
1956    fn index(&self, index: ops::Range<usize>) -> &str {
1957        &self[..][index]
1958    }
1959}
1960impl<'bump> ops::Index<ops::RangeTo<usize>> for String<'bump> {
1961    type Output = str;
1962
1963    #[inline]
1964    fn index(&self, index: ops::RangeTo<usize>) -> &str {
1965        &self[..][index]
1966    }
1967}
1968impl<'bump> ops::Index<ops::RangeFrom<usize>> for String<'bump> {
1969    type Output = str;
1970
1971    #[inline]
1972    fn index(&self, index: ops::RangeFrom<usize>) -> &str {
1973        &self[..][index]
1974    }
1975}
1976impl<'bump> ops::Index<ops::RangeFull> for String<'bump> {
1977    type Output = str;
1978
1979    #[inline]
1980    fn index(&self, _index: ops::RangeFull) -> &str {
1981        unsafe { str::from_utf8_unchecked(&self.vec) }
1982    }
1983}
1984impl<'bump> ops::Index<ops::RangeInclusive<usize>> for String<'bump> {
1985    type Output = str;
1986
1987    #[inline]
1988    fn index(&self, index: ops::RangeInclusive<usize>) -> &str {
1989        Index::index(&**self, index)
1990    }
1991}
1992impl<'bump> ops::Index<ops::RangeToInclusive<usize>> for String<'bump> {
1993    type Output = str;
1994
1995    #[inline]
1996    fn index(&self, index: ops::RangeToInclusive<usize>) -> &str {
1997        Index::index(&**self, index)
1998    }
1999}
2000
2001impl<'bump> ops::IndexMut<ops::Range<usize>> for String<'bump> {
2002    #[inline]
2003    fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str {
2004        &mut self[..][index]
2005    }
2006}
2007impl<'bump> ops::IndexMut<ops::RangeTo<usize>> for String<'bump> {
2008    #[inline]
2009    fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str {
2010        &mut self[..][index]
2011    }
2012}
2013impl<'bump> ops::IndexMut<ops::RangeFrom<usize>> for String<'bump> {
2014    #[inline]
2015    fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str {
2016        &mut self[..][index]
2017    }
2018}
2019impl<'bump> ops::IndexMut<ops::RangeFull> for String<'bump> {
2020    #[inline]
2021    fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
2022        unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) }
2023    }
2024}
2025impl<'bump> ops::IndexMut<ops::RangeInclusive<usize>> for String<'bump> {
2026    #[inline]
2027    fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str {
2028        IndexMut::index_mut(&mut **self, index)
2029    }
2030}
2031impl<'bump> ops::IndexMut<ops::RangeToInclusive<usize>> for String<'bump> {
2032    #[inline]
2033    fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str {
2034        IndexMut::index_mut(&mut **self, index)
2035    }
2036}
2037
2038impl<'bump> ops::Deref for String<'bump> {
2039    type Target = str;
2040
2041    #[inline]
2042    fn deref(&self) -> &str {
2043        unsafe { str::from_utf8_unchecked(&self.vec) }
2044    }
2045}
2046
2047impl<'bump> ops::DerefMut for String<'bump> {
2048    #[inline]
2049    fn deref_mut(&mut self) -> &mut str {
2050        unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) }
2051    }
2052}
2053
2054impl<'bump> AsRef<str> for String<'bump> {
2055    #[inline]
2056    fn as_ref(&self) -> &str {
2057        self
2058    }
2059}
2060
2061impl<'bump> AsRef<[u8]> for String<'bump> {
2062    #[inline]
2063    fn as_ref(&self) -> &[u8] {
2064        self.as_bytes()
2065    }
2066}
2067
2068impl<'bump> fmt::Write for String<'bump> {
2069    #[inline]
2070    fn write_str(&mut self, s: &str) -> fmt::Result {
2071        self.push_str(s);
2072        Ok(())
2073    }
2074
2075    #[inline]
2076    fn write_char(&mut self, c: char) -> fmt::Result {
2077        self.push(c);
2078        Ok(())
2079    }
2080}
2081
/// A draining iterator for `String`.
///
/// This struct is created by the [`drain`] method on [`String`]. See its
/// documentation for more.
///
/// Iteration yields `char`s from the `iter` field; when the `Drain` is
/// dropped, the bytes in `start..end` are removed from the source string.
///
/// [`drain`]: struct.String.html#method.drain
/// [`String`]: struct.String.html
pub struct Drain<'a, 'bump> {
    /// Will be used as &'a mut String in the destructor
    ///
    /// Stored as a raw pointer and dereferenced only when the `Drain` is
    /// dropped.
    string: *mut String<'bump>,
    /// Start of part to remove (an index into the string's byte vector)
    start: usize,
    /// End of part to remove (exclusive index into the string's byte vector)
    end: usize,
    /// Current remaining range to remove; this is the iterator that
    /// `next` and `next_back` forward to
    iter: Chars<'a>,
}
2099
2100impl<'a, 'bump> fmt::Debug for Drain<'a, 'bump> {
2101    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2102        f.pad("Drain { .. }")
2103    }
2104}
2105
// `Drain` stores a raw `*mut String`, and raw pointers are neither `Sync` nor
// `Send`, so the auto traits are not derived; they are asserted manually here.
// NOTE(review): soundness presumably rests on the pointer being used like the
// `&'a mut String` it stands in for and on the destructor only shifting bytes
// in place; confirm against the `'bump` allocator's thread-safety story.
unsafe impl<'a, 'bump> Sync for Drain<'a, 'bump> {}
unsafe impl<'a, 'bump> Send for Drain<'a, 'bump> {}
2108
2109impl<'a, 'bump> Drop for Drain<'a, 'bump> {
2110    fn drop(&mut self) {
2111        unsafe {
2112            // Use Vec::drain. "Reaffirm" the bounds checks to avoid
2113            // panic code being inserted again.
2114            let self_vec = (*self.string).as_mut_vec();
2115            if self.start <= self.end && self.end <= self_vec.len() {
2116                self_vec.drain(self.start..self.end);
2117            }
2118        }
2119    }
2120}
2121
2122impl<'a, 'bump> Iterator for Drain<'a, 'bump> {
2123    type Item = char;
2124
2125    #[inline]
2126    fn next(&mut self) -> Option<char> {
2127        self.iter.next()
2128    }
2129
2130    fn size_hint(&self) -> (usize, Option<usize>) {
2131        self.iter.size_hint()
2132    }
2133}
2134
2135impl<'a, 'bump> DoubleEndedIterator for Drain<'a, 'bump> {
2136    #[inline]
2137    fn next_back(&mut self) -> Option<char> {
2138        self.iter.next_back()
2139    }
2140}
2141
// Marker impl: `Drain::next` only forwards to the inner `Chars` iterator,
// which is itself a `FusedIterator` in the standard library.
impl<'a, 'bump> FusedIterator for Drain<'a, 'bump> {}