bumpalo/collections/
string.rs

1// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! A UTF-8 encoded, growable string.
12//!
13//! This module contains the [`String`] type and several error types that may
14//! result from working with [`String`]s.
15//!
16//! This module is a fork of the [`std::string`] module, that uses a bump allocator.
17//!
18//! [`std::string`]: https://doc.rust-lang.org/std/string/index.html
19//!
20//! # Examples
21//!
22//! You can create a new [`String`] from a string literal with [`String::from_str_in`]:
23//!
24//! ```
25//! use bumpalo::{Bump, collections::String};
26//!
27//! let b = Bump::new();
28//!
29//! let s = String::from_str_in("world", &b);
30//! ```
31//!
32//! [`String`]: struct.String.html
33//! [`String::from_str_in`]: struct.String.html#method.from_str_in
34//!
35//! If you have a vector of valid UTF-8 bytes, you can make a [`String`] out of
36//! it. You can do the reverse too.
37//!
38//! ```
39//! use bumpalo::{Bump, collections::String};
40//!
41//! let b = Bump::new();
42//!
43//! let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
44//!
45//! // We know these bytes are valid, so we'll use `unwrap()`.
46//! let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
47//!
48//! assert_eq!("💖", sparkle_heart);
49//!
50//! let bytes = sparkle_heart.into_bytes();
51//!
52//! assert_eq!(bytes, [240, 159, 146, 150]);
53//! ```
54
55use crate::collections::str::lossy;
56use crate::collections::vec::Vec;
57use crate::Bump;
58use core::borrow::{Borrow, BorrowMut};
59use core::char::decode_utf16;
60use core::fmt;
61use core::hash;
62use core::iter::FusedIterator;
63use core::mem;
64use core::ops::Bound::{Excluded, Included, Unbounded};
65use core::ops::{self, Add, AddAssign, Index, IndexMut, RangeBounds};
66use core::ptr;
67use core::str::{self, Chars, Utf8Error};
68use core_alloc::borrow::Cow;
69
/// Like the [`format!`] macro, but for creating [`bumpalo::collections::String`]s.
///
/// The invocation always starts with `in <bump>,` where `<bump>` is the
/// `&Bump` the resulting string is created in, followed by a format string
/// and, optionally, its arguments. A trailing comma is accepted.
///
/// The `fmt::Result` produced while writing into the string is discarded, so
/// an error returned by an argument's formatting implementation does not
/// panic.
///
/// [`format!`]: https://doc.rust-lang.org/std/macro.format.html
/// [`bumpalo::collections::String`]: collections/string/struct.String.html
///
/// # Examples
///
/// ```
/// use bumpalo::Bump;
///
/// let b = Bump::new();
///
/// let who = "World";
/// let s = bumpalo::format!(in &b, "Hello, {}!", who);
/// assert_eq!(s, "Hello, World!");
///
/// // A format string without any arguments works as well.
/// let s = bumpalo::format!(in &b, "Hello!");
/// assert_eq!(s, "Hello!");
/// ```
#[macro_export]
macro_rules! format {
    // Format string followed by a comma and zero or more comma-separated
    // arguments. This is the arm that does the actual work.
    ( in $bump:expr, $fmt:expr, $($args:expr),* ) => {{
        use $crate::core_alloc::fmt::Write;
        let bump = $bump;
        let mut s = $crate::collections::String::new_in(bump);
        // The result of `write!` is intentionally ignored.
        let _ = write!(&mut s, $fmt, $($args),*);
        s
    }};

    // Arguments with a trailing comma: strip it and defer to the first arm.
    ( in $bump:expr, $fmt:expr, $($args:expr,)* ) => {
        $crate::format!(in $bump, $fmt, $($args),*)
    };

    // Format string only, with neither arguments nor a trailing comma.
    ( in $bump:expr, $fmt:expr ) => {
        $crate::format!(in $bump, $fmt,)
    };
}
100
101/// A UTF-8 encoded, growable string.
102///
103/// The `String` type is the most common string type that has ownership over the
104/// contents of the string. It has a close relationship with its borrowed
105/// counterpart, the primitive [`str`].
106///
107/// [`str`]: https://doc.rust-lang.org/std/primitive.str.html
108///
109/// # Examples
110///
111/// You can create a `String` from a literal string with [`String::from_str_in`]:
112///
113/// ```
114/// use bumpalo::{Bump, collections::String};
115///
116/// let b = Bump::new();
117///
118/// let hello = String::from_str_in("Hello, world!", &b);
119/// ```
120///
121/// You can append a [`char`] to a `String` with the [`push`] method, and
122/// append a [`&str`] with the [`push_str`] method:
123///
124/// ```
125/// use bumpalo::{Bump, collections::String};
126///
127/// let b = Bump::new();
128///
129/// let mut hello = String::from_str_in("Hello, ", &b);
130///
131/// hello.push('w');
132/// hello.push_str("orld!");
133/// ```
134///
135/// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
136/// [`push`]: #method.push
137/// [`push_str`]: #method.push_str
138///
139/// If you have a vector of UTF-8 bytes, you can create a `String` from it with
140/// the [`from_utf8`] method:
141///
142/// ```
143/// use bumpalo::{Bump, collections::String};
144///
145/// let b = Bump::new();
146///
147/// // some bytes, in a vector
148/// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
149///
150/// // We know these bytes are valid, so we'll use `unwrap()`.
151/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
152///
153/// assert_eq!("💖", sparkle_heart);
154/// ```
155///
156/// [`from_utf8`]: #method.from_utf8
157///
158/// # Deref
159///
160/// `String`s implement <code>[`Deref`]<Target = [`str`]></code>, and so inherit all of [`str`]'s
161/// methods. In addition, this means that you can pass a `String` to a
162/// function which takes a [`&str`] by using an ampersand (`&`):
163///
164/// ```
165/// use bumpalo::{Bump, collections::String};
166///
167/// let b = Bump::new();
168///
169/// fn takes_str(s: &str) { }
170///
171/// let s = String::from_str_in("Hello", &b);
172///
173/// takes_str(&s);
174/// ```
175///
176/// This will create a [`&str`] from the `String` and pass it in. This
177/// conversion is very inexpensive, and so generally, functions will accept
178/// [`&str`]s as arguments unless they need a `String` for some specific
179/// reason.
180///
181/// In certain cases Rust doesn't have enough information to make this
182/// conversion, known as [`Deref`] coercion. In the following example a string
183/// slice [`&'a str`][`&str`] implements the trait `TraitExample`, and the function
184/// `example_func` takes anything that implements the trait. In this case Rust
185/// would need to make two implicit conversions, which Rust doesn't have the
186/// means to do. For that reason, the following example will not compile.
187///
188/// ```compile_fail,E0277
189/// use bumpalo::{Bump, collections::String};
190///
191/// trait TraitExample {}
192///
193/// impl<'a> TraitExample for &'a str {}
194///
195/// fn example_func<A: TraitExample>(example_arg: A) {}
196///
197/// let b = Bump::new();
198/// let example_string = String::from_str_in("example_string", &b);
199/// example_func(&example_string);
200/// ```
201///
202/// There are two options that would work instead. The first would be to
203/// change the line `example_func(&example_string);` to
204/// `example_func(example_string.as_str());`, using the method [`as_str()`]
205/// to explicitly extract the string slice containing the string. The second
206/// way changes `example_func(&example_string);` to
207/// `example_func(&*example_string);`. In this case we are dereferencing a
208/// `String` to a [`str`][`&str`], then referencing the [`str`][`&str`] back to
209/// [`&str`]. The second way is more idiomatic, however both work to do the
210/// conversion explicitly rather than relying on the implicit conversion.
211///
212/// # Representation
213///
214/// A `String` is made up of three components: a pointer to some bytes, a
215/// length, and a capacity. The pointer points to an internal buffer `String`
216/// uses to store its data. The length is the number of bytes currently stored
217/// in the buffer, and the capacity is the size of the buffer in bytes. As such,
218/// the length will always be less than or equal to the capacity.
219///
220/// This buffer is always stored on the heap.
221///
222/// You can look at these with the [`as_ptr`], [`len`], and [`capacity`]
223/// methods:
224///
225/// ```
226/// use bumpalo::{Bump, collections::String};
227/// use std::mem;
228///
229/// let b = Bump::new();
230///
231/// let mut story = String::from_str_in("Once upon a time...", &b);
232///
233/// let ptr = story.as_mut_ptr();
234/// let len = story.len();
235/// let capacity = story.capacity();
236///
237/// // story has nineteen bytes
238/// assert_eq!(19, len);
239///
240/// // Now that we have our parts, we throw the story away.
241/// mem::forget(story);
242///
243/// // We can re-build a String out of ptr, len, and capacity. This is all
244/// // unsafe because we are responsible for making sure the components are
245/// // valid:
246/// let s = unsafe { String::from_raw_parts_in(ptr, len, capacity, &b) } ;
247///
248/// assert_eq!(String::from_str_in("Once upon a time...", &b), s);
249/// ```
250///
251/// [`as_ptr`]: https://doc.rust-lang.org/std/primitive.str.html#method.as_ptr
252/// [`len`]: #method.len
253/// [`capacity`]: #method.capacity
254///
255/// If a `String` has enough capacity, adding elements to it will not
256/// re-allocate. For example, consider this program:
257///
258/// ```
259/// use bumpalo::{Bump, collections::String};
260///
261/// let b = Bump::new();
262///
263/// let mut s = String::new_in(&b);
264///
265/// println!("{}", s.capacity());
266///
267/// for _ in 0..5 {
268///     s.push_str("hello");
269///     println!("{}", s.capacity());
270/// }
271/// ```
272///
273/// This will output the following:
274///
275/// ```text
276/// 0
277/// 5
278/// 10
279/// 20
280/// 20
281/// 40
282/// ```
283///
284/// At first, we have no memory allocated at all, but as we append to the
285/// string, it increases its capacity appropriately. If we instead use the
286/// [`with_capacity_in`] method to allocate the correct capacity initially:
287///
288/// ```
289/// use bumpalo::{Bump, collections::String};
290///
291/// let b = Bump::new();
292///
293/// let mut s = String::with_capacity_in(25, &b);
294///
295/// println!("{}", s.capacity());
296///
297/// for _ in 0..5 {
298///     s.push_str("hello");
299///     println!("{}", s.capacity());
300/// }
301/// ```
302///
303/// [`with_capacity_in`]: #method.with_capacity_in
304///
305/// We end up with a different output:
306///
307/// ```text
308/// 25
309/// 25
310/// 25
311/// 25
312/// 25
313/// 25
314/// ```
315///
316/// Here, there's no need to allocate more memory inside the loop.
317///
318/// [`&str`]: https://doc.rust-lang.org/std/primitive.str.html
319/// [`Deref`]: https://doc.rust-lang.org/std/ops/trait.Deref.html
320/// [`as_str()`]: struct.String.html#method.as_str
// `PartialOrd`, `Eq` and `Ord` are derived, so they compare the single `vec`
// field. `Eq` needs a `PartialEq` impl, which is not derived here; presumably
// it is written by hand elsewhere in this file (not visible in this part).
321#[derive(PartialOrd, Eq, Ord)]
322pub struct String<'bump> {
    // The string's bytes. The safe constructors in this file only ever fill it
    // with validated UTF-8 (`from_utf8`) or with bytes copied from a `&str` /
    // encoded from `char`s; the `unsafe` constructors leave that to the caller.
323    vec: Vec<'bump, u8>,
324}
325
326/// A possible error value when converting a `String` from a UTF-8 byte vector.
327///
328/// This type is the error type for the [`from_utf8`] method on [`String`]. It
329/// is designed in such a way to carefully avoid reallocations: the
330/// [`into_bytes`] method will give back the byte vector that was used in the
331/// conversion attempt.
332///
333/// [`from_utf8`]: struct.String.html#method.from_utf8
334/// [`String`]: struct.String.html
335/// [`into_bytes`]: struct.FromUtf8Error.html#method.into_bytes
336///
337/// The [`Utf8Error`] type provided by [`std::str`] represents an error that may
338/// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's
339/// an analogue to `FromUtf8Error`, and you can get one from a `FromUtf8Error`
340/// through the [`utf8_error`] method.
341///
342/// [`Utf8Error`]: https://doc.rust-lang.org/std/str/struct.Utf8Error.html
343/// [`std::str`]: https://doc.rust-lang.org/std/str/index.html
344/// [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html
345/// [`&str`]: https://doc.rust-lang.org/std/primitive.str.html
346/// [`utf8_error`]: #method.utf8_error
347///
348/// # Examples
349///
350/// Basic usage:
351///
352/// ```
353/// use bumpalo::{Bump, collections::String};
354///
355/// let b = Bump::new();
356///
357/// // some invalid bytes, in a vector
358/// let bytes = bumpalo::vec![in &b; 0, 159];
359///
360/// let value = String::from_utf8(bytes);
361///
362/// assert!(value.is_err());
363/// assert_eq!(bumpalo::vec![in &b; 0, 159], value.unwrap_err().into_bytes());
364/// ```
365#[derive(Debug)]
366pub struct FromUtf8Error<'bump> {
    // The byte vector that was passed to `String::from_utf8`, moved in as-is
    // when validation fails.
367    bytes: Vec<'bump, u8>,
    // The error `core::str::from_utf8` reported for `bytes`.
368    error: Utf8Error,
369}
370
/// A possible error value when decoding a UTF-16 encoded `u16` slice into a `String`.
372///
373/// This type is the error type for the [`from_utf16_in`] method on [`String`].
374///
375/// [`from_utf16_in`]: struct.String.html#method.from_utf16_in
376/// [`String`]: struct.String.html
377///
378/// # Examples
379///
380/// Basic usage:
381///
382/// ```
383/// use bumpalo::{Bump, collections::String};
384///
385/// let b = Bump::new();
386///
387/// // 𝄞mu<invalid>ic
388/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0xD800, 0x0069, 0x0063];
389///
390/// assert!(String::from_utf16_in(v, &b).is_err());
391/// ```
// Carries no data: the only field is a private `()`, so code outside this
// module cannot construct the type directly.
392#[derive(Debug)]
393pub struct FromUtf16Error(());
394
395impl<'bump> String<'bump> {
396    /// Creates a new empty `String`.
397    ///
398    /// Given that the `String` is empty, this will not allocate any initial
399    /// buffer. While that means that this initial operation is very
400    /// inexpensive, it may cause excessive allocation later when you add
401    /// data. If you have an idea of how much data the `String` will hold,
402    /// consider the [`with_capacity_in`] method to prevent excessive
403    /// re-allocation.
404    ///
405    /// [`with_capacity_in`]: #method.with_capacity_in
406    ///
407    /// # Examples
408    ///
409    /// Basic usage:
410    ///
411    /// ```
412    /// use bumpalo::{Bump, collections::String};
413    ///
414    /// let b = Bump::new();
415    ///
416    /// let s = String::new_in(&b);
417    /// ```
418    #[inline]
419    pub fn new_in(bump: &'bump Bump) -> String<'bump> {
420        String {
421            vec: Vec::new_in(bump),
422        }
423    }
424
425    /// Creates a new empty `String` with a particular capacity.
426    ///
427    /// `String`s have an internal buffer to hold their data. The capacity is
428    /// the length of that buffer, and can be queried with the [`capacity`]
429    /// method. This method creates an empty `String`, but one with an initial
430    /// buffer that can hold `capacity` bytes. This is useful when you may be
431    /// appending a bunch of data to the `String`, reducing the number of
432    /// reallocations it needs to do.
433    ///
434    /// [`capacity`]: #method.capacity
435    ///
436    /// If the given capacity is `0`, no allocation will occur, and this method
437    /// is identical to the [`new_in`] method.
438    ///
    /// [`new_in`]: #method.new_in
440    ///
441    /// # Examples
442    ///
443    /// Basic usage:
444    ///
445    /// ```
446    /// use bumpalo::{Bump, collections::String};
447    ///
448    /// let b = Bump::new();
449    ///
450    /// let mut s = String::with_capacity_in(10, &b);
451    ///
452    /// // The String contains no chars, even though it has capacity for more
453    /// assert_eq!(s.len(), 0);
454    ///
455    /// // These are all done without reallocating...
456    /// let cap = s.capacity();
457    /// for _ in 0..10 {
458    ///     s.push('a');
459    /// }
460    ///
461    /// assert_eq!(s.capacity(), cap);
462    ///
463    /// // ...but this may make the vector reallocate
464    /// s.push('a');
465    /// ```
466    #[inline]
467    pub fn with_capacity_in(capacity: usize, bump: &'bump Bump) -> String<'bump> {
468        String {
469            vec: Vec::with_capacity_in(capacity, bump),
470        }
471    }
472
473    /// Converts a vector of bytes to a `String`.
474    ///
475    /// A string (`String`) is made of bytes ([`u8`]), and a vector of bytes
476    /// ([`Vec<u8>`]) is made of bytes, so this function converts between the
477    /// two. Not all byte slices are valid `String`s, however: `String`
478    /// requires that it is valid UTF-8. `from_utf8()` checks to ensure that
479    /// the bytes are valid UTF-8, and then does the conversion.
480    ///
481    /// If you are sure that the byte slice is valid UTF-8, and you don't want
482    /// to incur the overhead of the validity check, there is an unsafe version
483    /// of this function, [`from_utf8_unchecked`], which has the same behavior
484    /// but skips the check.
485    ///
486    /// This method will take care to not copy the vector, for efficiency's
487    /// sake.
488    ///
489    /// If you need a [`&str`] instead of a `String`, consider
490    /// [`str::from_utf8`].
491    ///
492    /// The inverse of this method is [`into_bytes`].
493    ///
494    /// # Errors
495    ///
496    /// Returns [`Err`] if the slice is not UTF-8 with a description as to why the
497    /// provided bytes are not UTF-8. The vector you moved in is also included.
498    ///
499    /// # Examples
500    ///
501    /// Basic usage:
502    ///
503    /// ```
504    /// use bumpalo::{Bump, collections::String};
505    ///
506    /// let b = Bump::new();
507    ///
508    /// // some bytes, in a vector
509    /// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
510    ///
511    /// // We know these bytes are valid, so we'll use `unwrap()`.
512    /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
513    ///
514    /// assert_eq!("💖", sparkle_heart);
515    /// ```
516    ///
517    /// Incorrect bytes:
518    ///
519    /// ```
520    /// use bumpalo::{Bump, collections::String};
521    ///
522    /// let b = Bump::new();
523    ///
524    /// // some invalid bytes, in a vector
525    /// let sparkle_heart = bumpalo::vec![in &b; 0, 159, 146, 150];
526    ///
527    /// assert!(String::from_utf8(sparkle_heart).is_err());
528    /// ```
529    ///
530    /// See the docs for [`FromUtf8Error`] for more details on what you can do
531    /// with this error.
532    ///
533    /// [`from_utf8_unchecked`]: struct.String.html#method.from_utf8_unchecked
534    /// [`&str`]: https://doc.rust-lang.org/std/primitive.str.html
535    /// [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html
536    /// [`Vec<u8>`]: ../vec/struct.Vec.html
537    /// [`str::from_utf8`]: https://doc.rust-lang.org/std/str/fn.from_utf8.html
538    /// [`into_bytes`]: struct.String.html#method.into_bytes
539    /// [`FromUtf8Error`]: struct.FromUtf8Error.html
540    /// [`Err`]: https://doc.rust-lang.org/std/result/enum.Result.html#variant.Err
541    #[inline]
542    pub fn from_utf8(vec: Vec<'bump, u8>) -> Result<String<'bump>, FromUtf8Error<'bump>> {
543        match str::from_utf8(&vec) {
544            Ok(..) => Ok(String { vec }),
545            Err(e) => Err(FromUtf8Error {
546                bytes: vec,
547                error: e,
548            }),
549        }
550    }
551
552    /// Converts a slice of bytes to a string, including invalid characters.
553    ///
554    /// Strings are made of bytes ([`u8`]), and a slice of bytes
555    /// ([`&[u8]`][slice]) is made of bytes, so this function converts
556    /// between the two. Not all byte slices are valid strings, however: strings
557    /// are required to be valid UTF-8. During this conversion,
558    /// `from_utf8_lossy_in()` will replace any invalid UTF-8 sequences with
559    /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD], which looks like this: �
560    ///
561    /// [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html
562    /// [slice]: https://doc.rust-lang.org/std/primitive.slice.html
563    /// [U+FFFD]: https://doc.rust-lang.org/std/char/constant.REPLACEMENT_CHARACTER.html
564    ///
565    /// If you are sure that the byte slice is valid UTF-8, and you don't want
566    /// to incur the overhead of the conversion, there is an unsafe version
567    /// of this function, [`from_utf8_unchecked`], which has the same behavior
568    /// but skips the checks.
569    ///
570    /// [`from_utf8_unchecked`]: struct.String.html#method.from_utf8_unchecked
571    ///
572    /// # Examples
573    ///
574    /// Basic usage:
575    ///
576    /// ```
577    /// use bumpalo::{collections::String, Bump, vec};
578    ///
579    /// let b = Bump::new();
580    ///
581    /// // some bytes, in a vector
582    /// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
583    ///
584    /// let sparkle_heart = String::from_utf8_lossy_in(&sparkle_heart, &b);
585    ///
586    /// assert_eq!("💖", sparkle_heart);
587    /// ```
588    ///
589    /// Incorrect bytes:
590    ///
591    /// ```
592    /// use bumpalo::{collections::String, Bump, vec};
593    ///
594    /// let b = Bump::new();
595    ///
596    /// // some invalid bytes
597    /// let input = b"Hello \xF0\x90\x80World";
598    /// let output = String::from_utf8_lossy_in(input, &b);
599    ///
600    /// assert_eq!("Hello �World", output);
601    /// ```
602    pub fn from_utf8_lossy_in(v: &[u8], bump: &'bump Bump) -> String<'bump> {
603        let mut iter = lossy::Utf8Lossy::from_bytes(v).chunks();
604
605        let (first_valid, first_broken) = if let Some(chunk) = iter.next() {
606            let lossy::Utf8LossyChunk { valid, broken } = chunk;
607            if valid.len() == v.len() {
608                debug_assert!(broken.is_empty());
609                unsafe {
610                    return String::from_utf8_unchecked(Vec::from_iter_in(v.iter().cloned(), bump));
611                }
612            }
613            (valid, broken)
614        } else {
615            return String::from_str_in("", bump);
616        };
617
618        const REPLACEMENT: &str = "\u{FFFD}";
619
620        let mut res = String::with_capacity_in(v.len(), bump);
621        res.push_str(first_valid);
622        if !first_broken.is_empty() {
623            res.push_str(REPLACEMENT);
624        }
625
626        for lossy::Utf8LossyChunk { valid, broken } in iter {
627            res.push_str(valid);
628            if !broken.is_empty() {
629                res.push_str(REPLACEMENT);
630            }
631        }
632
633        res
634    }
635
636    /// Decode a UTF-16 encoded slice `v` into a `String`, returning [`Err`]
637    /// if `v` contains any invalid data.
638    ///
639    /// [`Err`]: https://doc.rust-lang.org/std/result/enum.Result.html#variant.Err
640    ///
641    /// # Examples
642    ///
643    /// Basic usage:
644    ///
645    /// ```
646    /// use bumpalo::{Bump, collections::String};
647    ///
648    /// let b = Bump::new();
649    ///
650    /// // 𝄞music
651    /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0x0069, 0x0063];
652    /// assert_eq!(String::from_str_in("𝄞music", &b), String::from_utf16_in(v, &b).unwrap());
653    ///
654    /// // 𝄞mu<invalid>ic
655    /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0xD800, 0x0069, 0x0063];
656    /// assert!(String::from_utf16_in(v, &b).is_err());
657    /// ```
658    pub fn from_utf16_in(v: &[u16], bump: &'bump Bump) -> Result<String<'bump>, FromUtf16Error> {
659        let mut ret = String::with_capacity_in(v.len(), bump);
660        for c in decode_utf16(v.iter().cloned()) {
661            if let Ok(c) = c {
662                ret.push(c);
663            } else {
664                return Err(FromUtf16Error(()));
665            }
666        }
667        Ok(ret)
668    }
669
670    /// Construct a new `String<'bump>` from a string slice.
671    ///
672    /// # Examples
673    ///
674    /// ```
675    /// use bumpalo::{Bump, collections::String};
676    ///
677    /// let b = Bump::new();
678    ///
679    /// let s = String::from_str_in("hello", &b);
680    /// assert_eq!(s, "hello");
681    /// ```
682    pub fn from_str_in(s: &str, bump: &'bump Bump) -> String<'bump> {
683        let len = s.len();
684        let mut t = String::with_capacity_in(len, bump);
685        // SAFETY:
686        // * `src` is valid for reads of `s.len()` bytes by virtue of being an allocated `&str`.
687        // * `dst` is valid for writes of `s.len()` bytes as `String::with_capacity_in(s.len(), bump)`
688        //   above guarantees that.
689        // * Alignment is not relevant as `u8` has no alignment requirements.
690        // * Source and destination ranges cannot overlap as we just reserved the destination
691        //   range from the bump.
692        unsafe { ptr::copy_nonoverlapping(s.as_ptr(), t.vec.as_mut_ptr(), len) };
693        // SAFETY: We reserved sufficent capacity for the string above.
694        // The elements at `0..len` were initialized by `copy_nonoverlapping` above.
695        unsafe { t.vec.set_len(len) };
696        t
697    }
698
699    /// Construct a new `String<'bump>` from an iterator of `char`s.
700    ///
701    /// # Examples
702    ///
703    /// ```
704    /// use bumpalo::{Bump, collections::String};
705    ///
706    /// let b = Bump::new();
707    ///
708    /// let s = String::from_iter_in(['h', 'e', 'l', 'l', 'o'].iter().cloned(), &b);
709    /// assert_eq!(s, "hello");
710    /// ```
711    pub fn from_iter_in<I: IntoIterator<Item = char>>(iter: I, bump: &'bump Bump) -> String<'bump> {
712        let mut s = String::new_in(bump);
713        for c in iter {
714            s.push(c);
715        }
716        s
717    }
718
719    /// Creates a new `String` from a length, capacity, and pointer.
720    ///
721    /// # Safety
722    ///
723    /// This is highly unsafe, due to the number of invariants that aren't
724    /// checked:
725    ///
    /// * The memory at `ptr` needs to have been previously allocated from the
    ///   `bump` arena passed to this function (as in the example below).
728    /// * `length` needs to be less than or equal to `capacity`.
729    /// * `capacity` needs to be the correct value.
730    ///
731    /// Violating these may cause problems like corrupting the allocator's
732    /// internal data structures.
733    ///
734    /// The ownership of `ptr` is effectively transferred to the
735    /// `String` which may then deallocate, reallocate or change the
736    /// contents of memory pointed to by the pointer at will. Ensure
737    /// that nothing else uses the pointer after calling this
738    /// function.
739    ///
740    /// # Examples
741    ///
742    /// Basic usage:
743    ///
744    /// ```
745    /// use bumpalo::{Bump, collections::String};
746    /// use std::mem;
747    ///
748    /// let b = Bump::new();
749    ///
750    /// unsafe {
751    ///     let mut s = String::from_str_in("hello", &b);
752    ///     let ptr = s.as_mut_ptr();
753    ///     let len = s.len();
754    ///     let capacity = s.capacity();
755    ///
756    ///     mem::forget(s);
757    ///
758    ///     let s = String::from_raw_parts_in(ptr, len, capacity, &b);
759    ///
760    ///     assert_eq!(s, "hello");
761    /// }
762    /// ```
763    #[inline]
764    pub unsafe fn from_raw_parts_in(
765        buf: *mut u8,
766        length: usize,
767        capacity: usize,
768        bump: &'bump Bump,
769    ) -> String<'bump> {
770        String {
771            vec: Vec::from_raw_parts_in(buf, length, capacity, bump),
772        }
773    }
774
775    /// Converts a vector of bytes to a `String` without checking that the
776    /// string contains valid UTF-8.
777    ///
778    /// See the safe version, [`from_utf8`], for more details.
779    ///
780    /// [`from_utf8`]: struct.String.html#method.from_utf8
781    ///
782    /// # Safety
783    ///
784    /// This function is unsafe because it does not check that the bytes passed
785    /// to it are valid UTF-8. If this constraint is violated, it may cause
786    /// memory unsafety issues with future users of the `String`,
787    /// as it is assumed that `String`s are valid UTF-8.
788    ///
789    /// # Examples
790    ///
791    /// Basic usage:
792    ///
793    /// ```
794    /// use bumpalo::{Bump, collections::String};
795    ///
796    /// let b = Bump::new();
797    ///
798    /// // some bytes, in a vector
799    /// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
800    ///
801    /// let sparkle_heart = unsafe {
802    ///     String::from_utf8_unchecked(sparkle_heart)
803    /// };
804    ///
805    /// assert_eq!("💖", sparkle_heart);
806    /// ```
807    #[inline]
808    pub unsafe fn from_utf8_unchecked(bytes: Vec<'bump, u8>) -> String<'bump> {
809        String { vec: bytes }
810    }
811
812    /// Returns a shared reference to the allocator backing this `String`.
813    ///
814    /// # Examples
815    ///
816    /// ```
817    /// use bumpalo::{Bump, collections::String};
818    ///
819    /// // uses the same allocator as the provided `String`
820    /// fn copy_string<'bump>(s: &String<'bump>) -> &'bump str {
821    ///     s.bump().alloc_str(s.as_str())
822    /// }
823    /// ```
824    #[inline]
825    #[must_use]
826    pub fn bump(&self) -> &'bump Bump {
827        self.vec.bump()
828    }
829
830    /// Converts a `String` into a byte vector.
831    ///
832    /// This consumes the `String`, so we do not need to copy its contents.
833    ///
834    /// # Examples
835    ///
836    /// Basic usage:
837    ///
838    /// ```
839    /// use bumpalo::{Bump, collections::String};
840    ///
841    /// let b = Bump::new();
842    ///
843    /// let s = String::from_str_in("hello", &b);
844    ///
845    /// assert_eq!(s.into_bytes(), [104, 101, 108, 108, 111]);
846    /// ```
847    #[inline]
848    pub fn into_bytes(self) -> Vec<'bump, u8> {
849        self.vec
850    }
851
852    /// Convert this `String<'bump>` into a `&'bump str`. This is analogous to
853    /// [`std::string::String::into_boxed_str`][into_boxed_str].
854    ///
855    /// [into_boxed_str]: https://doc.rust-lang.org/std/string/struct.String.html#method.into_boxed_str
856    ///
857    /// # Example
858    ///
859    /// ```
860    /// use bumpalo::{Bump, collections::String};
861    ///
862    /// let b = Bump::new();
863    ///
864    /// let s = String::from_str_in("foo", &b);
865    ///
866    /// assert_eq!(s.into_bump_str(), "foo");
867    /// ```
868    pub fn into_bump_str(self) -> &'bump str {
869        let s = unsafe {
870            let s = self.as_str();
871            mem::transmute(s)
872        };
873        mem::forget(self);
874        s
875    }
876
877    /// Extracts a string slice containing the entire `String`.
878    ///
879    /// # Examples
880    ///
881    /// Basic usage:
882    ///
883    /// ```
884    /// use bumpalo::{Bump, collections::String};
885    ///
886    /// let b = Bump::new();
887    ///
888    /// let s = String::from_str_in("foo", &b);
889    ///
890    /// assert_eq!("foo", s.as_str());
891    /// ```
892    #[inline]
893    pub fn as_str(&self) -> &str {
894        self
895    }
896
897    /// Converts a `String` into a mutable string slice.
898    ///
899    /// # Examples
900    ///
901    /// Basic usage:
902    ///
903    /// ```
904    /// use bumpalo::{Bump, collections::String};
905    ///
906    /// let b = Bump::new();
907    ///
908    /// let mut s = String::from_str_in("foobar", &b);
909    /// let s_mut_str = s.as_mut_str();
910    ///
911    /// s_mut_str.make_ascii_uppercase();
912    ///
913    /// assert_eq!("FOOBAR", s_mut_str);
914    /// ```
915    #[inline]
916    pub fn as_mut_str(&mut self) -> &mut str {
917        self
918    }
919
920    /// Appends a given string slice onto the end of this `String`.
921    ///
922    /// # Examples
923    ///
924    /// Basic usage:
925    ///
926    /// ```
927    /// use bumpalo::{Bump, collections::String};
928    ///
929    /// let b = Bump::new();
930    ///
931    /// let mut s = String::from_str_in("foo", &b);
932    ///
933    /// s.push_str("bar");
934    ///
935    /// assert_eq!("foobar", s);
936    /// ```
937    #[inline]
938    pub fn push_str(&mut self, string: &str) {
939        self.vec.extend_from_slice_copy(string.as_bytes())
940    }
941
942    /// Returns this `String`'s capacity, in bytes.
943    ///
944    /// # Examples
945    ///
946    /// Basic usage:
947    ///
948    /// ```
949    /// use bumpalo::{Bump, collections::String};
950    ///
951    /// let b = Bump::new();
952    ///
953    /// let s = String::with_capacity_in(10, &b);
954    ///
955    /// assert!(s.capacity() >= 10);
956    /// ```
957    #[inline]
958    pub fn capacity(&self) -> usize {
959        self.vec.capacity()
960    }
961
962    /// Ensures that this `String`'s capacity is at least `additional` bytes
963    /// larger than its length.
964    ///
965    /// The capacity may be increased by more than `additional` bytes if it
966    /// chooses, to prevent frequent reallocations.
967    ///
968    /// If you do not want this "at least" behavior, see the [`reserve_exact`]
969    /// method.
970    ///
971    /// # Panics
972    ///
973    /// Panics if the new capacity overflows [`usize`].
974    ///
975    /// [`reserve_exact`]: struct.String.html#method.reserve_exact
976    /// [`usize`]: https://doc.rust-lang.org/std/primitive.usize.html
977    ///
978    /// # Examples
979    ///
980    /// Basic usage:
981    ///
982    /// ```
983    /// use bumpalo::{Bump, collections::String};
984    ///
985    /// let b = Bump::new();
986    ///
987    /// let mut s = String::new_in(&b);
988    ///
989    /// s.reserve(10);
990    ///
991    /// assert!(s.capacity() >= 10);
992    /// ```
993    ///
994    /// This may not actually increase the capacity:
995    ///
996    /// ```
997    /// use bumpalo::{Bump, collections::String};
998    ///
999    /// let b = Bump::new();
1000    ///
1001    /// let mut s = String::with_capacity_in(10, &b);
1002    /// s.push('a');
1003    /// s.push('b');
1004    ///
1005    /// // s now has a length of 2 and a capacity of 10
1006    /// assert_eq!(2, s.len());
1007    /// assert_eq!(10, s.capacity());
1008    ///
1009    /// // Since we already have an extra 8 capacity, calling this...
1010    /// s.reserve(8);
1011    ///
1012    /// // ... doesn't actually increase.
1013    /// assert_eq!(10, s.capacity());
1014    /// ```
1015    #[inline]
1016    pub fn reserve(&mut self, additional: usize) {
1017        self.vec.reserve(additional)
1018    }
1019
1020    /// Ensures that this `String`'s capacity is `additional` bytes
1021    /// larger than its length.
1022    ///
1023    /// Consider using the [`reserve`] method unless you absolutely know
1024    /// better than the allocator.
1025    ///
1026    /// [`reserve`]: #method.reserve
1027    ///
1028    /// # Panics
1029    ///
1030    /// Panics if the new capacity overflows `usize`.
1031    ///
1032    /// # Examples
1033    ///
1034    /// Basic usage:
1035    ///
1036    /// ```
1037    /// use bumpalo::{Bump, collections::String};
1038    ///
1039    /// let b = Bump::new();
1040    ///
1041    /// let mut s = String::new_in(&b);
1042    ///
1043    /// s.reserve_exact(10);
1044    ///
1045    /// assert!(s.capacity() >= 10);
1046    /// ```
1047    ///
1048    /// This may not actually increase the capacity:
1049    ///
1050    /// ```
1051    /// use bumpalo::{Bump, collections::String};
1052    ///
1053    /// let b = Bump::new();
1054    ///
1055    /// let mut s = String::with_capacity_in(10, &b);
1056    /// s.push('a');
1057    /// s.push('b');
1058    ///
1059    /// // s now has a length of 2 and a capacity of 10
1060    /// assert_eq!(2, s.len());
1061    /// assert_eq!(10, s.capacity());
1062    ///
1063    /// // Since we already have an extra 8 capacity, calling this...
1064    /// s.reserve_exact(8);
1065    ///
1066    /// // ... doesn't actually increase.
1067    /// assert_eq!(10, s.capacity());
1068    /// ```
1069    #[inline]
1070    pub fn reserve_exact(&mut self, additional: usize) {
1071        self.vec.reserve_exact(additional)
1072    }
1073
1074    /// Shrinks the capacity of this `String` to match its length.
1075    ///
1076    /// # Examples
1077    ///
1078    /// Basic usage:
1079    ///
1080    /// ```
1081    /// use bumpalo::{Bump, collections::String};
1082    ///
1083    /// let b = Bump::new();
1084    ///
1085    /// let mut s = String::from_str_in("foo", &b);
1086    ///
1087    /// s.reserve(100);
1088    /// assert!(s.capacity() >= 100);
1089    ///
1090    /// s.shrink_to_fit();
1091    /// assert_eq!(3, s.capacity());
1092    /// ```
1093    #[inline]
1094    pub fn shrink_to_fit(&mut self) {
1095        self.vec.shrink_to_fit()
1096    }
1097
1098    /// Appends the given [`char`] to the end of this `String`.
1099    ///
1100    /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1101    ///
1102    /// # Examples
1103    ///
1104    /// Basic usage:
1105    ///
1106    /// ```
1107    /// use bumpalo::{Bump, collections::String};
1108    ///
1109    /// let b = Bump::new();
1110    ///
1111    /// let mut s = String::from_str_in("abc", &b);
1112    ///
1113    /// s.push('1');
1114    /// s.push('2');
1115    /// s.push('3');
1116    ///
1117    /// assert_eq!("abc123", s);
1118    /// ```
1119    #[inline]
1120    pub fn push(&mut self, ch: char) {
1121        match ch.len_utf8() {
1122            1 => self.vec.push(ch as u8),
1123            _ => self
1124                .vec
1125                .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()),
1126        }
1127    }
1128
1129    /// Returns a byte slice of this `String`'s contents.
1130    ///
1131    /// The inverse of this method is [`from_utf8`].
1132    ///
1133    /// [`from_utf8`]: #method.from_utf8
1134    ///
1135    /// # Examples
1136    ///
1137    /// Basic usage:
1138    ///
1139    /// ```
1140    /// use bumpalo::{Bump, collections::String};
1141    ///
1142    /// let b = Bump::new();
1143    ///
1144    /// let s = String::from_str_in("hello", &b);
1145    ///
1146    /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes());
1147    /// ```
1148    #[inline]
1149    pub fn as_bytes(&self) -> &[u8] {
1150        &self.vec
1151    }
1152
1153    /// Shortens this `String` to the specified length.
1154    ///
1155    /// If `new_len` is greater than the string's current length, this has no
1156    /// effect.
1157    ///
1158    /// Note that this method has no effect on the allocated capacity
1159    /// of the string.
1160    ///
1161    /// # Panics
1162    ///
1163    /// Panics if `new_len` does not lie on a [`char`] boundary.
1164    ///
1165    /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1166    ///
1167    /// # Examples
1168    ///
1169    /// Basic usage:
1170    ///
1171    /// ```
1172    /// use bumpalo::{Bump, collections::String};
1173    ///
1174    /// let b = Bump::new();
1175    ///
1176    /// let mut s = String::from_str_in("hello", &b);
1177    ///
1178    /// s.truncate(2);
1179    ///
1180    /// assert_eq!("he", s);
1181    /// ```
1182    #[inline]
1183    pub fn truncate(&mut self, new_len: usize) {
1184        if new_len <= self.len() {
1185            assert!(self.is_char_boundary(new_len));
1186            self.vec.truncate(new_len)
1187        }
1188    }
1189
1190    /// Removes the last character from the string buffer and returns it.
1191    ///
1192    /// Returns [`None`] if this `String` is empty.
1193    ///
1194    /// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
1195    ///
1196    /// # Examples
1197    ///
1198    /// Basic usage:
1199    ///
1200    /// ```
1201    /// use bumpalo::{Bump, collections::String};
1202    ///
1203    /// let b = Bump::new();
1204    ///
1205    /// let mut s = String::from_str_in("foo", &b);
1206    ///
1207    /// assert_eq!(s.pop(), Some('o'));
1208    /// assert_eq!(s.pop(), Some('o'));
1209    /// assert_eq!(s.pop(), Some('f'));
1210    ///
1211    /// assert_eq!(s.pop(), None);
1212    /// ```
1213    #[inline]
1214    pub fn pop(&mut self) -> Option<char> {
1215        let ch = self.chars().rev().next()?;
1216        let newlen = self.len() - ch.len_utf8();
1217        unsafe {
1218            self.vec.set_len(newlen);
1219        }
1220        Some(ch)
1221    }
1222
1223    /// Removes a [`char`] from this `String` at a byte position and returns it.
1224    ///
1225    /// This is an `O(n)` operation, as it requires copying every element in the
1226    /// buffer.
1227    ///
1228    /// # Panics
1229    ///
1230    /// Panics if `idx` is larger than or equal to the `String`'s length,
1231    /// or if it does not lie on a [`char`] boundary.
1232    ///
1233    /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1234    ///
1235    /// # Examples
1236    ///
1237    /// Basic usage:
1238    ///
1239    /// ```
1240    /// use bumpalo::{Bump, collections::String};
1241    ///
1242    /// let b = Bump::new();
1243    ///
1244    /// let mut s = String::from_str_in("foo", &b);
1245    ///
1246    /// assert_eq!(s.remove(0), 'f');
1247    /// assert_eq!(s.remove(1), 'o');
1248    /// assert_eq!(s.remove(0), 'o');
1249    /// ```
1250    #[inline]
1251    pub fn remove(&mut self, idx: usize) -> char {
1252        let ch = match self[idx..].chars().next() {
1253            Some(ch) => ch,
1254            None => panic!("cannot remove a char from the end of a string"),
1255        };
1256
1257        let next = idx + ch.len_utf8();
1258        let len = self.len();
1259        unsafe {
1260            ptr::copy(
1261                self.vec.as_ptr().add(next),
1262                self.vec.as_mut_ptr().add(idx),
1263                len - next,
1264            );
1265            self.vec.set_len(len - (next - idx));
1266        }
1267        ch
1268    }
1269
1270    /// Retains only the characters specified by the predicate.
1271    ///
1272    /// In other words, remove all characters `c` such that `f(c)` returns `false`.
1273    /// This method operates in place and preserves the order of the retained
1274    /// characters.
1275    ///
1276    /// # Examples
1277    ///
1278    /// ```
1279    /// use bumpalo::{Bump, collections::String};
1280    ///
1281    /// let b = Bump::new();
1282    ///
1283    /// let mut s = String::from_str_in("f_o_ob_ar", &b);
1284    ///
1285    /// s.retain(|c| c != '_');
1286    ///
1287    /// assert_eq!(s, "foobar");
1288    /// ```
1289    #[inline]
1290    pub fn retain<F>(&mut self, mut f: F)
1291    where
1292        F: FnMut(char) -> bool,
1293    {
1294        let len = self.len();
1295        let mut del_bytes = 0;
1296        let mut idx = 0;
1297
1298        while idx < len {
1299            let ch = unsafe { self.get_unchecked(idx..len).chars().next().unwrap() };
1300            let ch_len = ch.len_utf8();
1301
1302            if !f(ch) {
1303                del_bytes += ch_len;
1304            } else if del_bytes > 0 {
1305                unsafe {
1306                    ptr::copy(
1307                        self.vec.as_ptr().add(idx),
1308                        self.vec.as_mut_ptr().add(idx - del_bytes),
1309                        ch_len,
1310                    );
1311                }
1312            }
1313
1314            // Point idx to the next char
1315            idx += ch_len;
1316        }
1317
1318        if del_bytes > 0 {
1319            unsafe {
1320                self.vec.set_len(len - del_bytes);
1321            }
1322        }
1323    }
1324
1325    /// Inserts a character into this `String` at a byte position.
1326    ///
1327    /// This is an `O(n)` operation as it requires copying every element in the
1328    /// buffer.
1329    ///
1330    /// # Panics
1331    ///
1332    /// Panics if `idx` is larger than the `String`'s length, or if it does not
1333    /// lie on a [`char`] boundary.
1334    ///
1335    /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1336    ///
1337    /// # Examples
1338    ///
1339    /// Basic usage:
1340    ///
1341    /// ```
1342    /// use bumpalo::{Bump, collections::String};
1343    ///
1344    /// let b = Bump::new();
1345    ///
1346    /// let mut s = String::with_capacity_in(3, &b);
1347    ///
1348    /// s.insert(0, 'f');
1349    /// s.insert(1, 'o');
1350    /// s.insert(2, 'o');
1351    ///
1352    /// assert_eq!("foo", s);
1353    /// ```
1354    #[inline]
1355    pub fn insert(&mut self, idx: usize, ch: char) {
1356        assert!(self.is_char_boundary(idx));
1357        let mut bits = [0; 4];
1358        let bits = ch.encode_utf8(&mut bits).as_bytes();
1359
1360        unsafe {
1361            self.insert_bytes(idx, bits);
1362        }
1363    }
1364
1365    unsafe fn insert_bytes(&mut self, idx: usize, bytes: &[u8]) {
1366        let len = self.len();
1367        let amt = bytes.len();
1368        self.vec.reserve(amt);
1369
1370        ptr::copy(
1371            self.vec.as_ptr().add(idx),
1372            self.vec.as_mut_ptr().add(idx + amt),
1373            len - idx,
1374        );
1375        ptr::copy(bytes.as_ptr(), self.vec.as_mut_ptr().add(idx), amt);
1376        self.vec.set_len(len + amt);
1377    }
1378
1379    /// Inserts a string slice into this `String` at a byte position.
1380    ///
1381    /// This is an `O(n)` operation as it requires copying every element in the
1382    /// buffer.
1383    ///
1384    /// # Panics
1385    ///
1386    /// Panics if `idx` is larger than the `String`'s length, or if it does not
1387    /// lie on a [`char`] boundary.
1388    ///
1389    /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1390    ///
1391    /// # Examples
1392    ///
1393    /// Basic usage:
1394    ///
1395    /// ```
1396    /// use bumpalo::{Bump, collections::String};
1397    ///
1398    /// let b = Bump::new();
1399    ///
1400    /// let mut s = String::from_str_in("bar", &b);
1401    ///
1402    /// s.insert_str(0, "foo");
1403    ///
1404    /// assert_eq!("foobar", s);
1405    /// ```
1406    #[inline]
1407    pub fn insert_str(&mut self, idx: usize, string: &str) {
1408        assert!(self.is_char_boundary(idx));
1409
1410        unsafe {
1411            self.insert_bytes(idx, string.as_bytes());
1412        }
1413    }
1414
1415    /// Returns a mutable reference to the contents of this `String`.
1416    ///
1417    /// # Safety
1418    ///
1419    /// This function is unsafe because the returned `&mut Vec` allows writing
1420    /// bytes which are not valid UTF-8. If this constraint is violated, using
1421    /// the original `String` after dropping the `&mut Vec` may violate memory
1422    /// safety, as it is assumed that `String`s are valid UTF-8.
1423    ///
1424    /// # Examples
1425    ///
1426    /// Basic usage:
1427    ///
1428    /// ```
1429    /// use bumpalo::{Bump, collections::String};
1430    ///
1431    /// let b = Bump::new();
1432    ///
1433    /// let mut s = String::from_str_in("hello", &b);
1434    ///
1435    /// unsafe {
1436    ///     let vec = s.as_mut_vec();
1437    ///     assert_eq!(vec, &[104, 101, 108, 108, 111]);
1438    ///
1439    ///     vec.reverse();
1440    /// }
1441    /// assert_eq!(s, "olleh");
1442    /// ```
1443    #[inline]
1444    pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<'bump, u8> {
1445        &mut self.vec
1446    }
1447
1448    /// Returns the length of this `String`, in bytes.
1449    ///
1450    /// # Examples
1451    ///
1452    /// Basic usage:
1453    ///
1454    /// ```
1455    /// use bumpalo::{Bump, collections::String};
1456    ///
1457    /// let b = Bump::new();
1458    ///
1459    /// let a = String::from_str_in("foo", &b);
1460    ///
1461    /// assert_eq!(a.len(), 3);
1462    /// ```
1463    #[inline]
1464    pub fn len(&self) -> usize {
1465        self.vec.len()
1466    }
1467
1468    /// Returns `true` if this `String` has a length of zero.
1469    ///
1470    /// Returns `false` otherwise.
1471    ///
1472    /// # Examples
1473    ///
1474    /// Basic usage:
1475    ///
1476    /// ```
1477    /// use bumpalo::{Bump, collections::String};
1478    ///
1479    /// let b = Bump::new();
1480    ///
1481    /// let mut v = String::new_in(&b);
1482    /// assert!(v.is_empty());
1483    ///
1484    /// v.push('a');
1485    /// assert!(!v.is_empty());
1486    /// ```
1487    #[inline]
1488    pub fn is_empty(&self) -> bool {
1489        self.len() == 0
1490    }
1491
1492    /// Splits the string into two at the given index.
1493    ///
1494    /// Returns a newly allocated `String`. `self` contains bytes `[0, at)`, and
1495    /// the returned `String` contains bytes `[at, len)`. `at` must be on the
1496    /// boundary of a UTF-8 code point.
1497    ///
1498    /// Note that the capacity of `self` does not change.
1499    ///
1500    /// # Panics
1501    ///
1502    /// Panics if `at` is not on a UTF-8 code point boundary, or if it is beyond the last
1503    /// code point of the string.
1504    ///
1505    /// # Examples
1506    ///
1507    /// ```
1508    /// use bumpalo::{Bump, collections::String};
1509    ///
1510    /// let b = Bump::new();
1511    ///
1512    /// let mut hello = String::from_str_in("Hello, World!", &b);
1513    /// let world = hello.split_off(7);
1514    /// assert_eq!(hello, "Hello, ");
1515    /// assert_eq!(world, "World!");
1516    /// ```
1517    #[inline]
1518    pub fn split_off(&mut self, at: usize) -> String<'bump> {
1519        assert!(self.is_char_boundary(at));
1520        let other = self.vec.split_off(at);
1521        unsafe { String::from_utf8_unchecked(other) }
1522    }
1523
1524    /// Truncates this `String`, removing all contents.
1525    ///
1526    /// While this means the `String` will have a length of zero, it does not
1527    /// touch its capacity.
1528    ///
1529    /// # Examples
1530    ///
1531    /// Basic usage:
1532    ///
1533    /// ```
1534    /// use bumpalo::{Bump, collections::String};
1535    ///
1536    /// let b = Bump::new();
1537    ///
1538    /// let mut s = String::from_str_in("foo", &b);
1539    ///
1540    /// s.clear();
1541    ///
1542    /// assert!(s.is_empty());
1543    /// assert_eq!(0, s.len());
1544    /// assert_eq!(3, s.capacity());
1545    /// ```
1546    #[inline]
1547    pub fn clear(&mut self) {
1548        self.vec.clear()
1549    }
1550
1551    /// Creates a draining iterator that removes the specified range in the `String`
1552    /// and yields the removed `chars`.
1553    ///
1554    /// Note: The element range is removed even if the iterator is not
1555    /// consumed until the end.
1556    ///
1557    /// # Panics
1558    ///
1559    /// Panics if the starting point or end point do not lie on a [`char`]
1560    /// boundary, or if they're out of bounds.
1561    ///
1562    /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1563    ///
1564    /// # Examples
1565    ///
1566    /// Basic usage:
1567    ///
1568    /// ```
1569    /// use bumpalo::{Bump, collections::String};
1570    ///
1571    /// let b = Bump::new();
1572    ///
1573    /// let mut s = String::from_str_in("α is alpha, β is beta", &b);
1574    /// let beta_offset = s.find('β').unwrap_or(s.len());
1575    ///
1576    /// // Remove the range up until the β from the string
1577    /// let t = String::from_iter_in(s.drain(..beta_offset), &b);
1578    /// assert_eq!(t, "α is alpha, ");
1579    /// assert_eq!(s, "β is beta");
1580    ///
1581    /// // A full range clears the string
1582    /// drop(s.drain(..));
1583    /// assert_eq!(s, "");
1584    /// ```
1585    pub fn drain<'a, R>(&'a mut self, range: R) -> Drain<'a, 'bump>
1586    where
1587        R: RangeBounds<usize>,
1588    {
1589        // Memory safety
1590        //
1591        // The String version of Drain does not have the memory safety issues
1592        // of the vector version. The data is just plain bytes.
1593        // Because the range removal happens in Drop, if the Drain iterator is leaked,
1594        // the removal will not happen.
1595        let len = self.len();
1596        let start = match range.start_bound() {
1597            Included(&n) => n,
1598            Excluded(&n) => n + 1,
1599            Unbounded => 0,
1600        };
1601        let end = match range.end_bound() {
1602            Included(&n) => n + 1,
1603            Excluded(&n) => n,
1604            Unbounded => len,
1605        };
1606
1607        // Take out two simultaneous borrows. The &mut String won't be accessed
1608        // until iteration is over, in Drop.
1609        let self_ptr = self as *mut _;
1610        // slicing does the appropriate bounds checks
1611        let chars_iter = self[start..end].chars();
1612
1613        Drain {
1614            start,
1615            end,
1616            iter: chars_iter,
1617            string: self_ptr,
1618        }
1619    }
1620
1621    /// Removes the specified range in the string,
1622    /// and replaces it with the given string.
1623    /// The given string doesn't need to be the same length as the range.
1624    ///
1625    /// # Panics
1626    ///
1627    /// Panics if the starting point or end point do not lie on a [`char`]
1628    /// boundary, or if they're out of bounds.
1629    ///
1630    /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1631    /// [`Vec::splice`]: ../vec/struct.Vec.html#method.splice
1632    ///
1633    /// # Examples
1634    ///
1635    /// Basic usage:
1636    ///
1637    /// ```
1638    /// use bumpalo::{Bump, collections::String};
1639    ///
1640    /// let b = Bump::new();
1641    ///
1642    /// let mut s = String::from_str_in("α is alpha, β is beta", &b);
1643    /// let beta_offset = s.find('β').unwrap_or(s.len());
1644    ///
1645    /// // Replace the range up until the β from the string
1646    /// s.replace_range(..beta_offset, "Α is capital alpha; ");
1647    /// assert_eq!(s, "Α is capital alpha; β is beta");
1648    /// ```
1649    pub fn replace_range<R>(&mut self, range: R, replace_with: &str)
1650    where
1651        R: RangeBounds<usize>,
1652    {
1653        // Memory safety
1654        //
1655        // Replace_range does not have the memory safety issues of a vector Splice.
1656        // of the vector version. The data is just plain bytes.
1657
1658        match range.start_bound() {
1659            Included(&n) => assert!(self.is_char_boundary(n)),
1660            Excluded(&n) => assert!(self.is_char_boundary(n + 1)),
1661            Unbounded => {}
1662        };
1663        match range.end_bound() {
1664            Included(&n) => assert!(self.is_char_boundary(n + 1)),
1665            Excluded(&n) => assert!(self.is_char_boundary(n)),
1666            Unbounded => {}
1667        };
1668
1669        unsafe { self.as_mut_vec() }.splice(range, replace_with.bytes());
1670    }
1671}
1672
1673impl<'bump> FromUtf8Error<'bump> {
1674    /// Returns a slice of bytes that were attempted to convert to a `String`.
1675    ///
1676    /// # Examples
1677    ///
1678    /// Basic usage:
1679    ///
1680    /// ```
1681    /// use bumpalo::{Bump, collections::String};
1682    ///
1683    /// let b = Bump::new();
1684    ///
1685    /// // some invalid bytes, in a vector
1686    /// let bytes = bumpalo::vec![in &b; 0, 159];
1687    ///
1688    /// let value = String::from_utf8(bytes);
1689    ///
1690    /// assert_eq!(&[0, 159], value.unwrap_err().as_bytes());
1691    /// ```
1692    pub fn as_bytes(&self) -> &[u8] {
1693        &self.bytes[..]
1694    }
1695
1696    /// Returns the bytes that were attempted to convert to a `String`.
1697    ///
1698    /// This method is carefully constructed to avoid allocation. It will
1699    /// consume the error, moving out the bytes, so that a copy of the bytes
1700    /// does not need to be made.
1701    ///
1702    /// # Examples
1703    ///
1704    /// Basic usage:
1705    ///
1706    /// ```
1707    /// use bumpalo::{Bump, collections::String};
1708    ///
1709    /// let b = Bump::new();
1710    ///
1711    /// // some invalid bytes, in a vector
1712    /// let bytes = bumpalo::vec![in &b; 0, 159];
1713    ///
1714    /// let value = String::from_utf8(bytes);
1715    ///
1716    /// assert_eq!(bumpalo::vec![in &b; 0, 159], value.unwrap_err().into_bytes());
1717    /// ```
1718    pub fn into_bytes(self) -> Vec<'bump, u8> {
1719        self.bytes
1720    }
1721
1722    /// Fetch a `Utf8Error` to get more details about the conversion failure.
1723    ///
1724    /// The [`Utf8Error`] type provided by [`std::str`] represents an error that may
1725    /// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's
1726    /// an analogue to `FromUtf8Error`. See its documentation for more details
1727    /// on using it.
1728    ///
1729    /// [`Utf8Error`]: https://doc.rust-lang.org/std/str/struct.Utf8Error.html
1730    /// [`std::str`]: https://doc.rust-lang.org/std/str/index.html
1731    /// [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html
1732    /// [`&str`]: https://doc.rust-lang.org/std/primitive.str.html
1733    ///
1734    /// # Examples
1735    ///
1736    /// Basic usage:
1737    ///
1738    /// ```
1739    /// use bumpalo::{Bump, collections::String};
1740    ///
1741    /// let b = Bump::new();
1742    ///
1743    /// // some invalid bytes, in a vector
1744    /// let bytes = bumpalo::vec![in &b; 0, 159];
1745    ///
1746    /// let error = String::from_utf8(bytes).unwrap_err().utf8_error();
1747    ///
1748    /// // the first byte is invalid here
1749    /// assert_eq!(1, error.valid_up_to());
1750    /// ```
1751    pub fn utf8_error(&self) -> Utf8Error {
1752        self.error
1753    }
1754}
1755
1756impl<'bump> fmt::Display for FromUtf8Error<'bump> {
1757    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1758        fmt::Display::fmt(&self.error, f)
1759    }
1760}
1761
1762impl fmt::Display for FromUtf16Error {
1763    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1764        fmt::Display::fmt("invalid utf-16: lone surrogate found", f)
1765    }
1766}
1767
1768impl<'bump> Clone for String<'bump> {
1769    fn clone(&self) -> Self {
1770        String {
1771            vec: self.vec.clone(),
1772        }
1773    }
1774
1775    fn clone_from(&mut self, source: &Self) {
1776        self.vec.clone_from(&source.vec);
1777    }
1778}
1779
1780impl<'bump> Extend<char> for String<'bump> {
1781    fn extend<I: IntoIterator<Item = char>>(&mut self, iter: I) {
1782        let iterator = iter.into_iter();
1783        let (lower_bound, _) = iterator.size_hint();
1784        self.reserve(lower_bound);
1785        for ch in iterator {
1786            self.push(ch)
1787        }
1788    }
1789}
1790
1791impl<'a, 'bump> Extend<&'a char> for String<'bump> {
1792    fn extend<I: IntoIterator<Item = &'a char>>(&mut self, iter: I) {
1793        self.extend(iter.into_iter().cloned());
1794    }
1795}
1796
1797impl<'a, 'bump> Extend<&'a str> for String<'bump> {
1798    fn extend<I: IntoIterator<Item = &'a str>>(&mut self, iter: I) {
1799        for s in iter {
1800            self.push_str(s)
1801        }
1802    }
1803}
1804
1805impl<'bump> Extend<String<'bump>> for String<'bump> {
1806    fn extend<I: IntoIterator<Item = String<'bump>>>(&mut self, iter: I) {
1807        for s in iter {
1808            self.push_str(&s)
1809        }
1810    }
1811}
1812
1813impl<'bump> Extend<core_alloc::string::String> for String<'bump> {
1814    fn extend<I: IntoIterator<Item = core_alloc::string::String>>(&mut self, iter: I) {
1815        for s in iter {
1816            self.push_str(&s)
1817        }
1818    }
1819}
1820
1821impl<'a, 'bump> Extend<Cow<'a, str>> for String<'bump> {
1822    fn extend<I: IntoIterator<Item = Cow<'a, str>>>(&mut self, iter: I) {
1823        for s in iter {
1824            self.push_str(&s)
1825        }
1826    }
1827}
1828
1829impl<'bump> PartialEq for String<'bump> {
1830    #[inline]
1831    fn eq(&self, other: &String) -> bool {
1832        PartialEq::eq(&self[..], &other[..])
1833    }
1834}
1835
// Generates `PartialEq` in both directions between two string-like types by
// comparing their full `str` contents.
macro_rules! impl_eq {
    ($left:ty, $right: ty) => {
        impl<'a, 'bump> PartialEq<$right> for $left {
            #[inline]
            fn eq(&self, other: &$right) -> bool {
                self[..] == other[..]
            }
        }

        impl<'a, 'b, 'bump> PartialEq<$left> for $right {
            #[inline]
            fn eq(&self, other: &$left) -> bool {
                self[..] == other[..]
            }
        }
    };
}
1853
1854impl_eq! { String<'bump>, str }
1855impl_eq! { String<'bump>, &'a str }
1856impl_eq! { Cow<'a, str>, String<'bump> }
1857impl_eq! { core_alloc::string::String, String<'bump> }
1858
1859impl<'bump> fmt::Display for String<'bump> {
1860    #[inline]
1861    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1862        fmt::Display::fmt(&**self, f)
1863    }
1864}
1865
1866impl<'bump> fmt::Debug for String<'bump> {
1867    #[inline]
1868    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1869        fmt::Debug::fmt(&**self, f)
1870    }
1871}
1872
1873impl<'bump> hash::Hash for String<'bump> {
1874    #[inline]
1875    fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
1876        (**self).hash(hasher)
1877    }
1878}
1879
1880/// Implements the `+` operator for concatenating two strings.
1881///
1882/// This consumes the `String<'bump>` on the left-hand side and re-uses its buffer (growing it if
1883/// necessary). This is done to avoid allocating a new `String<'bump>` and copying the entire contents on
1884/// every operation, which would lead to `O(n^2)` running time when building an `n`-byte string by
1885/// repeated concatenation.
1886///
1887/// The string on the right-hand side is only borrowed; its contents are copied into the returned
1888/// `String<'bump>`.
1889///
1890/// # Examples
1891///
1892/// Concatenating two `String<'bump>`s takes the first by value and borrows the second:
1893///
1894/// ```
1895/// use bumpalo::{Bump, collections::String};
1896///
1897/// let bump = Bump::new();
1898///
1899/// let a = String::from_str_in("hello", &bump);
1900/// let b = String::from_str_in(" world", &bump);
1901/// let c = a + &b;
1902/// // `a` is moved and can no longer be used here.
1903/// ```
1904///
1905/// If you want to keep using the first `String`, you can clone it and append to the clone instead:
1906///
1907/// ```
1908/// use bumpalo::{Bump, collections::String};
1909///
1910/// let bump = Bump::new();
1911///
1912/// let a = String::from_str_in("hello", &bump);
1913/// let b = String::from_str_in(" world", &bump);
1914/// let c = a.clone() + &b;
1915/// // `a` is still valid here.
1916/// ```
1917///
1918/// Concatenating `&str` slices can be done by converting the first to a `String`:
1919///
1920/// ```
1921/// use bumpalo::{Bump, collections::String};
1922///
1923/// let bump = Bump::new();
1924///
1925/// let a = "hello";
1926/// let b = " world";
1927/// let c = String::from_str_in(a, &bump) + b;
1928/// ```
1929impl<'a, 'bump> Add<&'a str> for String<'bump> {
1930    type Output = String<'bump>;
1931
1932    #[inline]
1933    fn add(mut self, other: &str) -> String<'bump> {
1934        self.push_str(other);
1935        self
1936    }
1937}
1938
1939/// Implements the `+=` operator for appending to a `String<'bump>`.
1940///
1941/// This has the same behavior as the [`push_str`][String::push_str] method.
1942impl<'a, 'bump> AddAssign<&'a str> for String<'bump> {
1943    #[inline]
1944    fn add_assign(&mut self, other: &str) {
1945        self.push_str(other);
1946    }
1947}
1948
1949impl<'bump> ops::Index<ops::Range<usize>> for String<'bump> {
1950    type Output = str;
1951
1952    #[inline]
1953    fn index(&self, index: ops::Range<usize>) -> &str {
1954        &self[..][index]
1955    }
1956}
1957impl<'bump> ops::Index<ops::RangeTo<usize>> for String<'bump> {
1958    type Output = str;
1959
1960    #[inline]
1961    fn index(&self, index: ops::RangeTo<usize>) -> &str {
1962        &self[..][index]
1963    }
1964}
1965impl<'bump> ops::Index<ops::RangeFrom<usize>> for String<'bump> {
1966    type Output = str;
1967
1968    #[inline]
1969    fn index(&self, index: ops::RangeFrom<usize>) -> &str {
1970        &self[..][index]
1971    }
1972}
1973impl<'bump> ops::Index<ops::RangeFull> for String<'bump> {
1974    type Output = str;
1975
1976    #[inline]
1977    fn index(&self, _index: ops::RangeFull) -> &str {
1978        unsafe { str::from_utf8_unchecked(&self.vec) }
1979    }
1980}
1981impl<'bump> ops::Index<ops::RangeInclusive<usize>> for String<'bump> {
1982    type Output = str;
1983
1984    #[inline]
1985    fn index(&self, index: ops::RangeInclusive<usize>) -> &str {
1986        Index::index(&**self, index)
1987    }
1988}
1989impl<'bump> ops::Index<ops::RangeToInclusive<usize>> for String<'bump> {
1990    type Output = str;
1991
1992    #[inline]
1993    fn index(&self, index: ops::RangeToInclusive<usize>) -> &str {
1994        Index::index(&**self, index)
1995    }
1996}
1997
1998impl<'bump> ops::IndexMut<ops::Range<usize>> for String<'bump> {
1999    #[inline]
2000    fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str {
2001        &mut self[..][index]
2002    }
2003}
2004impl<'bump> ops::IndexMut<ops::RangeTo<usize>> for String<'bump> {
2005    #[inline]
2006    fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str {
2007        &mut self[..][index]
2008    }
2009}
2010impl<'bump> ops::IndexMut<ops::RangeFrom<usize>> for String<'bump> {
2011    #[inline]
2012    fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str {
2013        &mut self[..][index]
2014    }
2015}
2016impl<'bump> ops::IndexMut<ops::RangeFull> for String<'bump> {
2017    #[inline]
2018    fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
2019        unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) }
2020    }
2021}
2022impl<'bump> ops::IndexMut<ops::RangeInclusive<usize>> for String<'bump> {
2023    #[inline]
2024    fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str {
2025        IndexMut::index_mut(&mut **self, index)
2026    }
2027}
2028impl<'bump> ops::IndexMut<ops::RangeToInclusive<usize>> for String<'bump> {
2029    #[inline]
2030    fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str {
2031        IndexMut::index_mut(&mut **self, index)
2032    }
2033}
2034
2035impl<'bump> ops::Deref for String<'bump> {
2036    type Target = str;
2037
2038    #[inline]
2039    fn deref(&self) -> &str {
2040        unsafe { str::from_utf8_unchecked(&self.vec) }
2041    }
2042}
2043
2044impl<'bump> ops::DerefMut for String<'bump> {
2045    #[inline]
2046    fn deref_mut(&mut self) -> &mut str {
2047        unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) }
2048    }
2049}
2050
2051impl<'bump> AsRef<str> for String<'bump> {
2052    #[inline]
2053    fn as_ref(&self) -> &str {
2054        self
2055    }
2056}
2057
2058impl<'bump> AsRef<[u8]> for String<'bump> {
2059    #[inline]
2060    fn as_ref(&self) -> &[u8] {
2061        self.as_bytes()
2062    }
2063}
2064
2065impl<'bump> fmt::Write for String<'bump> {
2066    #[inline]
2067    fn write_str(&mut self, s: &str) -> fmt::Result {
2068        self.push_str(s);
2069        Ok(())
2070    }
2071
2072    #[inline]
2073    fn write_char(&mut self, c: char) -> fmt::Result {
2074        self.push(c);
2075        Ok(())
2076    }
2077}
2078
2079impl<'bump> Borrow<str> for String<'bump> {
2080    #[inline]
2081    fn borrow(&self) -> &str {
2082        &self[..]
2083    }
2084}
2085
2086impl<'bump> BorrowMut<str> for String<'bump> {
2087    #[inline]
2088    fn borrow_mut(&mut self) -> &mut str {
2089        &mut self[..]
2090    }
2091}
2092
/// A draining iterator for `String`.
///
/// This struct is created by the [`String::drain`] method. See its
/// documentation for more information.
///
/// Iteration yields `char`s from `iter`. The bytes in `start..end` are
/// removed from the underlying string when the `Drain` is dropped.
pub struct Drain<'a, 'bump> {
    /// Raw pointer to the string being drained.
    /// Will be used as `&'a mut String` in the destructor.
    string: *mut String<'bump>,
    /// Start of part to remove (byte offset; lower bound of the range
    /// passed to `Vec::drain` on drop)
    start: usize,
    /// End of part to remove (byte offset; exclusive upper bound of the
    /// range passed to `Vec::drain` on drop)
    end: usize,
    /// Current remaining range to remove; all iterator methods delegate
    /// to this `Chars`
    iter: Chars<'a>,
}
2107
2108impl<'a, 'bump> fmt::Debug for Drain<'a, 'bump> {
2109    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2110        f.pad("Drain { .. }")
2111    }
2112}
2113
// `Drain` stores a raw pointer (`*mut String<'bump>`), which suppresses the
// automatic `Sync`/`Send` implementations; these impls opt back in manually.
// NOTE(review): soundness presumably rests on `Drain` only ever touching the
// string's bytes (iteration plus the range removal in `Drop`) — confirm this
// holds given that the pointed-to `String<'bump>` is tied to a `Bump`.
unsafe impl<'a, 'bump> Sync for Drain<'a, 'bump> {}
unsafe impl<'a, 'bump> Send for Drain<'a, 'bump> {}
2116
2117impl<'a, 'bump> Drop for Drain<'a, 'bump> {
2118    fn drop(&mut self) {
2119        unsafe {
2120            // Use Vec::drain. "Reaffirm" the bounds checks to avoid
2121            // panic code being inserted again.
2122            let self_vec = (*self.string).as_mut_vec();
2123            if self.start <= self.end && self.end <= self_vec.len() {
2124                self_vec.drain(self.start..self.end);
2125            }
2126        }
2127    }
2128}
2129
2130// TODO: implement `AsRef<str/[u8]>` and `as_str`
2131
2132impl<'a, 'bump> Iterator for Drain<'a, 'bump> {
2133    type Item = char;
2134
2135    #[inline]
2136    fn next(&mut self) -> Option<char> {
2137        self.iter.next()
2138    }
2139
2140    fn size_hint(&self) -> (usize, Option<usize>) {
2141        self.iter.size_hint()
2142    }
2143}
2144
2145impl<'a, 'bump> DoubleEndedIterator for Drain<'a, 'bump> {
2146    #[inline]
2147    fn next_back(&mut self) -> Option<char> {
2148        self.iter.next_back()
2149    }
2150}
2151
// Marker impl: `next`/`next_back` forward straight to the inner `Chars`
// iterator, so `Drain` is presumably fused because `Chars` is — confirm
// against the `core::str::Chars` documentation.
impl<'a, 'bump> FusedIterator for Drain<'a, 'bump> {}