bumpalo/collections/string.rs
1// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! A UTF-8 encoded, growable string.
12//!
13//! This module contains the [`String`] type and several error types that may
14//! result from working with [`String`]s.
15//!
16//! This module is a fork of the [`std::string`] module, that uses a bump allocator.
17//!
18//! [`std::string`]: https://doc.rust-lang.org/std/string/index.html
19//!
20//! # Examples
21//!
22//! You can create a new [`String`] from a string literal with [`String::from_str_in`]:
23//!
24//! ```
25//! use bumpalo::{Bump, collections::String};
26//!
27//! let b = Bump::new();
28//!
29//! let s = String::from_str_in("world", &b);
30//! ```
31//!
32//! [`String`]: struct.String.html
33//! [`String::from_str_in`]: struct.String.html#method.from_str_in
34//!
35//! If you have a vector of valid UTF-8 bytes, you can make a [`String`] out of
36//! it. You can do the reverse too.
37//!
38//! ```
39//! use bumpalo::{Bump, collections::String};
40//!
41//! let b = Bump::new();
42//!
43//! let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
44//!
45//! // We know these bytes are valid, so we'll use `unwrap()`.
46//! let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
47//!
48//! assert_eq!("💖", sparkle_heart);
49//!
50//! let bytes = sparkle_heart.into_bytes();
51//!
52//! assert_eq!(bytes, [240, 159, 146, 150]);
53//! ```
54
55use crate::collections::str::lossy;
56use crate::collections::vec::Vec;
57use crate::Bump;
58use core::borrow::{Borrow, BorrowMut};
59use core::char::decode_utf16;
60use core::fmt;
61use core::hash;
62use core::iter::FusedIterator;
63use core::mem;
64use core::ops::Bound::{Excluded, Included, Unbounded};
65use core::ops::{self, Add, AddAssign, Index, IndexMut, RangeBounds};
66use core::ptr;
67use core::str::{self, Chars, Utf8Error};
68use core_alloc::borrow::Cow;
69
/// Like the [`format!`] macro, but for creating [`bumpalo::collections::String`]s.
///
/// The arena expression comes first, introduced by `in`. Everything after the
/// comma that follows it is forwarded unchanged to [`write!`], so a bare format
/// string, positional arguments, named arguments, and a trailing comma are all
/// accepted.
///
/// The result of the underlying [`write!`] call is discarded, so an error
/// returned by an argument's formatting implementation does not panic; the
/// string built so far is returned as-is.
///
/// [`format!`]: https://doc.rust-lang.org/std/macro.format.html
/// [`write!`]: https://doc.rust-lang.org/std/macro.write.html
/// [`bumpalo::collections::String`]: collections/string/struct.String.html
///
/// # Examples
///
/// ```
/// use bumpalo::Bump;
///
/// let b = Bump::new();
///
/// let who = "World";
/// let s = bumpalo::format!(in &b, "Hello, {}!", who);
/// assert_eq!(s, "Hello, World!");
///
/// // A format string without any arguments works too.
/// let plain = bumpalo::format!(in &b, "Hello!");
/// assert_eq!(plain, "Hello!");
///
/// // So do named arguments and a trailing comma.
/// let named = bumpalo::format!(in &b, "Hello, {name}!", name = who,);
/// assert_eq!(named, "Hello, World!");
/// ```
#[macro_export]
macro_rules! format {
    // The format string and its arguments are captured as raw token trees
    // (the same shape `write!` itself uses) rather than as `expr` fragments.
    // This keeps `name = value` arguments intact and does not force a comma
    // after the format string.
    ( in $bump:expr, $($arg:tt)* ) => {{
        use $crate::core_alloc::fmt::Write;
        // Evaluate the arena expression exactly once, before the arguments.
        let bump = $bump;
        let mut s = $crate::collections::String::new_in(bump);
        // Deliberately best-effort: a formatting error is ignored.
        let _ = write!(&mut s, $($arg)*);
        s
    }};
}
100
101/// A UTF-8 encoded, growable string.
102///
103/// The `String` type is the most common string type that has ownership over the
104/// contents of the string. It has a close relationship with its borrowed
105/// counterpart, the primitive [`str`].
106///
107/// [`str`]: https://doc.rust-lang.org/std/primitive.str.html
108///
109/// # Examples
110///
111/// You can create a `String` from a literal string with [`String::from_str_in`]:
112///
113/// ```
114/// use bumpalo::{Bump, collections::String};
115///
116/// let b = Bump::new();
117///
118/// let hello = String::from_str_in("Hello, world!", &b);
119/// ```
120///
121/// You can append a [`char`] to a `String` with the [`push`] method, and
122/// append a [`&str`] with the [`push_str`] method:
123///
124/// ```
125/// use bumpalo::{Bump, collections::String};
126///
127/// let b = Bump::new();
128///
129/// let mut hello = String::from_str_in("Hello, ", &b);
130///
131/// hello.push('w');
132/// hello.push_str("orld!");
133/// ```
134///
135/// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
136/// [`push`]: #method.push
137/// [`push_str`]: #method.push_str
138///
139/// If you have a vector of UTF-8 bytes, you can create a `String` from it with
140/// the [`from_utf8`] method:
141///
142/// ```
143/// use bumpalo::{Bump, collections::String};
144///
145/// let b = Bump::new();
146///
147/// // some bytes, in a vector
148/// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
149///
150/// // We know these bytes are valid, so we'll use `unwrap()`.
151/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
152///
153/// assert_eq!("💖", sparkle_heart);
154/// ```
155///
156/// [`from_utf8`]: #method.from_utf8
157///
158/// # Deref
159///
160/// `String`s implement <code>[`Deref`]<Target = [`str`]></code>, and so inherit all of [`str`]'s
161/// methods. In addition, this means that you can pass a `String` to a
162/// function which takes a [`&str`] by using an ampersand (`&`):
163///
164/// ```
165/// use bumpalo::{Bump, collections::String};
166///
167/// let b = Bump::new();
168///
169/// fn takes_str(s: &str) { }
170///
171/// let s = String::from_str_in("Hello", &b);
172///
173/// takes_str(&s);
174/// ```
175///
176/// This will create a [`&str`] from the `String` and pass it in. This
177/// conversion is very inexpensive, and so generally, functions will accept
178/// [`&str`]s as arguments unless they need a `String` for some specific
179/// reason.
180///
181/// In certain cases Rust doesn't have enough information to make this
182/// conversion, known as [`Deref`] coercion. In the following example a string
183/// slice [`&'a str`][`&str`] implements the trait `TraitExample`, and the function
184/// `example_func` takes anything that implements the trait. In this case Rust
185/// would need to make two implicit conversions, which Rust doesn't have the
186/// means to do. For that reason, the following example will not compile.
187///
188/// ```compile_fail,E0277
189/// use bumpalo::{Bump, collections::String};
190///
191/// trait TraitExample {}
192///
193/// impl<'a> TraitExample for &'a str {}
194///
195/// fn example_func<A: TraitExample>(example_arg: A) {}
196///
197/// let b = Bump::new();
198/// let example_string = String::from_str_in("example_string", &b);
199/// example_func(&example_string);
200/// ```
201///
202/// There are two options that would work instead. The first would be to
203/// change the line `example_func(&example_string);` to
204/// `example_func(example_string.as_str());`, using the method [`as_str()`]
205/// to explicitly extract the string slice containing the string. The second
206/// way changes `example_func(&example_string);` to
207/// `example_func(&*example_string);`. In this case we are dereferencing a
208/// `String` to a [`str`][`&str`], then referencing the [`str`][`&str`] back to
209/// [`&str`]. The second way is more idiomatic, however both work to do the
210/// conversion explicitly rather than relying on the implicit conversion.
211///
212/// # Representation
213///
214/// A `String` is made up of three components: a pointer to some bytes, a
215/// length, and a capacity. The pointer points to an internal buffer `String`
216/// uses to store its data. The length is the number of bytes currently stored
217/// in the buffer, and the capacity is the size of the buffer in bytes. As such,
218/// the length will always be less than or equal to the capacity.
219///
220/// This buffer is always stored on the heap.
221///
222/// You can look at these with the [`as_ptr`], [`len`], and [`capacity`]
223/// methods:
224///
225/// ```
226/// use bumpalo::{Bump, collections::String};
227/// use std::mem;
228///
229/// let b = Bump::new();
230///
231/// let mut story = String::from_str_in("Once upon a time...", &b);
232///
233/// let ptr = story.as_mut_ptr();
234/// let len = story.len();
235/// let capacity = story.capacity();
236///
237/// // story has nineteen bytes
238/// assert_eq!(19, len);
239///
240/// // Now that we have our parts, we throw the story away.
241/// mem::forget(story);
242///
243/// // We can re-build a String out of ptr, len, and capacity. This is all
244/// // unsafe because we are responsible for making sure the components are
245/// // valid:
246/// let s = unsafe { String::from_raw_parts_in(ptr, len, capacity, &b) } ;
247///
248/// assert_eq!(String::from_str_in("Once upon a time...", &b), s);
249/// ```
250///
251/// [`as_ptr`]: https://doc.rust-lang.org/std/primitive.str.html#method.as_ptr
252/// [`len`]: #method.len
253/// [`capacity`]: #method.capacity
254///
255/// If a `String` has enough capacity, adding elements to it will not
256/// re-allocate. For example, consider this program:
257///
258/// ```
259/// use bumpalo::{Bump, collections::String};
260///
261/// let b = Bump::new();
262///
263/// let mut s = String::new_in(&b);
264///
265/// println!("{}", s.capacity());
266///
267/// for _ in 0..5 {
268/// s.push_str("hello");
269/// println!("{}", s.capacity());
270/// }
271/// ```
272///
273/// This will output the following:
274///
275/// ```text
276/// 0
277/// 5
278/// 10
279/// 20
280/// 20
281/// 40
282/// ```
283///
284/// At first, we have no memory allocated at all, but as we append to the
285/// string, it increases its capacity appropriately. If we instead use the
286/// [`with_capacity_in`] method to allocate the correct capacity initially:
287///
288/// ```
289/// use bumpalo::{Bump, collections::String};
290///
291/// let b = Bump::new();
292///
293/// let mut s = String::with_capacity_in(25, &b);
294///
295/// println!("{}", s.capacity());
296///
297/// for _ in 0..5 {
298/// s.push_str("hello");
299/// println!("{}", s.capacity());
300/// }
301/// ```
302///
303/// [`with_capacity_in`]: #method.with_capacity_in
304///
305/// We end up with a different output:
306///
307/// ```text
308/// 25
309/// 25
310/// 25
311/// 25
312/// 25
313/// 25
314/// ```
315///
316/// Here, there's no need to allocate more memory inside the loop.
317///
318/// [`&str`]: https://doc.rust-lang.org/std/primitive.str.html
319/// [`Deref`]: https://doc.rust-lang.org/std/ops/trait.Deref.html
320/// [`as_str()`]: struct.String.html#method.as_str
// Only `PartialOrd`, `Eq` and `Ord` are derived here.
// NOTE(review): `PartialEq` is presumably implemented by hand elsewhere in this
// file (the doc examples compare a `String` against `&str`) — confirm.
#[derive(PartialOrd, Eq, Ord)]
pub struct String<'bump> {
    // The string's bytes, held in a vector tied to the `'bump` arena lifetime.
    // The safe constructors in this file only store bytes that are valid UTF-8.
    vec: Vec<'bump, u8>,
}
325
326/// A possible error value when converting a `String` from a UTF-8 byte vector.
327///
328/// This type is the error type for the [`from_utf8`] method on [`String`]. It
329/// is designed in such a way to carefully avoid reallocations: the
330/// [`into_bytes`] method will give back the byte vector that was used in the
331/// conversion attempt.
332///
333/// [`from_utf8`]: struct.String.html#method.from_utf8
334/// [`String`]: struct.String.html
335/// [`into_bytes`]: struct.FromUtf8Error.html#method.into_bytes
336///
337/// The [`Utf8Error`] type provided by [`std::str`] represents an error that may
338/// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's
339/// an analogue to `FromUtf8Error`, and you can get one from a `FromUtf8Error`
340/// through the [`utf8_error`] method.
341///
342/// [`Utf8Error`]: https://doc.rust-lang.org/std/str/struct.Utf8Error.html
343/// [`std::str`]: https://doc.rust-lang.org/std/str/index.html
344/// [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html
345/// [`&str`]: https://doc.rust-lang.org/std/primitive.str.html
346/// [`utf8_error`]: #method.utf8_error
347///
348/// # Examples
349///
350/// Basic usage:
351///
352/// ```
353/// use bumpalo::{Bump, collections::String};
354///
355/// let b = Bump::new();
356///
357/// // some invalid bytes, in a vector
358/// let bytes = bumpalo::vec![in &b; 0, 159];
359///
360/// let value = String::from_utf8(bytes);
361///
362/// assert!(value.is_err());
363/// assert_eq!(bumpalo::vec![in &b; 0, 159], value.unwrap_err().into_bytes());
364/// ```
#[derive(Debug)]
pub struct FromUtf8Error<'bump> {
    // The byte vector that was passed to `String::from_utf8`, handed back
    // unchanged so the caller does not lose it on failure.
    bytes: Vec<'bump, u8>,
    // The validation error reported by `core::str::from_utf8` for those bytes.
    error: Utf8Error,
}
370
371/// A possible error value when converting a `String` from a UTF-16 byte slice.
372///
373/// This type is the error type for the [`from_utf16_in`] method on [`String`].
374///
375/// [`from_utf16_in`]: struct.String.html#method.from_utf16_in
376/// [`String`]: struct.String.html
377///
378/// # Examples
379///
380/// Basic usage:
381///
382/// ```
383/// use bumpalo::{Bump, collections::String};
384///
385/// let b = Bump::new();
386///
387/// // 𝄞mu<invalid>ic
388/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0xD800, 0x0069, 0x0063];
389///
390/// assert!(String::from_utf16_in(v, &b).is_err());
391/// ```
#[derive(Debug)]
// The single private `()` field carries no data; it only keeps code outside
// this module from constructing the error directly.
pub struct FromUtf16Error(());
394
395impl<'bump> String<'bump> {
396 /// Creates a new empty `String`.
397 ///
398 /// Given that the `String` is empty, this will not allocate any initial
399 /// buffer. While that means that this initial operation is very
400 /// inexpensive, it may cause excessive allocation later when you add
401 /// data. If you have an idea of how much data the `String` will hold,
402 /// consider the [`with_capacity_in`] method to prevent excessive
403 /// re-allocation.
404 ///
405 /// [`with_capacity_in`]: #method.with_capacity_in
406 ///
407 /// # Examples
408 ///
409 /// Basic usage:
410 ///
411 /// ```
412 /// use bumpalo::{Bump, collections::String};
413 ///
414 /// let b = Bump::new();
415 ///
416 /// let s = String::new_in(&b);
417 /// ```
418 #[inline]
419 pub fn new_in(bump: &'bump Bump) -> String<'bump> {
420 String {
421 vec: Vec::new_in(bump),
422 }
423 }
424
425 /// Creates a new empty `String` with a particular capacity.
426 ///
427 /// `String`s have an internal buffer to hold their data. The capacity is
428 /// the length of that buffer, and can be queried with the [`capacity`]
429 /// method. This method creates an empty `String`, but one with an initial
430 /// buffer that can hold `capacity` bytes. This is useful when you may be
431 /// appending a bunch of data to the `String`, reducing the number of
432 /// reallocations it needs to do.
433 ///
434 /// [`capacity`]: #method.capacity
435 ///
436 /// If the given capacity is `0`, no allocation will occur, and this method
437 /// is identical to the [`new_in`] method.
438 ///
/// [`new_in`]: #method.new_in
440 ///
441 /// # Examples
442 ///
443 /// Basic usage:
444 ///
445 /// ```
446 /// use bumpalo::{Bump, collections::String};
447 ///
448 /// let b = Bump::new();
449 ///
450 /// let mut s = String::with_capacity_in(10, &b);
451 ///
452 /// // The String contains no chars, even though it has capacity for more
453 /// assert_eq!(s.len(), 0);
454 ///
455 /// // These are all done without reallocating...
456 /// let cap = s.capacity();
457 /// for _ in 0..10 {
458 /// s.push('a');
459 /// }
460 ///
461 /// assert_eq!(s.capacity(), cap);
462 ///
463 /// // ...but this may make the vector reallocate
464 /// s.push('a');
465 /// ```
466 #[inline]
467 pub fn with_capacity_in(capacity: usize, bump: &'bump Bump) -> String<'bump> {
468 String {
469 vec: Vec::with_capacity_in(capacity, bump),
470 }
471 }
472
473 /// Converts a vector of bytes to a `String`.
474 ///
475 /// A string (`String`) is made of bytes ([`u8`]), and a vector of bytes
476 /// ([`Vec<u8>`]) is made of bytes, so this function converts between the
477 /// two. Not all byte slices are valid `String`s, however: `String`
478 /// requires that it is valid UTF-8. `from_utf8()` checks to ensure that
479 /// the bytes are valid UTF-8, and then does the conversion.
480 ///
481 /// If you are sure that the byte slice is valid UTF-8, and you don't want
482 /// to incur the overhead of the validity check, there is an unsafe version
483 /// of this function, [`from_utf8_unchecked`], which has the same behavior
484 /// but skips the check.
485 ///
486 /// This method will take care to not copy the vector, for efficiency's
487 /// sake.
488 ///
489 /// If you need a [`&str`] instead of a `String`, consider
490 /// [`str::from_utf8`].
491 ///
492 /// The inverse of this method is [`into_bytes`].
493 ///
494 /// # Errors
495 ///
496 /// Returns [`Err`] if the slice is not UTF-8 with a description as to why the
497 /// provided bytes are not UTF-8. The vector you moved in is also included.
498 ///
499 /// # Examples
500 ///
501 /// Basic usage:
502 ///
503 /// ```
504 /// use bumpalo::{Bump, collections::String};
505 ///
506 /// let b = Bump::new();
507 ///
508 /// // some bytes, in a vector
509 /// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
510 ///
511 /// // We know these bytes are valid, so we'll use `unwrap()`.
512 /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
513 ///
514 /// assert_eq!("💖", sparkle_heart);
515 /// ```
516 ///
517 /// Incorrect bytes:
518 ///
519 /// ```
520 /// use bumpalo::{Bump, collections::String};
521 ///
522 /// let b = Bump::new();
523 ///
524 /// // some invalid bytes, in a vector
525 /// let sparkle_heart = bumpalo::vec![in &b; 0, 159, 146, 150];
526 ///
527 /// assert!(String::from_utf8(sparkle_heart).is_err());
528 /// ```
529 ///
530 /// See the docs for [`FromUtf8Error`] for more details on what you can do
531 /// with this error.
532 ///
533 /// [`from_utf8_unchecked`]: struct.String.html#method.from_utf8_unchecked
534 /// [`&str`]: https://doc.rust-lang.org/std/primitive.str.html
535 /// [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html
536 /// [`Vec<u8>`]: ../vec/struct.Vec.html
537 /// [`str::from_utf8`]: https://doc.rust-lang.org/std/str/fn.from_utf8.html
538 /// [`into_bytes`]: struct.String.html#method.into_bytes
539 /// [`FromUtf8Error`]: struct.FromUtf8Error.html
540 /// [`Err`]: https://doc.rust-lang.org/std/result/enum.Result.html#variant.Err
541 #[inline]
542 pub fn from_utf8(vec: Vec<'bump, u8>) -> Result<String<'bump>, FromUtf8Error<'bump>> {
543 match str::from_utf8(&vec) {
544 Ok(..) => Ok(String { vec }),
545 Err(e) => Err(FromUtf8Error {
546 bytes: vec,
547 error: e,
548 }),
549 }
550 }
551
552 /// Converts a slice of bytes to a string, including invalid characters.
553 ///
554 /// Strings are made of bytes ([`u8`]), and a slice of bytes
555 /// ([`&[u8]`][slice]) is made of bytes, so this function converts
556 /// between the two. Not all byte slices are valid strings, however: strings
557 /// are required to be valid UTF-8. During this conversion,
558 /// `from_utf8_lossy_in()` will replace any invalid UTF-8 sequences with
559 /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD], which looks like this: �
560 ///
561 /// [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html
562 /// [slice]: https://doc.rust-lang.org/std/primitive.slice.html
563 /// [U+FFFD]: https://doc.rust-lang.org/std/char/constant.REPLACEMENT_CHARACTER.html
564 ///
565 /// If you are sure that the byte slice is valid UTF-8, and you don't want
566 /// to incur the overhead of the conversion, there is an unsafe version
567 /// of this function, [`from_utf8_unchecked`], which has the same behavior
568 /// but skips the checks.
569 ///
570 /// [`from_utf8_unchecked`]: struct.String.html#method.from_utf8_unchecked
571 ///
572 /// # Examples
573 ///
574 /// Basic usage:
575 ///
576 /// ```
577 /// use bumpalo::{collections::String, Bump, vec};
578 ///
579 /// let b = Bump::new();
580 ///
581 /// // some bytes, in a vector
582 /// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
583 ///
584 /// let sparkle_heart = String::from_utf8_lossy_in(&sparkle_heart, &b);
585 ///
586 /// assert_eq!("💖", sparkle_heart);
587 /// ```
588 ///
589 /// Incorrect bytes:
590 ///
591 /// ```
592 /// use bumpalo::{collections::String, Bump, vec};
593 ///
594 /// let b = Bump::new();
595 ///
596 /// // some invalid bytes
597 /// let input = b"Hello \xF0\x90\x80World";
598 /// let output = String::from_utf8_lossy_in(input, &b);
599 ///
600 /// assert_eq!("Hello �World", output);
601 /// ```
602 pub fn from_utf8_lossy_in(v: &[u8], bump: &'bump Bump) -> String<'bump> {
603 let mut iter = lossy::Utf8Lossy::from_bytes(v).chunks();
604
605 let (first_valid, first_broken) = if let Some(chunk) = iter.next() {
606 let lossy::Utf8LossyChunk { valid, broken } = chunk;
607 if valid.len() == v.len() {
608 debug_assert!(broken.is_empty());
609 unsafe {
610 return String::from_utf8_unchecked(Vec::from_iter_in(v.iter().cloned(), bump));
611 }
612 }
613 (valid, broken)
614 } else {
615 return String::from_str_in("", bump);
616 };
617
618 const REPLACEMENT: &str = "\u{FFFD}";
619
620 let mut res = String::with_capacity_in(v.len(), bump);
621 res.push_str(first_valid);
622 if !first_broken.is_empty() {
623 res.push_str(REPLACEMENT);
624 }
625
626 for lossy::Utf8LossyChunk { valid, broken } in iter {
627 res.push_str(valid);
628 if !broken.is_empty() {
629 res.push_str(REPLACEMENT);
630 }
631 }
632
633 res
634 }
635
636 /// Decode a UTF-16 encoded slice `v` into a `String`, returning [`Err`]
637 /// if `v` contains any invalid data.
638 ///
639 /// [`Err`]: https://doc.rust-lang.org/std/result/enum.Result.html#variant.Err
640 ///
641 /// # Examples
642 ///
643 /// Basic usage:
644 ///
645 /// ```
646 /// use bumpalo::{Bump, collections::String};
647 ///
648 /// let b = Bump::new();
649 ///
650 /// // 𝄞music
651 /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0x0069, 0x0063];
652 /// assert_eq!(String::from_str_in("𝄞music", &b), String::from_utf16_in(v, &b).unwrap());
653 ///
654 /// // 𝄞mu<invalid>ic
655 /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0xD800, 0x0069, 0x0063];
656 /// assert!(String::from_utf16_in(v, &b).is_err());
657 /// ```
658 pub fn from_utf16_in(v: &[u16], bump: &'bump Bump) -> Result<String<'bump>, FromUtf16Error> {
659 let mut ret = String::with_capacity_in(v.len(), bump);
660 for c in decode_utf16(v.iter().cloned()) {
661 if let Ok(c) = c {
662 ret.push(c);
663 } else {
664 return Err(FromUtf16Error(()));
665 }
666 }
667 Ok(ret)
668 }
669
670 /// Construct a new `String<'bump>` from a string slice.
671 ///
672 /// # Examples
673 ///
674 /// ```
675 /// use bumpalo::{Bump, collections::String};
676 ///
677 /// let b = Bump::new();
678 ///
679 /// let s = String::from_str_in("hello", &b);
680 /// assert_eq!(s, "hello");
681 /// ```
682 #[inline]
683 pub fn from_str_in(s: &str, bump: &'bump Bump) -> String<'bump> {
684 let len = s.len();
685 let mut t = String::with_capacity_in(len, bump);
686 // SAFETY:
687 // * `src` is valid for reads of `s.len()` bytes by virtue of being an allocated `&str`.
688 // * `dst` is valid for writes of `s.len()` bytes as `String::with_capacity_in(s.len(), bump)`
689 // above guarantees that.
690 // * Alignment is not relevant as `u8` has no alignment requirements.
691 // * Source and destination ranges cannot overlap as we just reserved the destination
692 // range from the bump.
693 unsafe { ptr::copy_nonoverlapping(s.as_ptr(), t.vec.as_mut_ptr(), len) };
694 // SAFETY: We reserved sufficent capacity for the string above.
695 // The elements at `0..len` were initialized by `copy_nonoverlapping` above.
696 unsafe { t.vec.set_len(len) };
697 t
698 }
699
700 /// Construct a new `String<'bump>` from an iterator of `char`s.
701 ///
702 /// # Examples
703 ///
704 /// ```
705 /// use bumpalo::{Bump, collections::String};
706 ///
707 /// let b = Bump::new();
708 ///
709 /// let s = String::from_iter_in(['h', 'e', 'l', 'l', 'o'].iter().cloned(), &b);
710 /// assert_eq!(s, "hello");
711 /// ```
712 pub fn from_iter_in<I: IntoIterator<Item = char>>(iter: I, bump: &'bump Bump) -> String<'bump> {
713 let mut s = String::new_in(bump);
714 for c in iter {
715 s.push(c);
716 }
717 s
718 }
719
720 /// Creates a new `String` from a length, capacity, and pointer.
721 ///
722 /// # Safety
723 ///
724 /// This is highly unsafe, due to the number of invariants that aren't
725 /// checked:
726 ///
/// * The memory at `ptr` needs to have been previously allocated from
///   `bump`, e.g. as the buffer of a `String<'bump>` or `Vec<'bump, u8>`.
/// * `length` needs to be less than or equal to `capacity`.
/// * `capacity` needs to be the correct value.
/// * The first `length` bytes at `ptr` need to be valid UTF-8.
731 ///
732 /// Violating these may cause problems like corrupting the allocator's
733 /// internal data structures.
734 ///
735 /// The ownership of `ptr` is effectively transferred to the
736 /// `String` which may then deallocate, reallocate or change the
737 /// contents of memory pointed to by the pointer at will. Ensure
738 /// that nothing else uses the pointer after calling this
739 /// function.
740 ///
741 /// # Examples
742 ///
743 /// Basic usage:
744 ///
745 /// ```
746 /// use bumpalo::{Bump, collections::String};
747 /// use std::mem;
748 ///
749 /// let b = Bump::new();
750 ///
751 /// unsafe {
752 /// let mut s = String::from_str_in("hello", &b);
753 /// let ptr = s.as_mut_ptr();
754 /// let len = s.len();
755 /// let capacity = s.capacity();
756 ///
757 /// mem::forget(s);
758 ///
759 /// let s = String::from_raw_parts_in(ptr, len, capacity, &b);
760 ///
761 /// assert_eq!(s, "hello");
762 /// }
763 /// ```
764 #[inline]
765 pub unsafe fn from_raw_parts_in(
766 buf: *mut u8,
767 length: usize,
768 capacity: usize,
769 bump: &'bump Bump,
770 ) -> String<'bump> {
771 String {
772 vec: Vec::from_raw_parts_in(buf, length, capacity, bump),
773 }
774 }
775
776 /// Converts a vector of bytes to a `String` without checking that the
777 /// string contains valid UTF-8.
778 ///
779 /// See the safe version, [`from_utf8`], for more details.
780 ///
781 /// [`from_utf8`]: struct.String.html#method.from_utf8
782 ///
783 /// # Safety
784 ///
785 /// This function is unsafe because it does not check that the bytes passed
786 /// to it are valid UTF-8. If this constraint is violated, it may cause
787 /// memory unsafety issues with future users of the `String`,
788 /// as it is assumed that `String`s are valid UTF-8.
789 ///
790 /// # Examples
791 ///
792 /// Basic usage:
793 ///
794 /// ```
795 /// use bumpalo::{Bump, collections::String};
796 ///
797 /// let b = Bump::new();
798 ///
799 /// // some bytes, in a vector
800 /// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
801 ///
802 /// let sparkle_heart = unsafe {
803 /// String::from_utf8_unchecked(sparkle_heart)
804 /// };
805 ///
806 /// assert_eq!("💖", sparkle_heart);
807 /// ```
808 #[inline]
809 pub unsafe fn from_utf8_unchecked(bytes: Vec<'bump, u8>) -> String<'bump> {
810 String { vec: bytes }
811 }
812
813 /// Returns a shared reference to the allocator backing this `String`.
814 ///
815 /// # Examples
816 ///
817 /// ```
818 /// use bumpalo::{Bump, collections::String};
819 ///
820 /// // uses the same allocator as the provided `String`
821 /// fn copy_string<'bump>(s: &String<'bump>) -> &'bump str {
822 /// s.bump().alloc_str(s.as_str())
823 /// }
824 /// ```
825 #[inline]
826 #[must_use]
827 pub fn bump(&self) -> &'bump Bump {
828 self.vec.bump()
829 }
830
831 /// Converts a `String` into a byte vector.
832 ///
833 /// This consumes the `String`, so we do not need to copy its contents.
834 ///
835 /// # Examples
836 ///
837 /// Basic usage:
838 ///
839 /// ```
840 /// use bumpalo::{Bump, collections::String};
841 ///
842 /// let b = Bump::new();
843 ///
844 /// let s = String::from_str_in("hello", &b);
845 ///
846 /// assert_eq!(s.into_bytes(), [104, 101, 108, 108, 111]);
847 /// ```
848 #[inline]
849 pub fn into_bytes(self) -> Vec<'bump, u8> {
850 self.vec
851 }
852
853 /// Convert this `String<'bump>` into a `&'bump str`. This is analogous to
854 /// [`std::string::String::into_boxed_str`][into_boxed_str].
855 ///
856 /// [into_boxed_str]: https://doc.rust-lang.org/std/string/struct.String.html#method.into_boxed_str
857 ///
858 /// # Example
859 ///
860 /// ```
861 /// use bumpalo::{Bump, collections::String};
862 ///
863 /// let b = Bump::new();
864 ///
865 /// let s = String::from_str_in("foo", &b);
866 ///
867 /// assert_eq!(s.into_bump_str(), "foo");
868 /// ```
869 pub fn into_bump_str(self) -> &'bump str {
870 let s = unsafe {
871 let s = self.as_str();
872 mem::transmute(s)
873 };
874 mem::forget(self);
875 s
876 }
877
878 /// Extracts a string slice containing the entire `String`.
879 ///
880 /// # Examples
881 ///
882 /// Basic usage:
883 ///
884 /// ```
885 /// use bumpalo::{Bump, collections::String};
886 ///
887 /// let b = Bump::new();
888 ///
889 /// let s = String::from_str_in("foo", &b);
890 ///
891 /// assert_eq!("foo", s.as_str());
892 /// ```
893 #[inline]
894 pub fn as_str(&self) -> &str {
895 self
896 }
897
898 /// Converts a `String` into a mutable string slice.
899 ///
900 /// # Examples
901 ///
902 /// Basic usage:
903 ///
904 /// ```
905 /// use bumpalo::{Bump, collections::String};
906 ///
907 /// let b = Bump::new();
908 ///
909 /// let mut s = String::from_str_in("foobar", &b);
910 /// let s_mut_str = s.as_mut_str();
911 ///
912 /// s_mut_str.make_ascii_uppercase();
913 ///
914 /// assert_eq!("FOOBAR", s_mut_str);
915 /// ```
916 #[inline]
917 pub fn as_mut_str(&mut self) -> &mut str {
918 self
919 }
920
921 /// Appends a given string slice onto the end of this `String`.
922 ///
923 /// # Examples
924 ///
925 /// Basic usage:
926 ///
927 /// ```
928 /// use bumpalo::{Bump, collections::String};
929 ///
930 /// let b = Bump::new();
931 ///
932 /// let mut s = String::from_str_in("foo", &b);
933 ///
934 /// s.push_str("bar");
935 ///
936 /// assert_eq!("foobar", s);
937 /// ```
938 #[inline]
939 pub fn push_str(&mut self, string: &str) {
940 self.vec.extend_from_slice_copy(string.as_bytes())
941 }
942
943 /// Returns this `String`'s capacity, in bytes.
944 ///
945 /// # Examples
946 ///
947 /// Basic usage:
948 ///
949 /// ```
950 /// use bumpalo::{Bump, collections::String};
951 ///
952 /// let b = Bump::new();
953 ///
954 /// let s = String::with_capacity_in(10, &b);
955 ///
956 /// assert!(s.capacity() >= 10);
957 /// ```
958 #[inline]
959 pub fn capacity(&self) -> usize {
960 self.vec.capacity()
961 }
962
963 /// Ensures that this `String`'s capacity is at least `additional` bytes
964 /// larger than its length.
965 ///
966 /// The capacity may be increased by more than `additional` bytes if it
967 /// chooses, to prevent frequent reallocations.
968 ///
969 /// If you do not want this "at least" behavior, see the [`reserve_exact`]
970 /// method.
971 ///
972 /// # Panics
973 ///
974 /// Panics if the new capacity overflows [`usize`].
975 ///
976 /// [`reserve_exact`]: struct.String.html#method.reserve_exact
977 /// [`usize`]: https://doc.rust-lang.org/std/primitive.usize.html
978 ///
979 /// # Examples
980 ///
981 /// Basic usage:
982 ///
983 /// ```
984 /// use bumpalo::{Bump, collections::String};
985 ///
986 /// let b = Bump::new();
987 ///
988 /// let mut s = String::new_in(&b);
989 ///
990 /// s.reserve(10);
991 ///
992 /// assert!(s.capacity() >= 10);
993 /// ```
994 ///
995 /// This may not actually increase the capacity:
996 ///
997 /// ```
998 /// use bumpalo::{Bump, collections::String};
999 ///
1000 /// let b = Bump::new();
1001 ///
1002 /// let mut s = String::with_capacity_in(10, &b);
1003 /// s.push('a');
1004 /// s.push('b');
1005 ///
1006 /// // s now has a length of 2 and a capacity of 10
1007 /// assert_eq!(2, s.len());
1008 /// assert_eq!(10, s.capacity());
1009 ///
1010 /// // Since we already have an extra 8 capacity, calling this...
1011 /// s.reserve(8);
1012 ///
1013 /// // ... doesn't actually increase.
1014 /// assert_eq!(10, s.capacity());
1015 /// ```
1016 #[inline]
1017 pub fn reserve(&mut self, additional: usize) {
1018 self.vec.reserve(additional)
1019 }
1020
1021 /// Ensures that this `String`'s capacity is `additional` bytes
1022 /// larger than its length.
1023 ///
1024 /// Consider using the [`reserve`] method unless you absolutely know
1025 /// better than the allocator.
1026 ///
1027 /// [`reserve`]: #method.reserve
1028 ///
1029 /// # Panics
1030 ///
1031 /// Panics if the new capacity overflows `usize`.
1032 ///
1033 /// # Examples
1034 ///
1035 /// Basic usage:
1036 ///
1037 /// ```
1038 /// use bumpalo::{Bump, collections::String};
1039 ///
1040 /// let b = Bump::new();
1041 ///
1042 /// let mut s = String::new_in(&b);
1043 ///
1044 /// s.reserve_exact(10);
1045 ///
1046 /// assert!(s.capacity() >= 10);
1047 /// ```
1048 ///
1049 /// This may not actually increase the capacity:
1050 ///
1051 /// ```
1052 /// use bumpalo::{Bump, collections::String};
1053 ///
1054 /// let b = Bump::new();
1055 ///
1056 /// let mut s = String::with_capacity_in(10, &b);
1057 /// s.push('a');
1058 /// s.push('b');
1059 ///
1060 /// // s now has a length of 2 and a capacity of 10
1061 /// assert_eq!(2, s.len());
1062 /// assert_eq!(10, s.capacity());
1063 ///
1064 /// // Since we already have an extra 8 capacity, calling this...
1065 /// s.reserve_exact(8);
1066 ///
1067 /// // ... doesn't actually increase.
1068 /// assert_eq!(10, s.capacity());
1069 /// ```
1070 #[inline]
1071 pub fn reserve_exact(&mut self, additional: usize) {
1072 self.vec.reserve_exact(additional)
1073 }
1074
1075 /// Shrinks the capacity of this `String` to match its length.
1076 ///
1077 /// # Examples
1078 ///
1079 /// Basic usage:
1080 ///
1081 /// ```
1082 /// use bumpalo::{Bump, collections::String};
1083 ///
1084 /// let b = Bump::new();
1085 ///
1086 /// let mut s = String::from_str_in("foo", &b);
1087 ///
1088 /// s.reserve(100);
1089 /// assert!(s.capacity() >= 100);
1090 ///
1091 /// s.shrink_to_fit();
1092 /// assert_eq!(3, s.capacity());
1093 /// ```
1094 #[inline]
1095 pub fn shrink_to_fit(&mut self) {
1096 self.vec.shrink_to_fit()
1097 }
1098
1099 /// Appends the given [`char`] to the end of this `String`.
1100 ///
1101 /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1102 ///
1103 /// # Examples
1104 ///
1105 /// Basic usage:
1106 ///
1107 /// ```
1108 /// use bumpalo::{Bump, collections::String};
1109 ///
1110 /// let b = Bump::new();
1111 ///
1112 /// let mut s = String::from_str_in("abc", &b);
1113 ///
1114 /// s.push('1');
1115 /// s.push('2');
1116 /// s.push('3');
1117 ///
1118 /// assert_eq!("abc123", s);
1119 /// ```
1120 #[inline]
1121 pub fn push(&mut self, ch: char) {
1122 match ch.len_utf8() {
1123 1 => self.vec.push(ch as u8),
1124 _ => self
1125 .vec
1126 .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()),
1127 }
1128 }
1129
1130 /// Returns a byte slice of this `String`'s contents.
1131 ///
1132 /// The inverse of this method is [`from_utf8`].
1133 ///
1134 /// [`from_utf8`]: #method.from_utf8
1135 ///
1136 /// # Examples
1137 ///
1138 /// Basic usage:
1139 ///
1140 /// ```
1141 /// use bumpalo::{Bump, collections::String};
1142 ///
1143 /// let b = Bump::new();
1144 ///
1145 /// let s = String::from_str_in("hello", &b);
1146 ///
1147 /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes());
1148 /// ```
1149 #[inline]
1150 pub fn as_bytes(&self) -> &[u8] {
1151 &self.vec
1152 }
1153
1154 /// Shortens this `String` to the specified length.
1155 ///
1156 /// If `new_len` is greater than the string's current length, this has no
1157 /// effect.
1158 ///
1159 /// Note that this method has no effect on the allocated capacity
1160 /// of the string.
1161 ///
1162 /// # Panics
1163 ///
1164 /// Panics if `new_len` does not lie on a [`char`] boundary.
1165 ///
1166 /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1167 ///
1168 /// # Examples
1169 ///
1170 /// Basic usage:
1171 ///
1172 /// ```
1173 /// use bumpalo::{Bump, collections::String};
1174 ///
1175 /// let b = Bump::new();
1176 ///
1177 /// let mut s = String::from_str_in("hello", &b);
1178 ///
1179 /// s.truncate(2);
1180 ///
1181 /// assert_eq!("he", s);
1182 /// ```
1183 #[inline]
1184 pub fn truncate(&mut self, new_len: usize) {
1185 if new_len <= self.len() {
1186 assert!(self.is_char_boundary(new_len));
1187 self.vec.truncate(new_len)
1188 }
1189 }
1190
1191 /// Removes the last character from the string buffer and returns it.
1192 ///
1193 /// Returns [`None`] if this `String` is empty.
1194 ///
1195 /// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
1196 ///
1197 /// # Examples
1198 ///
1199 /// Basic usage:
1200 ///
1201 /// ```
1202 /// use bumpalo::{Bump, collections::String};
1203 ///
1204 /// let b = Bump::new();
1205 ///
1206 /// let mut s = String::from_str_in("foo", &b);
1207 ///
1208 /// assert_eq!(s.pop(), Some('o'));
1209 /// assert_eq!(s.pop(), Some('o'));
1210 /// assert_eq!(s.pop(), Some('f'));
1211 ///
1212 /// assert_eq!(s.pop(), None);
1213 /// ```
1214 #[inline]
1215 pub fn pop(&mut self) -> Option<char> {
1216 let ch = self.chars().rev().next()?;
1217 let newlen = self.len() - ch.len_utf8();
1218 unsafe {
1219 self.vec.set_len(newlen);
1220 }
1221 Some(ch)
1222 }
1223
1224 /// Removes a [`char`] from this `String` at a byte position and returns it.
1225 ///
1226 /// This is an `O(n)` operation, as it requires copying every element in the
1227 /// buffer.
1228 ///
1229 /// # Panics
1230 ///
1231 /// Panics if `idx` is larger than or equal to the `String`'s length,
1232 /// or if it does not lie on a [`char`] boundary.
1233 ///
1234 /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1235 ///
1236 /// # Examples
1237 ///
1238 /// Basic usage:
1239 ///
1240 /// ```
1241 /// use bumpalo::{Bump, collections::String};
1242 ///
1243 /// let b = Bump::new();
1244 ///
1245 /// let mut s = String::from_str_in("foo", &b);
1246 ///
1247 /// assert_eq!(s.remove(0), 'f');
1248 /// assert_eq!(s.remove(1), 'o');
1249 /// assert_eq!(s.remove(0), 'o');
1250 /// ```
1251 #[inline]
1252 pub fn remove(&mut self, idx: usize) -> char {
1253 let ch = match self[idx..].chars().next() {
1254 Some(ch) => ch,
1255 None => panic!("cannot remove a char from the end of a string"),
1256 };
1257
1258 let next = idx + ch.len_utf8();
1259 let len = self.len();
1260 unsafe {
1261 ptr::copy(
1262 self.vec.as_ptr().add(next),
1263 self.vec.as_mut_ptr().add(idx),
1264 len - next,
1265 );
1266 self.vec.set_len(len - (next - idx));
1267 }
1268 ch
1269 }
1270
1271 /// Retains only the characters specified by the predicate.
1272 ///
1273 /// In other words, remove all characters `c` such that `f(c)` returns `false`.
1274 /// This method operates in place and preserves the order of the retained
1275 /// characters.
1276 ///
1277 /// # Examples
1278 ///
1279 /// ```
1280 /// use bumpalo::{Bump, collections::String};
1281 ///
1282 /// let b = Bump::new();
1283 ///
1284 /// let mut s = String::from_str_in("f_o_ob_ar", &b);
1285 ///
1286 /// s.retain(|c| c != '_');
1287 ///
1288 /// assert_eq!(s, "foobar");
1289 /// ```
1290 #[inline]
1291 pub fn retain<F>(&mut self, mut f: F)
1292 where
1293 F: FnMut(char) -> bool,
1294 {
1295 let len = self.len();
1296 let mut del_bytes = 0;
1297 let mut idx = 0;
1298
1299 while idx < len {
1300 let ch = unsafe { self.get_unchecked(idx..len).chars().next().unwrap() };
1301 let ch_len = ch.len_utf8();
1302
1303 if !f(ch) {
1304 del_bytes += ch_len;
1305 } else if del_bytes > 0 {
1306 unsafe {
1307 ptr::copy(
1308 self.vec.as_ptr().add(idx),
1309 self.vec.as_mut_ptr().add(idx - del_bytes),
1310 ch_len,
1311 );
1312 }
1313 }
1314
1315 // Point idx to the next char
1316 idx += ch_len;
1317 }
1318
1319 if del_bytes > 0 {
1320 unsafe {
1321 self.vec.set_len(len - del_bytes);
1322 }
1323 }
1324 }
1325
1326 /// Inserts a character into this `String` at a byte position.
1327 ///
1328 /// This is an `O(n)` operation as it requires copying every element in the
1329 /// buffer.
1330 ///
1331 /// # Panics
1332 ///
1333 /// Panics if `idx` is larger than the `String`'s length, or if it does not
1334 /// lie on a [`char`] boundary.
1335 ///
1336 /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1337 ///
1338 /// # Examples
1339 ///
1340 /// Basic usage:
1341 ///
1342 /// ```
1343 /// use bumpalo::{Bump, collections::String};
1344 ///
1345 /// let b = Bump::new();
1346 ///
1347 /// let mut s = String::with_capacity_in(3, &b);
1348 ///
1349 /// s.insert(0, 'f');
1350 /// s.insert(1, 'o');
1351 /// s.insert(2, 'o');
1352 ///
1353 /// assert_eq!("foo", s);
1354 /// ```
1355 #[inline]
1356 pub fn insert(&mut self, idx: usize, ch: char) {
1357 assert!(self.is_char_boundary(idx));
1358 let mut bits = [0; 4];
1359 let bits = ch.encode_utf8(&mut bits).as_bytes();
1360
1361 unsafe {
1362 self.insert_bytes(idx, bits);
1363 }
1364 }
1365
1366 unsafe fn insert_bytes(&mut self, idx: usize, bytes: &[u8]) {
1367 let len = self.len();
1368 let amt = bytes.len();
1369 self.vec.reserve(amt);
1370
1371 ptr::copy(
1372 self.vec.as_ptr().add(idx),
1373 self.vec.as_mut_ptr().add(idx + amt),
1374 len - idx,
1375 );
1376 ptr::copy(bytes.as_ptr(), self.vec.as_mut_ptr().add(idx), amt);
1377 self.vec.set_len(len + amt);
1378 }
1379
1380 /// Inserts a string slice into this `String` at a byte position.
1381 ///
1382 /// This is an `O(n)` operation as it requires copying every element in the
1383 /// buffer.
1384 ///
1385 /// # Panics
1386 ///
1387 /// Panics if `idx` is larger than the `String`'s length, or if it does not
1388 /// lie on a [`char`] boundary.
1389 ///
1390 /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1391 ///
1392 /// # Examples
1393 ///
1394 /// Basic usage:
1395 ///
1396 /// ```
1397 /// use bumpalo::{Bump, collections::String};
1398 ///
1399 /// let b = Bump::new();
1400 ///
1401 /// let mut s = String::from_str_in("bar", &b);
1402 ///
1403 /// s.insert_str(0, "foo");
1404 ///
1405 /// assert_eq!("foobar", s);
1406 /// ```
1407 #[inline]
1408 pub fn insert_str(&mut self, idx: usize, string: &str) {
1409 assert!(self.is_char_boundary(idx));
1410
1411 unsafe {
1412 self.insert_bytes(idx, string.as_bytes());
1413 }
1414 }
1415
1416 /// Returns a mutable reference to the contents of this `String`.
1417 ///
1418 /// # Safety
1419 ///
1420 /// This function is unsafe because the returned `&mut Vec` allows writing
1421 /// bytes which are not valid UTF-8. If this constraint is violated, using
1422 /// the original `String` after dropping the `&mut Vec` may violate memory
1423 /// safety, as it is assumed that `String`s are valid UTF-8.
1424 ///
1425 /// # Examples
1426 ///
1427 /// Basic usage:
1428 ///
1429 /// ```
1430 /// use bumpalo::{Bump, collections::String};
1431 ///
1432 /// let b = Bump::new();
1433 ///
1434 /// let mut s = String::from_str_in("hello", &b);
1435 ///
1436 /// unsafe {
1437 /// let vec = s.as_mut_vec();
1438 /// assert_eq!(vec, &[104, 101, 108, 108, 111]);
1439 ///
1440 /// vec.reverse();
1441 /// }
1442 /// assert_eq!(s, "olleh");
1443 /// ```
1444 #[inline]
1445 pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<'bump, u8> {
1446 &mut self.vec
1447 }
1448
1449 /// Returns the length of this `String`, in bytes.
1450 ///
1451 /// # Examples
1452 ///
1453 /// Basic usage:
1454 ///
1455 /// ```
1456 /// use bumpalo::{Bump, collections::String};
1457 ///
1458 /// let b = Bump::new();
1459 ///
1460 /// let a = String::from_str_in("foo", &b);
1461 ///
1462 /// assert_eq!(a.len(), 3);
1463 /// ```
1464 #[inline]
1465 pub fn len(&self) -> usize {
1466 self.vec.len()
1467 }
1468
1469 /// Returns `true` if this `String` has a length of zero.
1470 ///
1471 /// Returns `false` otherwise.
1472 ///
1473 /// # Examples
1474 ///
1475 /// Basic usage:
1476 ///
1477 /// ```
1478 /// use bumpalo::{Bump, collections::String};
1479 ///
1480 /// let b = Bump::new();
1481 ///
1482 /// let mut v = String::new_in(&b);
1483 /// assert!(v.is_empty());
1484 ///
1485 /// v.push('a');
1486 /// assert!(!v.is_empty());
1487 /// ```
1488 #[inline]
1489 pub fn is_empty(&self) -> bool {
1490 self.len() == 0
1491 }
1492
1493 /// Splits the string into two at the given index.
1494 ///
1495 /// Returns a newly allocated `String`. `self` contains bytes `[0, at)`, and
1496 /// the returned `String` contains bytes `[at, len)`. `at` must be on the
1497 /// boundary of a UTF-8 code point.
1498 ///
1499 /// Note that the capacity of `self` does not change.
1500 ///
1501 /// # Panics
1502 ///
1503 /// Panics if `at` is not on a UTF-8 code point boundary, or if it is beyond the last
1504 /// code point of the string.
1505 ///
1506 /// # Examples
1507 ///
1508 /// ```
1509 /// use bumpalo::{Bump, collections::String};
1510 ///
1511 /// let b = Bump::new();
1512 ///
1513 /// let mut hello = String::from_str_in("Hello, World!", &b);
1514 /// let world = hello.split_off(7);
1515 /// assert_eq!(hello, "Hello, ");
1516 /// assert_eq!(world, "World!");
1517 /// ```
1518 #[inline]
1519 pub fn split_off(&mut self, at: usize) -> String<'bump> {
1520 assert!(self.is_char_boundary(at));
1521 let other = self.vec.split_off(at);
1522 unsafe { String::from_utf8_unchecked(other) }
1523 }
1524
1525 /// Truncates this `String`, removing all contents.
1526 ///
1527 /// While this means the `String` will have a length of zero, it does not
1528 /// touch its capacity.
1529 ///
1530 /// # Examples
1531 ///
1532 /// Basic usage:
1533 ///
1534 /// ```
1535 /// use bumpalo::{Bump, collections::String};
1536 ///
1537 /// let b = Bump::new();
1538 ///
1539 /// let mut s = String::from_str_in("foo", &b);
1540 ///
1541 /// s.clear();
1542 ///
1543 /// assert!(s.is_empty());
1544 /// assert_eq!(0, s.len());
1545 /// assert_eq!(3, s.capacity());
1546 /// ```
1547 #[inline]
1548 pub fn clear(&mut self) {
1549 self.vec.clear()
1550 }
1551
1552 /// Creates a draining iterator that removes the specified range in the `String`
1553 /// and yields the removed `chars`.
1554 ///
1555 /// Note: The element range is removed even if the iterator is not
1556 /// consumed until the end.
1557 ///
1558 /// # Panics
1559 ///
1560 /// Panics if the starting point or end point do not lie on a [`char`]
1561 /// boundary, or if they're out of bounds.
1562 ///
1563 /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1564 ///
1565 /// # Examples
1566 ///
1567 /// Basic usage:
1568 ///
1569 /// ```
1570 /// use bumpalo::{Bump, collections::String};
1571 ///
1572 /// let b = Bump::new();
1573 ///
1574 /// let mut s = String::from_str_in("α is alpha, β is beta", &b);
1575 /// let beta_offset = s.find('β').unwrap_or(s.len());
1576 ///
1577 /// // Remove the range up until the β from the string
1578 /// let t = String::from_iter_in(s.drain(..beta_offset), &b);
1579 /// assert_eq!(t, "α is alpha, ");
1580 /// assert_eq!(s, "β is beta");
1581 ///
1582 /// // A full range clears the string
1583 /// drop(s.drain(..));
1584 /// assert_eq!(s, "");
1585 /// ```
1586 pub fn drain<'a, R>(&'a mut self, range: R) -> Drain<'a, 'bump>
1587 where
1588 R: RangeBounds<usize>,
1589 {
1590 // Memory safety
1591 //
1592 // The String version of Drain does not have the memory safety issues
1593 // of the vector version. The data is just plain bytes.
1594 // Because the range removal happens in Drop, if the Drain iterator is leaked,
1595 // the removal will not happen.
1596 let len = self.len();
1597 let start = match range.start_bound() {
1598 Included(&n) => n,
1599 Excluded(&n) => n + 1,
1600 Unbounded => 0,
1601 };
1602 let end = match range.end_bound() {
1603 Included(&n) => n + 1,
1604 Excluded(&n) => n,
1605 Unbounded => len,
1606 };
1607
1608 // Take out two simultaneous borrows. The &mut String won't be accessed
1609 // until iteration is over, in Drop.
1610 let self_ptr = self as *mut _;
1611 // slicing does the appropriate bounds checks
1612 let chars_iter = self[start..end].chars();
1613
1614 Drain {
1615 start,
1616 end,
1617 iter: chars_iter,
1618 string: self_ptr,
1619 }
1620 }
1621
1622 /// Removes the specified range in the string,
1623 /// and replaces it with the given string.
1624 /// The given string doesn't need to be the same length as the range.
1625 ///
1626 /// # Panics
1627 ///
1628 /// Panics if the starting point or end point do not lie on a [`char`]
1629 /// boundary, or if they're out of bounds.
1630 ///
1631 /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1632 /// [`Vec::splice`]: ../vec/struct.Vec.html#method.splice
1633 ///
1634 /// # Examples
1635 ///
1636 /// Basic usage:
1637 ///
1638 /// ```
1639 /// use bumpalo::{Bump, collections::String};
1640 ///
1641 /// let b = Bump::new();
1642 ///
1643 /// let mut s = String::from_str_in("α is alpha, β is beta", &b);
1644 /// let beta_offset = s.find('β').unwrap_or(s.len());
1645 ///
1646 /// // Replace the range up until the β from the string
1647 /// s.replace_range(..beta_offset, "Α is capital alpha; ");
1648 /// assert_eq!(s, "Α is capital alpha; β is beta");
1649 /// ```
1650 pub fn replace_range<R>(&mut self, range: R, replace_with: &str)
1651 where
1652 R: RangeBounds<usize>,
1653 {
1654 // Memory safety
1655 //
1656 // Replace_range does not have the memory safety issues of a vector Splice.
1657 // of the vector version. The data is just plain bytes.
1658
1659 match range.start_bound() {
1660 Included(&n) => assert!(self.is_char_boundary(n)),
1661 Excluded(&n) => assert!(self.is_char_boundary(n + 1)),
1662 Unbounded => {}
1663 };
1664 match range.end_bound() {
1665 Included(&n) => assert!(self.is_char_boundary(n + 1)),
1666 Excluded(&n) => assert!(self.is_char_boundary(n)),
1667 Unbounded => {}
1668 };
1669
1670 unsafe { self.as_mut_vec() }.splice(range, replace_with.bytes());
1671 }
1672}
1673
1674impl<'bump> FromUtf8Error<'bump> {
1675 /// Returns a slice of bytes that were attempted to convert to a `String`.
1676 ///
1677 /// # Examples
1678 ///
1679 /// Basic usage:
1680 ///
1681 /// ```
1682 /// use bumpalo::{Bump, collections::String};
1683 ///
1684 /// let b = Bump::new();
1685 ///
1686 /// // some invalid bytes, in a vector
1687 /// let bytes = bumpalo::vec![in &b; 0, 159];
1688 ///
1689 /// let value = String::from_utf8(bytes);
1690 ///
1691 /// assert_eq!(&[0, 159], value.unwrap_err().as_bytes());
1692 /// ```
1693 pub fn as_bytes(&self) -> &[u8] {
1694 &self.bytes[..]
1695 }
1696
1697 /// Returns the bytes that were attempted to convert to a `String`.
1698 ///
1699 /// This method is carefully constructed to avoid allocation. It will
1700 /// consume the error, moving out the bytes, so that a copy of the bytes
1701 /// does not need to be made.
1702 ///
1703 /// # Examples
1704 ///
1705 /// Basic usage:
1706 ///
1707 /// ```
1708 /// use bumpalo::{Bump, collections::String};
1709 ///
1710 /// let b = Bump::new();
1711 ///
1712 /// // some invalid bytes, in a vector
1713 /// let bytes = bumpalo::vec![in &b; 0, 159];
1714 ///
1715 /// let value = String::from_utf8(bytes);
1716 ///
1717 /// assert_eq!(bumpalo::vec![in &b; 0, 159], value.unwrap_err().into_bytes());
1718 /// ```
1719 pub fn into_bytes(self) -> Vec<'bump, u8> {
1720 self.bytes
1721 }
1722
1723 /// Fetch a `Utf8Error` to get more details about the conversion failure.
1724 ///
1725 /// The [`Utf8Error`] type provided by [`std::str`] represents an error that may
1726 /// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's
1727 /// an analogue to `FromUtf8Error`. See its documentation for more details
1728 /// on using it.
1729 ///
1730 /// [`Utf8Error`]: https://doc.rust-lang.org/std/str/struct.Utf8Error.html
1731 /// [`std::str`]: https://doc.rust-lang.org/std/str/index.html
1732 /// [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html
1733 /// [`&str`]: https://doc.rust-lang.org/std/primitive.str.html
1734 ///
1735 /// # Examples
1736 ///
1737 /// Basic usage:
1738 ///
1739 /// ```
1740 /// use bumpalo::{Bump, collections::String};
1741 ///
1742 /// let b = Bump::new();
1743 ///
1744 /// // some invalid bytes, in a vector
1745 /// let bytes = bumpalo::vec![in &b; 0, 159];
1746 ///
1747 /// let error = String::from_utf8(bytes).unwrap_err().utf8_error();
1748 ///
1749 /// // the first byte is invalid here
1750 /// assert_eq!(1, error.valid_up_to());
1751 /// ```
1752 pub fn utf8_error(&self) -> Utf8Error {
1753 self.error
1754 }
1755}
1756
1757impl<'bump> fmt::Display for FromUtf8Error<'bump> {
1758 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1759 fmt::Display::fmt(&self.error, f)
1760 }
1761}
1762
1763impl fmt::Display for FromUtf16Error {
1764 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1765 fmt::Display::fmt("invalid utf-16: lone surrogate found", f)
1766 }
1767}
1768
1769impl<'bump> Clone for String<'bump> {
1770 fn clone(&self) -> Self {
1771 String {
1772 vec: self.vec.clone(),
1773 }
1774 }
1775
1776 fn clone_from(&mut self, source: &Self) {
1777 self.vec.clone_from(&source.vec);
1778 }
1779}
1780
1781impl<'bump> Extend<char> for String<'bump> {
1782 fn extend<I: IntoIterator<Item = char>>(&mut self, iter: I) {
1783 let iterator = iter.into_iter();
1784 let (lower_bound, _) = iterator.size_hint();
1785 self.reserve(lower_bound);
1786 for ch in iterator {
1787 self.push(ch)
1788 }
1789 }
1790}
1791
1792impl<'a, 'bump> Extend<&'a char> for String<'bump> {
1793 fn extend<I: IntoIterator<Item = &'a char>>(&mut self, iter: I) {
1794 self.extend(iter.into_iter().cloned());
1795 }
1796}
1797
1798impl<'a, 'bump> Extend<&'a str> for String<'bump> {
1799 fn extend<I: IntoIterator<Item = &'a str>>(&mut self, iter: I) {
1800 for s in iter {
1801 self.push_str(s)
1802 }
1803 }
1804}
1805
1806impl<'bump> Extend<String<'bump>> for String<'bump> {
1807 fn extend<I: IntoIterator<Item = String<'bump>>>(&mut self, iter: I) {
1808 for s in iter {
1809 self.push_str(&s)
1810 }
1811 }
1812}
1813
1814impl<'bump> Extend<core_alloc::string::String> for String<'bump> {
1815 fn extend<I: IntoIterator<Item = core_alloc::string::String>>(&mut self, iter: I) {
1816 for s in iter {
1817 self.push_str(&s)
1818 }
1819 }
1820}
1821
1822impl<'a, 'bump> Extend<Cow<'a, str>> for String<'bump> {
1823 fn extend<I: IntoIterator<Item = Cow<'a, str>>>(&mut self, iter: I) {
1824 for s in iter {
1825 self.push_str(&s)
1826 }
1827 }
1828}
1829
1830impl<'bump> PartialEq for String<'bump> {
1831 #[inline]
1832 fn eq(&self, other: &String) -> bool {
1833 PartialEq::eq(&self[..], &other[..])
1834 }
1835}
1836
// Generates `PartialEq` in both directions between two string-like types.
// Both sides are viewed through `[..]` and compared as `str`.
macro_rules! impl_eq {
    ($lhs:ty, $rhs: ty) => {
        impl<'a, 'bump> PartialEq<$rhs> for $lhs {
            #[inline]
            fn eq(&self, other: &$rhs) -> bool {
                self[..] == other[..]
            }
        }

        impl<'a, 'bump> PartialEq<$lhs> for $rhs {
            #[inline]
            fn eq(&self, other: &$lhs) -> bool {
                self[..] == other[..]
            }
        }
    };
}
1854
// Symmetric equality between the bump-allocated `String` and the other
// string-like types: `str`, `&str`, `Cow<str>`, and the `core_alloc` `String`.
impl_eq! { String<'bump>, str }
impl_eq! { String<'bump>, &'a str }
impl_eq! { Cow<'a, str>, String<'bump> }
impl_eq! { core_alloc::string::String, String<'bump> }
1859
1860impl<'bump> fmt::Display for String<'bump> {
1861 #[inline]
1862 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1863 fmt::Display::fmt(&**self, f)
1864 }
1865}
1866
1867impl<'bump> fmt::Debug for String<'bump> {
1868 #[inline]
1869 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1870 fmt::Debug::fmt(&**self, f)
1871 }
1872}
1873
1874impl<'bump> hash::Hash for String<'bump> {
1875 #[inline]
1876 fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
1877 (**self).hash(hasher)
1878 }
1879}
1880
1881/// Implements the `+` operator for concatenating two strings.
1882///
1883/// This consumes the `String<'bump>` on the left-hand side and re-uses its buffer (growing it if
1884/// necessary). This is done to avoid allocating a new `String<'bump>` and copying the entire contents on
1885/// every operation, which would lead to `O(n^2)` running time when building an `n`-byte string by
1886/// repeated concatenation.
1887///
1888/// The string on the right-hand side is only borrowed; its contents are copied into the returned
1889/// `String<'bump>`.
1890///
1891/// # Examples
1892///
1893/// Concatenating two `String<'bump>`s takes the first by value and borrows the second:
1894///
1895/// ```
1896/// use bumpalo::{Bump, collections::String};
1897///
1898/// let bump = Bump::new();
1899///
1900/// let a = String::from_str_in("hello", &bump);
1901/// let b = String::from_str_in(" world", &bump);
1902/// let c = a + &b;
1903/// // `a` is moved and can no longer be used here.
1904/// ```
1905///
1906/// If you want to keep using the first `String`, you can clone it and append to the clone instead:
1907///
1908/// ```
1909/// use bumpalo::{Bump, collections::String};
1910///
1911/// let bump = Bump::new();
1912///
1913/// let a = String::from_str_in("hello", &bump);
1914/// let b = String::from_str_in(" world", &bump);
1915/// let c = a.clone() + &b;
1916/// // `a` is still valid here.
1917/// ```
1918///
1919/// Concatenating `&str` slices can be done by converting the first to a `String`:
1920///
1921/// ```
1922/// use bumpalo::{Bump, collections::String};
1923///
1924/// let bump = Bump::new();
1925///
1926/// let a = "hello";
1927/// let b = " world";
1928/// let c = String::from_str_in(a, &bump) + b;
1929/// ```
1930impl<'a, 'bump> Add<&'a str> for String<'bump> {
1931 type Output = String<'bump>;
1932
1933 #[inline]
1934 fn add(mut self, other: &str) -> String<'bump> {
1935 self.push_str(other);
1936 self
1937 }
1938}
1939
1940/// Implements the `+=` operator for appending to a `String<'bump>`.
1941///
1942/// This has the same behavior as the [`push_str`][String::push_str] method.
1943impl<'a, 'bump> AddAssign<&'a str> for String<'bump> {
1944 #[inline]
1945 fn add_assign(&mut self, other: &str) {
1946 self.push_str(other);
1947 }
1948}
1949
1950impl<'bump> ops::Index<ops::Range<usize>> for String<'bump> {
1951 type Output = str;
1952
1953 #[inline]
1954 fn index(&self, index: ops::Range<usize>) -> &str {
1955 &self[..][index]
1956 }
1957}
1958impl<'bump> ops::Index<ops::RangeTo<usize>> for String<'bump> {
1959 type Output = str;
1960
1961 #[inline]
1962 fn index(&self, index: ops::RangeTo<usize>) -> &str {
1963 &self[..][index]
1964 }
1965}
1966impl<'bump> ops::Index<ops::RangeFrom<usize>> for String<'bump> {
1967 type Output = str;
1968
1969 #[inline]
1970 fn index(&self, index: ops::RangeFrom<usize>) -> &str {
1971 &self[..][index]
1972 }
1973}
1974impl<'bump> ops::Index<ops::RangeFull> for String<'bump> {
1975 type Output = str;
1976
1977 #[inline]
1978 fn index(&self, _index: ops::RangeFull) -> &str {
1979 unsafe { str::from_utf8_unchecked(&self.vec) }
1980 }
1981}
1982impl<'bump> ops::Index<ops::RangeInclusive<usize>> for String<'bump> {
1983 type Output = str;
1984
1985 #[inline]
1986 fn index(&self, index: ops::RangeInclusive<usize>) -> &str {
1987 Index::index(&**self, index)
1988 }
1989}
1990impl<'bump> ops::Index<ops::RangeToInclusive<usize>> for String<'bump> {
1991 type Output = str;
1992
1993 #[inline]
1994 fn index(&self, index: ops::RangeToInclusive<usize>) -> &str {
1995 Index::index(&**self, index)
1996 }
1997}
1998
1999impl<'bump> ops::IndexMut<ops::Range<usize>> for String<'bump> {
2000 #[inline]
2001 fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str {
2002 &mut self[..][index]
2003 }
2004}
2005impl<'bump> ops::IndexMut<ops::RangeTo<usize>> for String<'bump> {
2006 #[inline]
2007 fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str {
2008 &mut self[..][index]
2009 }
2010}
2011impl<'bump> ops::IndexMut<ops::RangeFrom<usize>> for String<'bump> {
2012 #[inline]
2013 fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str {
2014 &mut self[..][index]
2015 }
2016}
2017impl<'bump> ops::IndexMut<ops::RangeFull> for String<'bump> {
2018 #[inline]
2019 fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
2020 unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) }
2021 }
2022}
2023impl<'bump> ops::IndexMut<ops::RangeInclusive<usize>> for String<'bump> {
2024 #[inline]
2025 fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str {
2026 IndexMut::index_mut(&mut **self, index)
2027 }
2028}
2029impl<'bump> ops::IndexMut<ops::RangeToInclusive<usize>> for String<'bump> {
2030 #[inline]
2031 fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str {
2032 IndexMut::index_mut(&mut **self, index)
2033 }
2034}
2035
2036impl<'bump> ops::Deref for String<'bump> {
2037 type Target = str;
2038
2039 #[inline]
2040 fn deref(&self) -> &str {
2041 unsafe { str::from_utf8_unchecked(&self.vec) }
2042 }
2043}
2044
2045impl<'bump> ops::DerefMut for String<'bump> {
2046 #[inline]
2047 fn deref_mut(&mut self) -> &mut str {
2048 unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) }
2049 }
2050}
2051
2052impl<'bump> AsRef<str> for String<'bump> {
2053 #[inline]
2054 fn as_ref(&self) -> &str {
2055 self
2056 }
2057}
2058
2059impl<'bump> AsRef<[u8]> for String<'bump> {
2060 #[inline]
2061 fn as_ref(&self) -> &[u8] {
2062 self.as_bytes()
2063 }
2064}
2065
2066impl<'bump> fmt::Write for String<'bump> {
2067 #[inline]
2068 fn write_str(&mut self, s: &str) -> fmt::Result {
2069 self.push_str(s);
2070 Ok(())
2071 }
2072
2073 #[inline]
2074 fn write_char(&mut self, c: char) -> fmt::Result {
2075 self.push(c);
2076 Ok(())
2077 }
2078}
2079
2080impl<'bump> Borrow<str> for String<'bump> {
2081 #[inline]
2082 fn borrow(&self) -> &str {
2083 &self[..]
2084 }
2085}
2086
2087impl<'bump> BorrowMut<str> for String<'bump> {
2088 #[inline]
2089 fn borrow_mut(&mut self) -> &mut str {
2090 &mut self[..]
2091 }
2092}
2093
/// A draining iterator for `String`.
///
/// This struct is created by the [`String::drain`] method. See its
/// documentation for more information.
///
/// Iteration yields the `char`s of the selected range; the bytes of that
/// range are removed from the string when the `Drain` is dropped.
pub struct Drain<'a, 'bump> {
    /// Will be used as &'a mut String in the destructor
    string: *mut String<'bump>,
    /// Start of part to remove
    start: usize,
    /// End of part to remove
    end: usize,
    /// Current remaining range to remove
    iter: Chars<'a>,
}
2108
2109impl<'a, 'bump> fmt::Debug for Drain<'a, 'bump> {
2110 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2111 f.pad("Drain { .. }")
2112 }
2113}
2114
// `Drain` stores a raw `*mut String<'bump>`, so it is not `Send`/`Sync`
// automatically; these impls opt back in by hand.
// NOTE(review): the soundness argument is not written down here. Presumably it
// relies on `Drain` only reading and removing the string's bytes (iteration
// and the removal in `Drop`) — confirm this is sufficient given that
// `String<'bump>` is tied to a `Bump` allocator.
unsafe impl<'a, 'bump> Sync for Drain<'a, 'bump> {}
unsafe impl<'a, 'bump> Send for Drain<'a, 'bump> {}
2117
2118impl<'a, 'bump> Drop for Drain<'a, 'bump> {
2119 fn drop(&mut self) {
2120 unsafe {
2121 // Use Vec::drain. "Reaffirm" the bounds checks to avoid
2122 // panic code being inserted again.
2123 let self_vec = (*self.string).as_mut_vec();
2124 if self.start <= self.end && self.end <= self_vec.len() {
2125 self_vec.drain(self.start..self.end);
2126 }
2127 }
2128 }
2129}
2130
2131// TODO: implement `AsRef<str/[u8]>` and `as_str`
2132
2133impl<'a, 'bump> Iterator for Drain<'a, 'bump> {
2134 type Item = char;
2135
2136 #[inline]
2137 fn next(&mut self) -> Option<char> {
2138 self.iter.next()
2139 }
2140
2141 fn size_hint(&self) -> (usize, Option<usize>) {
2142 self.iter.size_hint()
2143 }
2144}
2145
2146impl<'a, 'bump> DoubleEndedIterator for Drain<'a, 'bump> {
2147 #[inline]
2148 fn next_back(&mut self) -> Option<char> {
2149 self.iter.next_back()
2150 }
2151}
2152
// `Drain::next` forwards directly to the inner `Chars` iterator, which is
// itself fused, so `Drain` keeps returning `None` once exhausted.
impl<'a, 'bump> FusedIterator for Drain<'a, 'bump> {}
2154
#[cfg(feature = "serde")]
mod serialize {
    use super::*;

    use serde::{Serialize, Serializer};

    impl<'bump> Serialize for String<'bump> {
        /// Serializes the string as a plain string value.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: Serializer,
        {
            let text: &str = self;
            serializer.serialize_str(text)
        }
    }
}