bumpalo/collections/string.rs
1// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! A UTF-8 encoded, growable string.
12//!
13//! This module contains the [`String`] type and several error types that may
14//! result from working with [`String`]s.
15//!
16//! This module is a fork of the [`std::string`] module, that uses a bump allocator.
17//!
18//! [`std::string`]: https://doc.rust-lang.org/std/string/index.html
19//!
20//! # Examples
21//!
22//! You can create a new [`String`] from a string literal with [`String::from_str_in`]:
23//!
24//! ```
25//! use bumpalo::{Bump, collections::String};
26//!
27//! let b = Bump::new();
28//!
29//! let s = String::from_str_in("world", &b);
30//! ```
31//!
32//! [`String`]: struct.String.html
33//! [`String::from_str_in`]: struct.String.html#method.from_str_in
34//!
35//! If you have a vector of valid UTF-8 bytes, you can make a [`String`] out of
36//! it. You can do the reverse too.
37//!
38//! ```
39//! use bumpalo::{Bump, collections::String};
40//!
41//! let b = Bump::new();
42//!
43//! let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
44//!
45//! // We know these bytes are valid, so we'll use `unwrap()`.
46//! let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
47//!
48//! assert_eq!("💖", sparkle_heart);
49//!
50//! let bytes = sparkle_heart.into_bytes();
51//!
52//! assert_eq!(bytes, [240, 159, 146, 150]);
53//! ```
54
55use crate::collections::str::lossy;
56use crate::collections::vec::Vec;
57use crate::Bump;
58use core::borrow::{Borrow, BorrowMut};
59use core::char::decode_utf16;
60use core::fmt;
61use core::hash;
62use core::iter::FusedIterator;
63use core::mem;
64use core::ops::Bound::{Excluded, Included, Unbounded};
65use core::ops::{self, Add, AddAssign, Index, IndexMut, RangeBounds};
66use core::ptr;
67use core::str::{self, Chars, Utf8Error};
68use core_alloc::borrow::Cow;
69
/// Like the [`format!`] macro, but for creating [`bumpalo::collections::String`]s.
///
/// The invocation starts with `in <bump>,` where `<bump>` is the expression
/// that is handed to `String::new_in`. It is followed by the format string
/// and, optionally, the format arguments. A trailing comma is accepted.
///
/// An error reported while formatting is ignored; the string holding whatever
/// was written so far is returned.
///
/// [`format!`]: https://doc.rust-lang.org/std/macro.format.html
/// [`bumpalo::collections::String`]: collections/string/struct.String.html
///
/// # Examples
///
/// ```
/// use bumpalo::Bump;
///
/// let b = Bump::new();
///
/// let who = "World";
/// let s = bumpalo::format!(in &b, "Hello, {}!", who);
/// assert_eq!(s, "Hello, World!");
///
/// // A format string without any arguments is accepted as well.
/// let s = bumpalo::format!(in &b, "Hello!");
/// assert_eq!(s, "Hello!");
/// ```
#[macro_export]
macro_rules! format {
    // Canonical form: format string followed by comma-separated arguments.
    ( in $bump:expr, $fmt:expr, $($args:expr),* ) => {{
        use $crate::core_alloc::fmt::Write;
        let bump = $bump;
        let mut s = $crate::collections::String::new_in(bump);
        // The result of `write!` is deliberately discarded.
        let _ = write!(&mut s, $fmt, $($args),*);
        s
    }};

    // Same as above, but with a trailing comma after the last argument.
    ( in $bump:expr, $fmt:expr, $($args:expr,)* ) => {
        $crate::format!(in $bump, $fmt, $($args),*)
    };

    // Bare format string with neither arguments nor a trailing comma. The
    // arms above both require a comma after `$fmt`, so forward with one.
    ( in $bump:expr, $fmt:expr ) => {
        $crate::format!(in $bump, $fmt,)
    };
}
100
101/// A UTF-8 encoded, growable string.
102///
103/// The `String` type is the most common string type that has ownership over the
104/// contents of the string. It has a close relationship with its borrowed
105/// counterpart, the primitive [`str`].
106///
107/// [`str`]: https://doc.rust-lang.org/std/primitive.str.html
108///
109/// # Examples
110///
111/// You can create a `String` from a literal string with [`String::from_str_in`]:
112///
113/// ```
114/// use bumpalo::{Bump, collections::String};
115///
116/// let b = Bump::new();
117///
118/// let hello = String::from_str_in("Hello, world!", &b);
119/// ```
120///
121/// You can append a [`char`] to a `String` with the [`push`] method, and
122/// append a [`&str`] with the [`push_str`] method:
123///
124/// ```
125/// use bumpalo::{Bump, collections::String};
126///
127/// let b = Bump::new();
128///
129/// let mut hello = String::from_str_in("Hello, ", &b);
130///
131/// hello.push('w');
132/// hello.push_str("orld!");
133/// ```
134///
135/// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
136/// [`push`]: #method.push
137/// [`push_str`]: #method.push_str
138///
139/// If you have a vector of UTF-8 bytes, you can create a `String` from it with
140/// the [`from_utf8`] method:
141///
142/// ```
143/// use bumpalo::{Bump, collections::String};
144///
145/// let b = Bump::new();
146///
147/// // some bytes, in a vector
148/// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
149///
150/// // We know these bytes are valid, so we'll use `unwrap()`.
151/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
152///
153/// assert_eq!("💖", sparkle_heart);
154/// ```
155///
156/// [`from_utf8`]: #method.from_utf8
157///
158/// # Deref
159///
160/// `String`s implement <code>[`Deref`]<Target = [`str`]></code>, and so inherit all of [`str`]'s
161/// methods. In addition, this means that you can pass a `String` to a
162/// function which takes a [`&str`] by using an ampersand (`&`):
163///
164/// ```
165/// use bumpalo::{Bump, collections::String};
166///
167/// let b = Bump::new();
168///
169/// fn takes_str(s: &str) { }
170///
171/// let s = String::from_str_in("Hello", &b);
172///
173/// takes_str(&s);
174/// ```
175///
176/// This will create a [`&str`] from the `String` and pass it in. This
177/// conversion is very inexpensive, and so generally, functions will accept
178/// [`&str`]s as arguments unless they need a `String` for some specific
179/// reason.
180///
181/// In certain cases Rust doesn't have enough information to make this
182/// conversion, known as [`Deref`] coercion. In the following example a string
183/// slice [`&'a str`][`&str`] implements the trait `TraitExample`, and the function
184/// `example_func` takes anything that implements the trait. In this case Rust
185/// would need to make two implicit conversions, which Rust doesn't have the
186/// means to do. For that reason, the following example will not compile.
187///
188/// ```compile_fail,E0277
189/// use bumpalo::{Bump, collections::String};
190///
191/// trait TraitExample {}
192///
193/// impl<'a> TraitExample for &'a str {}
194///
195/// fn example_func<A: TraitExample>(example_arg: A) {}
196///
197/// let b = Bump::new();
198/// let example_string = String::from_str_in("example_string", &b);
199/// example_func(&example_string);
200/// ```
201///
202/// There are two options that would work instead. The first would be to
203/// change the line `example_func(&example_string);` to
204/// `example_func(example_string.as_str());`, using the method [`as_str()`]
205/// to explicitly extract the string slice containing the string. The second
206/// way changes `example_func(&example_string);` to
207/// `example_func(&*example_string);`. In this case we are dereferencing a
208/// `String` to a [`str`][`&str`], then referencing the [`str`][`&str`] back to
209/// [`&str`]. The second way is more idiomatic, however both work to do the
210/// conversion explicitly rather than relying on the implicit conversion.
211///
212/// # Representation
213///
214/// A `String` is made up of three components: a pointer to some bytes, a
215/// length, and a capacity. The pointer points to an internal buffer `String`
216/// uses to store its data. The length is the number of bytes currently stored
217/// in the buffer, and the capacity is the size of the buffer in bytes. As such,
218/// the length will always be less than or equal to the capacity.
219///
220/// This buffer is always stored on the heap.
221///
222/// You can look at these with the [`as_ptr`], [`len`], and [`capacity`]
223/// methods:
224///
225/// ```
226/// use bumpalo::{Bump, collections::String};
227/// use std::mem;
228///
229/// let b = Bump::new();
230///
231/// let mut story = String::from_str_in("Once upon a time...", &b);
232///
233/// let ptr = story.as_mut_ptr();
234/// let len = story.len();
235/// let capacity = story.capacity();
236///
237/// // story has nineteen bytes
238/// assert_eq!(19, len);
239///
240/// // Now that we have our parts, we throw the story away.
241/// mem::forget(story);
242///
243/// // We can re-build a String out of ptr, len, and capacity. This is all
244/// // unsafe because we are responsible for making sure the components are
245/// // valid:
246/// let s = unsafe { String::from_raw_parts_in(ptr, len, capacity, &b) } ;
247///
248/// assert_eq!(String::from_str_in("Once upon a time...", &b), s);
249/// ```
250///
251/// [`as_ptr`]: https://doc.rust-lang.org/std/primitive.str.html#method.as_ptr
252/// [`len`]: #method.len
253/// [`capacity`]: #method.capacity
254///
255/// If a `String` has enough capacity, adding elements to it will not
256/// re-allocate. For example, consider this program:
257///
258/// ```
259/// use bumpalo::{Bump, collections::String};
260///
261/// let b = Bump::new();
262///
263/// let mut s = String::new_in(&b);
264///
265/// println!("{}", s.capacity());
266///
267/// for _ in 0..5 {
268/// s.push_str("hello");
269/// println!("{}", s.capacity());
270/// }
271/// ```
272///
273/// This will output the following:
274///
275/// ```text
276/// 0
277/// 5
278/// 10
279/// 20
280/// 20
281/// 40
282/// ```
283///
284/// At first, we have no memory allocated at all, but as we append to the
285/// string, it increases its capacity appropriately. If we instead use the
286/// [`with_capacity_in`] method to allocate the correct capacity initially:
287///
288/// ```
289/// use bumpalo::{Bump, collections::String};
290///
291/// let b = Bump::new();
292///
293/// let mut s = String::with_capacity_in(25, &b);
294///
295/// println!("{}", s.capacity());
296///
297/// for _ in 0..5 {
298/// s.push_str("hello");
299/// println!("{}", s.capacity());
300/// }
301/// ```
302///
303/// [`with_capacity_in`]: #method.with_capacity_in
304///
305/// We end up with a different output:
306///
307/// ```text
308/// 25
309/// 25
310/// 25
311/// 25
312/// 25
313/// 25
314/// ```
315///
316/// Here, there's no need to allocate more memory inside the loop.
317///
318/// [`&str`]: https://doc.rust-lang.org/std/primitive.str.html
319/// [`Deref`]: https://doc.rust-lang.org/std/ops/trait.Deref.html
320/// [`as_str()`]: struct.String.html#method.as_str
// NOTE(review): `Eq` is derived without `PartialEq`, so `PartialEq` is
// presumably implemented by hand elsewhere in this file — confirm.
#[derive(PartialOrd, Eq, Ord)]
pub struct String<'bump> {
    // Backing byte buffer. `from_utf8` validates it as UTF-8 before wrapping
    // it; `from_utf8_unchecked` trusts the caller to have done so.
    vec: Vec<'bump, u8>,
}
325
326/// A possible error value when converting a `String` from a UTF-8 byte vector.
327///
328/// This type is the error type for the [`from_utf8`] method on [`String`]. It
329/// is designed in such a way to carefully avoid reallocations: the
330/// [`into_bytes`] method will give back the byte vector that was used in the
331/// conversion attempt.
332///
333/// [`from_utf8`]: struct.String.html#method.from_utf8
334/// [`String`]: struct.String.html
335/// [`into_bytes`]: struct.FromUtf8Error.html#method.into_bytes
336///
337/// The [`Utf8Error`] type provided by [`std::str`] represents an error that may
338/// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's
339/// an analogue to `FromUtf8Error`, and you can get one from a `FromUtf8Error`
340/// through the [`utf8_error`] method.
341///
342/// [`Utf8Error`]: https://doc.rust-lang.org/std/str/struct.Utf8Error.html
343/// [`std::str`]: https://doc.rust-lang.org/std/str/index.html
344/// [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html
345/// [`&str`]: https://doc.rust-lang.org/std/primitive.str.html
346/// [`utf8_error`]: #method.utf8_error
347///
348/// # Examples
349///
350/// Basic usage:
351///
352/// ```
353/// use bumpalo::{Bump, collections::String};
354///
355/// let b = Bump::new();
356///
357/// // some invalid bytes, in a vector
358/// let bytes = bumpalo::vec![in &b; 0, 159];
359///
360/// let value = String::from_utf8(bytes);
361///
362/// assert!(value.is_err());
363/// assert_eq!(bumpalo::vec![in &b; 0, 159], value.unwrap_err().into_bytes());
364/// ```
#[derive(Debug)]
pub struct FromUtf8Error<'bump> {
    // The byte vector that failed validation in `String::from_utf8`, kept so
    // that it can be handed back to the caller.
    bytes: Vec<'bump, u8>,
    // The error `str::from_utf8` reported for `bytes`.
    error: Utf8Error,
}
370
/// A possible error value when converting a `String` from a UTF-16 encoded `u16` slice.
372///
373/// This type is the error type for the [`from_utf16_in`] method on [`String`].
374///
375/// [`from_utf16_in`]: struct.String.html#method.from_utf16_in
376/// [`String`]: struct.String.html
377///
378/// # Examples
379///
380/// Basic usage:
381///
382/// ```
383/// use bumpalo::{Bump, collections::String};
384///
385/// let b = Bump::new();
386///
387/// // 𝄞mu<invalid>ic
388/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0xD800, 0x0069, 0x0063];
389///
390/// assert!(String::from_utf16_in(v, &b).is_err());
391/// ```
// The single field is a private `()`: the error carries no data, and code
// outside this module cannot construct it.
#[derive(Debug)]
pub struct FromUtf16Error(());
394
395impl<'bump> String<'bump> {
396 /// Creates a new empty `String`.
397 ///
398 /// Given that the `String` is empty, this will not allocate any initial
399 /// buffer. While that means that this initial operation is very
400 /// inexpensive, it may cause excessive allocation later when you add
401 /// data. If you have an idea of how much data the `String` will hold,
402 /// consider the [`with_capacity_in`] method to prevent excessive
403 /// re-allocation.
404 ///
405 /// [`with_capacity_in`]: #method.with_capacity_in
406 ///
407 /// # Examples
408 ///
409 /// Basic usage:
410 ///
411 /// ```
412 /// use bumpalo::{Bump, collections::String};
413 ///
414 /// let b = Bump::new();
415 ///
416 /// let s = String::new_in(&b);
417 /// ```
418 #[inline]
419 pub fn new_in(bump: &'bump Bump) -> String<'bump> {
420 String {
421 vec: Vec::new_in(bump),
422 }
423 }
424
425 /// Creates a new empty `String` with a particular capacity.
426 ///
427 /// `String`s have an internal buffer to hold their data. The capacity is
428 /// the length of that buffer, and can be queried with the [`capacity`]
429 /// method. This method creates an empty `String`, but one with an initial
430 /// buffer that can hold `capacity` bytes. This is useful when you may be
431 /// appending a bunch of data to the `String`, reducing the number of
432 /// reallocations it needs to do.
433 ///
434 /// [`capacity`]: #method.capacity
435 ///
436 /// If the given capacity is `0`, no allocation will occur, and this method
437 /// is identical to the [`new_in`] method.
438 ///
    /// [`new_in`]: #method.new_in
440 ///
441 /// # Examples
442 ///
443 /// Basic usage:
444 ///
445 /// ```
446 /// use bumpalo::{Bump, collections::String};
447 ///
448 /// let b = Bump::new();
449 ///
450 /// let mut s = String::with_capacity_in(10, &b);
451 ///
452 /// // The String contains no chars, even though it has capacity for more
453 /// assert_eq!(s.len(), 0);
454 ///
455 /// // These are all done without reallocating...
456 /// let cap = s.capacity();
457 /// for _ in 0..10 {
458 /// s.push('a');
459 /// }
460 ///
461 /// assert_eq!(s.capacity(), cap);
462 ///
463 /// // ...but this may make the vector reallocate
464 /// s.push('a');
465 /// ```
466 #[inline]
467 pub fn with_capacity_in(capacity: usize, bump: &'bump Bump) -> String<'bump> {
468 String {
469 vec: Vec::with_capacity_in(capacity, bump),
470 }
471 }
472
473 /// Converts a vector of bytes to a `String`.
474 ///
475 /// A string (`String`) is made of bytes ([`u8`]), and a vector of bytes
476 /// ([`Vec<u8>`]) is made of bytes, so this function converts between the
477 /// two. Not all byte slices are valid `String`s, however: `String`
478 /// requires that it is valid UTF-8. `from_utf8()` checks to ensure that
479 /// the bytes are valid UTF-8, and then does the conversion.
480 ///
481 /// If you are sure that the byte slice is valid UTF-8, and you don't want
482 /// to incur the overhead of the validity check, there is an unsafe version
483 /// of this function, [`from_utf8_unchecked`], which has the same behavior
484 /// but skips the check.
485 ///
486 /// This method will take care to not copy the vector, for efficiency's
487 /// sake.
488 ///
489 /// If you need a [`&str`] instead of a `String`, consider
490 /// [`str::from_utf8`].
491 ///
492 /// The inverse of this method is [`into_bytes`].
493 ///
494 /// # Errors
495 ///
496 /// Returns [`Err`] if the slice is not UTF-8 with a description as to why the
497 /// provided bytes are not UTF-8. The vector you moved in is also included.
498 ///
499 /// # Examples
500 ///
501 /// Basic usage:
502 ///
503 /// ```
504 /// use bumpalo::{Bump, collections::String};
505 ///
506 /// let b = Bump::new();
507 ///
508 /// // some bytes, in a vector
509 /// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
510 ///
511 /// // We know these bytes are valid, so we'll use `unwrap()`.
512 /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
513 ///
514 /// assert_eq!("💖", sparkle_heart);
515 /// ```
516 ///
517 /// Incorrect bytes:
518 ///
519 /// ```
520 /// use bumpalo::{Bump, collections::String};
521 ///
522 /// let b = Bump::new();
523 ///
524 /// // some invalid bytes, in a vector
525 /// let sparkle_heart = bumpalo::vec![in &b; 0, 159, 146, 150];
526 ///
527 /// assert!(String::from_utf8(sparkle_heart).is_err());
528 /// ```
529 ///
530 /// See the docs for [`FromUtf8Error`] for more details on what you can do
531 /// with this error.
532 ///
533 /// [`from_utf8_unchecked`]: struct.String.html#method.from_utf8_unchecked
534 /// [`&str`]: https://doc.rust-lang.org/std/primitive.str.html
535 /// [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html
536 /// [`Vec<u8>`]: ../vec/struct.Vec.html
537 /// [`str::from_utf8`]: https://doc.rust-lang.org/std/str/fn.from_utf8.html
538 /// [`into_bytes`]: struct.String.html#method.into_bytes
539 /// [`FromUtf8Error`]: struct.FromUtf8Error.html
540 /// [`Err`]: https://doc.rust-lang.org/std/result/enum.Result.html#variant.Err
541 #[inline]
542 pub fn from_utf8(vec: Vec<'bump, u8>) -> Result<String<'bump>, FromUtf8Error<'bump>> {
543 match str::from_utf8(&vec) {
544 Ok(..) => Ok(String { vec }),
545 Err(e) => Err(FromUtf8Error {
546 bytes: vec,
547 error: e,
548 }),
549 }
550 }
551
552 /// Converts a slice of bytes to a string, including invalid characters.
553 ///
554 /// Strings are made of bytes ([`u8`]), and a slice of bytes
555 /// ([`&[u8]`][slice]) is made of bytes, so this function converts
556 /// between the two. Not all byte slices are valid strings, however: strings
557 /// are required to be valid UTF-8. During this conversion,
558 /// `from_utf8_lossy_in()` will replace any invalid UTF-8 sequences with
559 /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD], which looks like this: �
560 ///
561 /// [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html
562 /// [slice]: https://doc.rust-lang.org/std/primitive.slice.html
563 /// [U+FFFD]: https://doc.rust-lang.org/std/char/constant.REPLACEMENT_CHARACTER.html
564 ///
565 /// If you are sure that the byte slice is valid UTF-8, and you don't want
566 /// to incur the overhead of the conversion, there is an unsafe version
567 /// of this function, [`from_utf8_unchecked`], which has the same behavior
568 /// but skips the checks.
569 ///
570 /// [`from_utf8_unchecked`]: struct.String.html#method.from_utf8_unchecked
571 ///
572 /// # Examples
573 ///
574 /// Basic usage:
575 ///
576 /// ```
577 /// use bumpalo::{collections::String, Bump, vec};
578 ///
579 /// let b = Bump::new();
580 ///
581 /// // some bytes, in a vector
582 /// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
583 ///
584 /// let sparkle_heart = String::from_utf8_lossy_in(&sparkle_heart, &b);
585 ///
586 /// assert_eq!("💖", sparkle_heart);
587 /// ```
588 ///
589 /// Incorrect bytes:
590 ///
591 /// ```
592 /// use bumpalo::{collections::String, Bump, vec};
593 ///
594 /// let b = Bump::new();
595 ///
596 /// // some invalid bytes
597 /// let input = b"Hello \xF0\x90\x80World";
598 /// let output = String::from_utf8_lossy_in(input, &b);
599 ///
600 /// assert_eq!("Hello �World", output);
601 /// ```
602 pub fn from_utf8_lossy_in(v: &[u8], bump: &'bump Bump) -> String<'bump> {
603 let mut iter = lossy::Utf8Lossy::from_bytes(v).chunks();
604
605 let (first_valid, first_broken) = if let Some(chunk) = iter.next() {
606 let lossy::Utf8LossyChunk { valid, broken } = chunk;
607 if valid.len() == v.len() {
608 debug_assert!(broken.is_empty());
609 unsafe {
610 return String::from_utf8_unchecked(Vec::from_iter_in(v.iter().cloned(), bump));
611 }
612 }
613 (valid, broken)
614 } else {
615 return String::from_str_in("", bump);
616 };
617
618 const REPLACEMENT: &str = "\u{FFFD}";
619
620 let mut res = String::with_capacity_in(v.len(), bump);
621 res.push_str(first_valid);
622 if !first_broken.is_empty() {
623 res.push_str(REPLACEMENT);
624 }
625
626 for lossy::Utf8LossyChunk { valid, broken } in iter {
627 res.push_str(valid);
628 if !broken.is_empty() {
629 res.push_str(REPLACEMENT);
630 }
631 }
632
633 res
634 }
635
636 /// Decode a UTF-16 encoded slice `v` into a `String`, returning [`Err`]
637 /// if `v` contains any invalid data.
638 ///
639 /// [`Err`]: https://doc.rust-lang.org/std/result/enum.Result.html#variant.Err
640 ///
641 /// # Examples
642 ///
643 /// Basic usage:
644 ///
645 /// ```
646 /// use bumpalo::{Bump, collections::String};
647 ///
648 /// let b = Bump::new();
649 ///
650 /// // 𝄞music
651 /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0x0069, 0x0063];
652 /// assert_eq!(String::from_str_in("𝄞music", &b), String::from_utf16_in(v, &b).unwrap());
653 ///
654 /// // 𝄞mu<invalid>ic
655 /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0xD800, 0x0069, 0x0063];
656 /// assert!(String::from_utf16_in(v, &b).is_err());
657 /// ```
658 pub fn from_utf16_in(v: &[u16], bump: &'bump Bump) -> Result<String<'bump>, FromUtf16Error> {
659 let mut ret = String::with_capacity_in(v.len(), bump);
660 for c in decode_utf16(v.iter().cloned()) {
661 if let Ok(c) = c {
662 ret.push(c);
663 } else {
664 return Err(FromUtf16Error(()));
665 }
666 }
667 Ok(ret)
668 }
669
670 /// Construct a new `String<'bump>` from a string slice.
671 ///
672 /// # Examples
673 ///
674 /// ```
675 /// use bumpalo::{Bump, collections::String};
676 ///
677 /// let b = Bump::new();
678 ///
679 /// let s = String::from_str_in("hello", &b);
680 /// assert_eq!(s, "hello");
681 /// ```
682 pub fn from_str_in(s: &str, bump: &'bump Bump) -> String<'bump> {
683 let len = s.len();
684 let mut t = String::with_capacity_in(len, bump);
685 // SAFETY:
686 // * `src` is valid for reads of `s.len()` bytes by virtue of being an allocated `&str`.
687 // * `dst` is valid for writes of `s.len()` bytes as `String::with_capacity_in(s.len(), bump)`
688 // above guarantees that.
689 // * Alignment is not relevant as `u8` has no alignment requirements.
690 // * Source and destination ranges cannot overlap as we just reserved the destination
691 // range from the bump.
692 unsafe { ptr::copy_nonoverlapping(s.as_ptr(), t.vec.as_mut_ptr(), len) };
693 // SAFETY: We reserved sufficent capacity for the string above.
694 // The elements at `0..len` were initialized by `copy_nonoverlapping` above.
695 unsafe { t.vec.set_len(len) };
696 t
697 }
698
699 /// Construct a new `String<'bump>` from an iterator of `char`s.
700 ///
701 /// # Examples
702 ///
703 /// ```
704 /// use bumpalo::{Bump, collections::String};
705 ///
706 /// let b = Bump::new();
707 ///
708 /// let s = String::from_iter_in(['h', 'e', 'l', 'l', 'o'].iter().cloned(), &b);
709 /// assert_eq!(s, "hello");
710 /// ```
711 pub fn from_iter_in<I: IntoIterator<Item = char>>(iter: I, bump: &'bump Bump) -> String<'bump> {
712 let mut s = String::new_in(bump);
713 for c in iter {
714 s.push(c);
715 }
716 s
717 }
718
719 /// Creates a new `String` from a length, capacity, and pointer.
720 ///
721 /// # Safety
722 ///
723 /// This is highly unsafe, due to the number of invariants that aren't
724 /// checked:
725 ///
    /// * The memory at `buf` needs to have been previously allocated in the
    /// `bump` allocator that is passed to this function.
728 /// * `length` needs to be less than or equal to `capacity`.
729 /// * `capacity` needs to be the correct value.
730 ///
731 /// Violating these may cause problems like corrupting the allocator's
732 /// internal data structures.
733 ///
    /// The ownership of `buf` is effectively transferred to the
735 /// `String` which may then deallocate, reallocate or change the
736 /// contents of memory pointed to by the pointer at will. Ensure
737 /// that nothing else uses the pointer after calling this
738 /// function.
739 ///
740 /// # Examples
741 ///
742 /// Basic usage:
743 ///
744 /// ```
745 /// use bumpalo::{Bump, collections::String};
746 /// use std::mem;
747 ///
748 /// let b = Bump::new();
749 ///
750 /// unsafe {
751 /// let mut s = String::from_str_in("hello", &b);
752 /// let ptr = s.as_mut_ptr();
753 /// let len = s.len();
754 /// let capacity = s.capacity();
755 ///
756 /// mem::forget(s);
757 ///
758 /// let s = String::from_raw_parts_in(ptr, len, capacity, &b);
759 ///
760 /// assert_eq!(s, "hello");
761 /// }
762 /// ```
763 #[inline]
764 pub unsafe fn from_raw_parts_in(
765 buf: *mut u8,
766 length: usize,
767 capacity: usize,
768 bump: &'bump Bump,
769 ) -> String<'bump> {
770 String {
771 vec: Vec::from_raw_parts_in(buf, length, capacity, bump),
772 }
773 }
774
775 /// Converts a vector of bytes to a `String` without checking that the
776 /// string contains valid UTF-8.
777 ///
778 /// See the safe version, [`from_utf8`], for more details.
779 ///
780 /// [`from_utf8`]: struct.String.html#method.from_utf8
781 ///
782 /// # Safety
783 ///
784 /// This function is unsafe because it does not check that the bytes passed
785 /// to it are valid UTF-8. If this constraint is violated, it may cause
786 /// memory unsafety issues with future users of the `String`,
787 /// as it is assumed that `String`s are valid UTF-8.
788 ///
789 /// # Examples
790 ///
791 /// Basic usage:
792 ///
793 /// ```
794 /// use bumpalo::{Bump, collections::String};
795 ///
796 /// let b = Bump::new();
797 ///
798 /// // some bytes, in a vector
799 /// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
800 ///
801 /// let sparkle_heart = unsafe {
802 /// String::from_utf8_unchecked(sparkle_heart)
803 /// };
804 ///
805 /// assert_eq!("💖", sparkle_heart);
806 /// ```
807 #[inline]
808 pub unsafe fn from_utf8_unchecked(bytes: Vec<'bump, u8>) -> String<'bump> {
809 String { vec: bytes }
810 }
811
812 /// Returns a shared reference to the allocator backing this `String`.
813 ///
814 /// # Examples
815 ///
816 /// ```
817 /// use bumpalo::{Bump, collections::String};
818 ///
819 /// // uses the same allocator as the provided `String`
820 /// fn copy_string<'bump>(s: &String<'bump>) -> &'bump str {
821 /// s.bump().alloc_str(s.as_str())
822 /// }
823 /// ```
824 #[inline]
825 #[must_use]
826 pub fn bump(&self) -> &'bump Bump {
827 self.vec.bump()
828 }
829
830 /// Converts a `String` into a byte vector.
831 ///
832 /// This consumes the `String`, so we do not need to copy its contents.
833 ///
834 /// # Examples
835 ///
836 /// Basic usage:
837 ///
838 /// ```
839 /// use bumpalo::{Bump, collections::String};
840 ///
841 /// let b = Bump::new();
842 ///
843 /// let s = String::from_str_in("hello", &b);
844 ///
845 /// assert_eq!(s.into_bytes(), [104, 101, 108, 108, 111]);
846 /// ```
847 #[inline]
848 pub fn into_bytes(self) -> Vec<'bump, u8> {
849 self.vec
850 }
851
852 /// Convert this `String<'bump>` into a `&'bump str`. This is analogous to
853 /// [`std::string::String::into_boxed_str`][into_boxed_str].
854 ///
855 /// [into_boxed_str]: https://doc.rust-lang.org/std/string/struct.String.html#method.into_boxed_str
856 ///
857 /// # Example
858 ///
859 /// ```
860 /// use bumpalo::{Bump, collections::String};
861 ///
862 /// let b = Bump::new();
863 ///
864 /// let s = String::from_str_in("foo", &b);
865 ///
866 /// assert_eq!(s.into_bump_str(), "foo");
867 /// ```
868 pub fn into_bump_str(self) -> &'bump str {
869 let s = unsafe {
870 let s = self.as_str();
871 mem::transmute(s)
872 };
873 mem::forget(self);
874 s
875 }
876
877 /// Extracts a string slice containing the entire `String`.
878 ///
879 /// # Examples
880 ///
881 /// Basic usage:
882 ///
883 /// ```
884 /// use bumpalo::{Bump, collections::String};
885 ///
886 /// let b = Bump::new();
887 ///
888 /// let s = String::from_str_in("foo", &b);
889 ///
890 /// assert_eq!("foo", s.as_str());
891 /// ```
892 #[inline]
893 pub fn as_str(&self) -> &str {
894 self
895 }
896
897 /// Converts a `String` into a mutable string slice.
898 ///
899 /// # Examples
900 ///
901 /// Basic usage:
902 ///
903 /// ```
904 /// use bumpalo::{Bump, collections::String};
905 ///
906 /// let b = Bump::new();
907 ///
908 /// let mut s = String::from_str_in("foobar", &b);
909 /// let s_mut_str = s.as_mut_str();
910 ///
911 /// s_mut_str.make_ascii_uppercase();
912 ///
913 /// assert_eq!("FOOBAR", s_mut_str);
914 /// ```
915 #[inline]
916 pub fn as_mut_str(&mut self) -> &mut str {
917 self
918 }
919
920 /// Appends a given string slice onto the end of this `String`.
921 ///
922 /// # Examples
923 ///
924 /// Basic usage:
925 ///
926 /// ```
927 /// use bumpalo::{Bump, collections::String};
928 ///
929 /// let b = Bump::new();
930 ///
931 /// let mut s = String::from_str_in("foo", &b);
932 ///
933 /// s.push_str("bar");
934 ///
935 /// assert_eq!("foobar", s);
936 /// ```
937 #[inline]
938 pub fn push_str(&mut self, string: &str) {
939 self.vec.extend_from_slice_copy(string.as_bytes())
940 }
941
942 /// Returns this `String`'s capacity, in bytes.
943 ///
944 /// # Examples
945 ///
946 /// Basic usage:
947 ///
948 /// ```
949 /// use bumpalo::{Bump, collections::String};
950 ///
951 /// let b = Bump::new();
952 ///
953 /// let s = String::with_capacity_in(10, &b);
954 ///
955 /// assert!(s.capacity() >= 10);
956 /// ```
957 #[inline]
958 pub fn capacity(&self) -> usize {
959 self.vec.capacity()
960 }
961
962 /// Ensures that this `String`'s capacity is at least `additional` bytes
963 /// larger than its length.
964 ///
965 /// The capacity may be increased by more than `additional` bytes if it
966 /// chooses, to prevent frequent reallocations.
967 ///
968 /// If you do not want this "at least" behavior, see the [`reserve_exact`]
969 /// method.
970 ///
971 /// # Panics
972 ///
973 /// Panics if the new capacity overflows [`usize`].
974 ///
975 /// [`reserve_exact`]: struct.String.html#method.reserve_exact
976 /// [`usize`]: https://doc.rust-lang.org/std/primitive.usize.html
977 ///
978 /// # Examples
979 ///
980 /// Basic usage:
981 ///
982 /// ```
983 /// use bumpalo::{Bump, collections::String};
984 ///
985 /// let b = Bump::new();
986 ///
987 /// let mut s = String::new_in(&b);
988 ///
989 /// s.reserve(10);
990 ///
991 /// assert!(s.capacity() >= 10);
992 /// ```
993 ///
994 /// This may not actually increase the capacity:
995 ///
996 /// ```
997 /// use bumpalo::{Bump, collections::String};
998 ///
999 /// let b = Bump::new();
1000 ///
1001 /// let mut s = String::with_capacity_in(10, &b);
1002 /// s.push('a');
1003 /// s.push('b');
1004 ///
1005 /// // s now has a length of 2 and a capacity of 10
1006 /// assert_eq!(2, s.len());
1007 /// assert_eq!(10, s.capacity());
1008 ///
1009 /// // Since we already have an extra 8 capacity, calling this...
1010 /// s.reserve(8);
1011 ///
1012 /// // ... doesn't actually increase.
1013 /// assert_eq!(10, s.capacity());
1014 /// ```
1015 #[inline]
1016 pub fn reserve(&mut self, additional: usize) {
1017 self.vec.reserve(additional)
1018 }
1019
1020 /// Ensures that this `String`'s capacity is `additional` bytes
1021 /// larger than its length.
1022 ///
1023 /// Consider using the [`reserve`] method unless you absolutely know
1024 /// better than the allocator.
1025 ///
1026 /// [`reserve`]: #method.reserve
1027 ///
1028 /// # Panics
1029 ///
1030 /// Panics if the new capacity overflows `usize`.
1031 ///
1032 /// # Examples
1033 ///
1034 /// Basic usage:
1035 ///
1036 /// ```
1037 /// use bumpalo::{Bump, collections::String};
1038 ///
1039 /// let b = Bump::new();
1040 ///
1041 /// let mut s = String::new_in(&b);
1042 ///
1043 /// s.reserve_exact(10);
1044 ///
1045 /// assert!(s.capacity() >= 10);
1046 /// ```
1047 ///
1048 /// This may not actually increase the capacity:
1049 ///
1050 /// ```
1051 /// use bumpalo::{Bump, collections::String};
1052 ///
1053 /// let b = Bump::new();
1054 ///
1055 /// let mut s = String::with_capacity_in(10, &b);
1056 /// s.push('a');
1057 /// s.push('b');
1058 ///
1059 /// // s now has a length of 2 and a capacity of 10
1060 /// assert_eq!(2, s.len());
1061 /// assert_eq!(10, s.capacity());
1062 ///
1063 /// // Since we already have an extra 8 capacity, calling this...
1064 /// s.reserve_exact(8);
1065 ///
1066 /// // ... doesn't actually increase.
1067 /// assert_eq!(10, s.capacity());
1068 /// ```
1069 #[inline]
1070 pub fn reserve_exact(&mut self, additional: usize) {
1071 self.vec.reserve_exact(additional)
1072 }
1073
1074 /// Shrinks the capacity of this `String` to match its length.
1075 ///
1076 /// # Examples
1077 ///
1078 /// Basic usage:
1079 ///
1080 /// ```
1081 /// use bumpalo::{Bump, collections::String};
1082 ///
1083 /// let b = Bump::new();
1084 ///
1085 /// let mut s = String::from_str_in("foo", &b);
1086 ///
1087 /// s.reserve(100);
1088 /// assert!(s.capacity() >= 100);
1089 ///
1090 /// s.shrink_to_fit();
1091 /// assert_eq!(3, s.capacity());
1092 /// ```
1093 #[inline]
1094 pub fn shrink_to_fit(&mut self) {
1095 self.vec.shrink_to_fit()
1096 }
1097
1098 /// Appends the given [`char`] to the end of this `String`.
1099 ///
1100 /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1101 ///
1102 /// # Examples
1103 ///
1104 /// Basic usage:
1105 ///
1106 /// ```
1107 /// use bumpalo::{Bump, collections::String};
1108 ///
1109 /// let b = Bump::new();
1110 ///
1111 /// let mut s = String::from_str_in("abc", &b);
1112 ///
1113 /// s.push('1');
1114 /// s.push('2');
1115 /// s.push('3');
1116 ///
1117 /// assert_eq!("abc123", s);
1118 /// ```
1119 #[inline]
1120 pub fn push(&mut self, ch: char) {
1121 match ch.len_utf8() {
1122 1 => self.vec.push(ch as u8),
1123 _ => self
1124 .vec
1125 .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()),
1126 }
1127 }
1128
1129 /// Returns a byte slice of this `String`'s contents.
1130 ///
1131 /// The inverse of this method is [`from_utf8`].
1132 ///
1133 /// [`from_utf8`]: #method.from_utf8
1134 ///
1135 /// # Examples
1136 ///
1137 /// Basic usage:
1138 ///
1139 /// ```
1140 /// use bumpalo::{Bump, collections::String};
1141 ///
1142 /// let b = Bump::new();
1143 ///
1144 /// let s = String::from_str_in("hello", &b);
1145 ///
1146 /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes());
1147 /// ```
1148 #[inline]
1149 pub fn as_bytes(&self) -> &[u8] {
1150 &self.vec
1151 }
1152
1153 /// Shortens this `String` to the specified length.
1154 ///
1155 /// If `new_len` is greater than the string's current length, this has no
1156 /// effect.
1157 ///
1158 /// Note that this method has no effect on the allocated capacity
1159 /// of the string.
1160 ///
1161 /// # Panics
1162 ///
1163 /// Panics if `new_len` does not lie on a [`char`] boundary.
1164 ///
1165 /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1166 ///
1167 /// # Examples
1168 ///
1169 /// Basic usage:
1170 ///
1171 /// ```
1172 /// use bumpalo::{Bump, collections::String};
1173 ///
1174 /// let b = Bump::new();
1175 ///
1176 /// let mut s = String::from_str_in("hello", &b);
1177 ///
1178 /// s.truncate(2);
1179 ///
1180 /// assert_eq!("he", s);
1181 /// ```
1182 #[inline]
1183 pub fn truncate(&mut self, new_len: usize) {
1184 if new_len <= self.len() {
1185 assert!(self.is_char_boundary(new_len));
1186 self.vec.truncate(new_len)
1187 }
1188 }
1189
1190 /// Removes the last character from the string buffer and returns it.
1191 ///
1192 /// Returns [`None`] if this `String` is empty.
1193 ///
1194 /// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
1195 ///
1196 /// # Examples
1197 ///
1198 /// Basic usage:
1199 ///
1200 /// ```
1201 /// use bumpalo::{Bump, collections::String};
1202 ///
1203 /// let b = Bump::new();
1204 ///
1205 /// let mut s = String::from_str_in("foo", &b);
1206 ///
1207 /// assert_eq!(s.pop(), Some('o'));
1208 /// assert_eq!(s.pop(), Some('o'));
1209 /// assert_eq!(s.pop(), Some('f'));
1210 ///
1211 /// assert_eq!(s.pop(), None);
1212 /// ```
1213 #[inline]
1214 pub fn pop(&mut self) -> Option<char> {
1215 let ch = self.chars().rev().next()?;
1216 let newlen = self.len() - ch.len_utf8();
1217 unsafe {
1218 self.vec.set_len(newlen);
1219 }
1220 Some(ch)
1221 }
1222
1223 /// Removes a [`char`] from this `String` at a byte position and returns it.
1224 ///
1225 /// This is an `O(n)` operation, as it requires copying every element in the
1226 /// buffer.
1227 ///
1228 /// # Panics
1229 ///
1230 /// Panics if `idx` is larger than or equal to the `String`'s length,
1231 /// or if it does not lie on a [`char`] boundary.
1232 ///
1233 /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1234 ///
1235 /// # Examples
1236 ///
1237 /// Basic usage:
1238 ///
1239 /// ```
1240 /// use bumpalo::{Bump, collections::String};
1241 ///
1242 /// let b = Bump::new();
1243 ///
1244 /// let mut s = String::from_str_in("foo", &b);
1245 ///
1246 /// assert_eq!(s.remove(0), 'f');
1247 /// assert_eq!(s.remove(1), 'o');
1248 /// assert_eq!(s.remove(0), 'o');
1249 /// ```
1250 #[inline]
1251 pub fn remove(&mut self, idx: usize) -> char {
1252 let ch = match self[idx..].chars().next() {
1253 Some(ch) => ch,
1254 None => panic!("cannot remove a char from the end of a string"),
1255 };
1256
1257 let next = idx + ch.len_utf8();
1258 let len = self.len();
1259 unsafe {
1260 ptr::copy(
1261 self.vec.as_ptr().add(next),
1262 self.vec.as_mut_ptr().add(idx),
1263 len - next,
1264 );
1265 self.vec.set_len(len - (next - idx));
1266 }
1267 ch
1268 }
1269
1270 /// Retains only the characters specified by the predicate.
1271 ///
1272 /// In other words, remove all characters `c` such that `f(c)` returns `false`.
1273 /// This method operates in place and preserves the order of the retained
1274 /// characters.
1275 ///
1276 /// # Examples
1277 ///
1278 /// ```
1279 /// use bumpalo::{Bump, collections::String};
1280 ///
1281 /// let b = Bump::new();
1282 ///
1283 /// let mut s = String::from_str_in("f_o_ob_ar", &b);
1284 ///
1285 /// s.retain(|c| c != '_');
1286 ///
1287 /// assert_eq!(s, "foobar");
1288 /// ```
1289 #[inline]
1290 pub fn retain<F>(&mut self, mut f: F)
1291 where
1292 F: FnMut(char) -> bool,
1293 {
1294 let len = self.len();
1295 let mut del_bytes = 0;
1296 let mut idx = 0;
1297
1298 while idx < len {
1299 let ch = unsafe { self.get_unchecked(idx..len).chars().next().unwrap() };
1300 let ch_len = ch.len_utf8();
1301
1302 if !f(ch) {
1303 del_bytes += ch_len;
1304 } else if del_bytes > 0 {
1305 unsafe {
1306 ptr::copy(
1307 self.vec.as_ptr().add(idx),
1308 self.vec.as_mut_ptr().add(idx - del_bytes),
1309 ch_len,
1310 );
1311 }
1312 }
1313
1314 // Point idx to the next char
1315 idx += ch_len;
1316 }
1317
1318 if del_bytes > 0 {
1319 unsafe {
1320 self.vec.set_len(len - del_bytes);
1321 }
1322 }
1323 }
1324
1325 /// Inserts a character into this `String` at a byte position.
1326 ///
1327 /// This is an `O(n)` operation as it requires copying every element in the
1328 /// buffer.
1329 ///
1330 /// # Panics
1331 ///
1332 /// Panics if `idx` is larger than the `String`'s length, or if it does not
1333 /// lie on a [`char`] boundary.
1334 ///
1335 /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1336 ///
1337 /// # Examples
1338 ///
1339 /// Basic usage:
1340 ///
1341 /// ```
1342 /// use bumpalo::{Bump, collections::String};
1343 ///
1344 /// let b = Bump::new();
1345 ///
1346 /// let mut s = String::with_capacity_in(3, &b);
1347 ///
1348 /// s.insert(0, 'f');
1349 /// s.insert(1, 'o');
1350 /// s.insert(2, 'o');
1351 ///
1352 /// assert_eq!("foo", s);
1353 /// ```
1354 #[inline]
1355 pub fn insert(&mut self, idx: usize, ch: char) {
1356 assert!(self.is_char_boundary(idx));
1357 let mut bits = [0; 4];
1358 let bits = ch.encode_utf8(&mut bits).as_bytes();
1359
1360 unsafe {
1361 self.insert_bytes(idx, bits);
1362 }
1363 }
1364
1365 unsafe fn insert_bytes(&mut self, idx: usize, bytes: &[u8]) {
1366 let len = self.len();
1367 let amt = bytes.len();
1368 self.vec.reserve(amt);
1369
1370 ptr::copy(
1371 self.vec.as_ptr().add(idx),
1372 self.vec.as_mut_ptr().add(idx + amt),
1373 len - idx,
1374 );
1375 ptr::copy(bytes.as_ptr(), self.vec.as_mut_ptr().add(idx), amt);
1376 self.vec.set_len(len + amt);
1377 }
1378
1379 /// Inserts a string slice into this `String` at a byte position.
1380 ///
1381 /// This is an `O(n)` operation as it requires copying every element in the
1382 /// buffer.
1383 ///
1384 /// # Panics
1385 ///
1386 /// Panics if `idx` is larger than the `String`'s length, or if it does not
1387 /// lie on a [`char`] boundary.
1388 ///
1389 /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1390 ///
1391 /// # Examples
1392 ///
1393 /// Basic usage:
1394 ///
1395 /// ```
1396 /// use bumpalo::{Bump, collections::String};
1397 ///
1398 /// let b = Bump::new();
1399 ///
1400 /// let mut s = String::from_str_in("bar", &b);
1401 ///
1402 /// s.insert_str(0, "foo");
1403 ///
1404 /// assert_eq!("foobar", s);
1405 /// ```
1406 #[inline]
1407 pub fn insert_str(&mut self, idx: usize, string: &str) {
1408 assert!(self.is_char_boundary(idx));
1409
1410 unsafe {
1411 self.insert_bytes(idx, string.as_bytes());
1412 }
1413 }
1414
1415 /// Returns a mutable reference to the contents of this `String`.
1416 ///
1417 /// # Safety
1418 ///
1419 /// This function is unsafe because the returned `&mut Vec` allows writing
1420 /// bytes which are not valid UTF-8. If this constraint is violated, using
1421 /// the original `String` after dropping the `&mut Vec` may violate memory
1422 /// safety, as it is assumed that `String`s are valid UTF-8.
1423 ///
1424 /// # Examples
1425 ///
1426 /// Basic usage:
1427 ///
1428 /// ```
1429 /// use bumpalo::{Bump, collections::String};
1430 ///
1431 /// let b = Bump::new();
1432 ///
1433 /// let mut s = String::from_str_in("hello", &b);
1434 ///
1435 /// unsafe {
1436 /// let vec = s.as_mut_vec();
1437 /// assert_eq!(vec, &[104, 101, 108, 108, 111]);
1438 ///
1439 /// vec.reverse();
1440 /// }
1441 /// assert_eq!(s, "olleh");
1442 /// ```
1443 #[inline]
1444 pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<'bump, u8> {
1445 &mut self.vec
1446 }
1447
1448 /// Returns the length of this `String`, in bytes.
1449 ///
1450 /// # Examples
1451 ///
1452 /// Basic usage:
1453 ///
1454 /// ```
1455 /// use bumpalo::{Bump, collections::String};
1456 ///
1457 /// let b = Bump::new();
1458 ///
1459 /// let a = String::from_str_in("foo", &b);
1460 ///
1461 /// assert_eq!(a.len(), 3);
1462 /// ```
1463 #[inline]
1464 pub fn len(&self) -> usize {
1465 self.vec.len()
1466 }
1467
1468 /// Returns `true` if this `String` has a length of zero.
1469 ///
1470 /// Returns `false` otherwise.
1471 ///
1472 /// # Examples
1473 ///
1474 /// Basic usage:
1475 ///
1476 /// ```
1477 /// use bumpalo::{Bump, collections::String};
1478 ///
1479 /// let b = Bump::new();
1480 ///
1481 /// let mut v = String::new_in(&b);
1482 /// assert!(v.is_empty());
1483 ///
1484 /// v.push('a');
1485 /// assert!(!v.is_empty());
1486 /// ```
1487 #[inline]
1488 pub fn is_empty(&self) -> bool {
1489 self.len() == 0
1490 }
1491
1492 /// Splits the string into two at the given index.
1493 ///
1494 /// Returns a newly allocated `String`. `self` contains bytes `[0, at)`, and
1495 /// the returned `String` contains bytes `[at, len)`. `at` must be on the
1496 /// boundary of a UTF-8 code point.
1497 ///
1498 /// Note that the capacity of `self` does not change.
1499 ///
1500 /// # Panics
1501 ///
1502 /// Panics if `at` is not on a UTF-8 code point boundary, or if it is beyond the last
1503 /// code point of the string.
1504 ///
1505 /// # Examples
1506 ///
1507 /// ```
1508 /// use bumpalo::{Bump, collections::String};
1509 ///
1510 /// let b = Bump::new();
1511 ///
1512 /// let mut hello = String::from_str_in("Hello, World!", &b);
1513 /// let world = hello.split_off(7);
1514 /// assert_eq!(hello, "Hello, ");
1515 /// assert_eq!(world, "World!");
1516 /// ```
1517 #[inline]
1518 pub fn split_off(&mut self, at: usize) -> String<'bump> {
1519 assert!(self.is_char_boundary(at));
1520 let other = self.vec.split_off(at);
1521 unsafe { String::from_utf8_unchecked(other) }
1522 }
1523
1524 /// Truncates this `String`, removing all contents.
1525 ///
1526 /// While this means the `String` will have a length of zero, it does not
1527 /// touch its capacity.
1528 ///
1529 /// # Examples
1530 ///
1531 /// Basic usage:
1532 ///
1533 /// ```
1534 /// use bumpalo::{Bump, collections::String};
1535 ///
1536 /// let b = Bump::new();
1537 ///
1538 /// let mut s = String::from_str_in("foo", &b);
1539 ///
1540 /// s.clear();
1541 ///
1542 /// assert!(s.is_empty());
1543 /// assert_eq!(0, s.len());
1544 /// assert_eq!(3, s.capacity());
1545 /// ```
1546 #[inline]
1547 pub fn clear(&mut self) {
1548 self.vec.clear()
1549 }
1550
/// Creates a draining iterator that removes the specified range in the `String`
/// and yields the removed `chars`.
///
/// Note: The element range is removed even if the iterator is not
/// consumed until the end. The removal itself is performed when the
/// returned [`Drain`] is dropped, so it does not happen if the iterator is
/// leaked.
///
/// # Panics
///
/// Panics if the starting point or end point do not lie on a [`char`]
/// boundary, or if they're out of bounds.
///
/// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
/// [`Drain`]: struct.Drain.html
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bumpalo::{Bump, collections::String};
///
/// let b = Bump::new();
///
/// let mut s = String::from_str_in("α is alpha, β is beta", &b);
/// let beta_offset = s.find('β').unwrap_or(s.len());
///
/// // Remove the range up until the β from the string
/// let t = String::from_iter_in(s.drain(..beta_offset), &b);
/// assert_eq!(t, "α is alpha, ");
/// assert_eq!(s, "β is beta");
///
/// // A full range clears the string
/// drop(s.drain(..));
/// assert_eq!(s, "");
/// ```
pub fn drain<'a, R>(&'a mut self, range: R) -> Drain<'a, 'bump>
where
    R: RangeBounds<usize>,
{
    // Memory safety
    //
    // The String version of Drain does not have the memory safety issues
    // of the vector version. The data is just plain bytes.
    // Because the range removal happens in Drop, if the Drain iterator is leaked,
    // the removal will not happen.
    let len = self.len();
    // Normalize both bounds to a half-open `start..end` byte range.
    // NOTE(review): `n + 1` overflows for a `usize::MAX` bound; that only
    // panics when overflow checks are enabled.
    let start = match range.start_bound() {
        Included(&n) => n,
        Excluded(&n) => n + 1,
        Unbounded => 0,
    };
    let end = match range.end_bound() {
        Included(&n) => n + 1,
        Excluded(&n) => n,
        Unbounded => len,
    };

    // Take out two simultaneous borrows. The &mut String won't be accessed
    // until iteration is over, in Drop.
    let self_ptr = self as *mut _;
    // slicing does the appropriate bounds checks
    // (this is also where a bound that is not on a char boundary panics)
    let chars_iter = self[start..end].chars();

    Drain {
        start,
        end,
        iter: chars_iter,
        string: self_ptr,
    }
}
1620
1621 /// Removes the specified range in the string,
1622 /// and replaces it with the given string.
1623 /// The given string doesn't need to be the same length as the range.
1624 ///
1625 /// # Panics
1626 ///
1627 /// Panics if the starting point or end point do not lie on a [`char`]
1628 /// boundary, or if they're out of bounds.
1629 ///
1630 /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html
1631 /// [`Vec::splice`]: ../vec/struct.Vec.html#method.splice
1632 ///
1633 /// # Examples
1634 ///
1635 /// Basic usage:
1636 ///
1637 /// ```
1638 /// use bumpalo::{Bump, collections::String};
1639 ///
1640 /// let b = Bump::new();
1641 ///
1642 /// let mut s = String::from_str_in("α is alpha, β is beta", &b);
1643 /// let beta_offset = s.find('β').unwrap_or(s.len());
1644 ///
1645 /// // Replace the range up until the β from the string
1646 /// s.replace_range(..beta_offset, "Α is capital alpha; ");
1647 /// assert_eq!(s, "Α is capital alpha; β is beta");
1648 /// ```
1649 pub fn replace_range<R>(&mut self, range: R, replace_with: &str)
1650 where
1651 R: RangeBounds<usize>,
1652 {
1653 // Memory safety
1654 //
1655 // Replace_range does not have the memory safety issues of a vector Splice.
1656 // of the vector version. The data is just plain bytes.
1657
1658 match range.start_bound() {
1659 Included(&n) => assert!(self.is_char_boundary(n)),
1660 Excluded(&n) => assert!(self.is_char_boundary(n + 1)),
1661 Unbounded => {}
1662 };
1663 match range.end_bound() {
1664 Included(&n) => assert!(self.is_char_boundary(n + 1)),
1665 Excluded(&n) => assert!(self.is_char_boundary(n)),
1666 Unbounded => {}
1667 };
1668
1669 unsafe { self.as_mut_vec() }.splice(range, replace_with.bytes());
1670 }
1671}
1672
1673impl<'bump> FromUtf8Error<'bump> {
1674 /// Returns a slice of bytes that were attempted to convert to a `String`.
1675 ///
1676 /// # Examples
1677 ///
1678 /// Basic usage:
1679 ///
1680 /// ```
1681 /// use bumpalo::{Bump, collections::String};
1682 ///
1683 /// let b = Bump::new();
1684 ///
1685 /// // some invalid bytes, in a vector
1686 /// let bytes = bumpalo::vec![in &b; 0, 159];
1687 ///
1688 /// let value = String::from_utf8(bytes);
1689 ///
1690 /// assert_eq!(&[0, 159], value.unwrap_err().as_bytes());
1691 /// ```
1692 pub fn as_bytes(&self) -> &[u8] {
1693 &self.bytes[..]
1694 }
1695
1696 /// Returns the bytes that were attempted to convert to a `String`.
1697 ///
1698 /// This method is carefully constructed to avoid allocation. It will
1699 /// consume the error, moving out the bytes, so that a copy of the bytes
1700 /// does not need to be made.
1701 ///
1702 /// # Examples
1703 ///
1704 /// Basic usage:
1705 ///
1706 /// ```
1707 /// use bumpalo::{Bump, collections::String};
1708 ///
1709 /// let b = Bump::new();
1710 ///
1711 /// // some invalid bytes, in a vector
1712 /// let bytes = bumpalo::vec![in &b; 0, 159];
1713 ///
1714 /// let value = String::from_utf8(bytes);
1715 ///
1716 /// assert_eq!(bumpalo::vec![in &b; 0, 159], value.unwrap_err().into_bytes());
1717 /// ```
1718 pub fn into_bytes(self) -> Vec<'bump, u8> {
1719 self.bytes
1720 }
1721
1722 /// Fetch a `Utf8Error` to get more details about the conversion failure.
1723 ///
1724 /// The [`Utf8Error`] type provided by [`std::str`] represents an error that may
1725 /// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's
1726 /// an analogue to `FromUtf8Error`. See its documentation for more details
1727 /// on using it.
1728 ///
1729 /// [`Utf8Error`]: https://doc.rust-lang.org/std/str/struct.Utf8Error.html
1730 /// [`std::str`]: https://doc.rust-lang.org/std/str/index.html
1731 /// [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html
1732 /// [`&str`]: https://doc.rust-lang.org/std/primitive.str.html
1733 ///
1734 /// # Examples
1735 ///
1736 /// Basic usage:
1737 ///
1738 /// ```
1739 /// use bumpalo::{Bump, collections::String};
1740 ///
1741 /// let b = Bump::new();
1742 ///
1743 /// // some invalid bytes, in a vector
1744 /// let bytes = bumpalo::vec![in &b; 0, 159];
1745 ///
1746 /// let error = String::from_utf8(bytes).unwrap_err().utf8_error();
1747 ///
1748 /// // the first byte is invalid here
1749 /// assert_eq!(1, error.valid_up_to());
1750 /// ```
1751 pub fn utf8_error(&self) -> Utf8Error {
1752 self.error
1753 }
1754}
1755
1756impl<'bump> fmt::Display for FromUtf8Error<'bump> {
1757 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1758 fmt::Display::fmt(&self.error, f)
1759 }
1760}
1761
1762impl fmt::Display for FromUtf16Error {
1763 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1764 fmt::Display::fmt("invalid utf-16: lone surrogate found", f)
1765 }
1766}
1767
1768impl<'bump> Clone for String<'bump> {
1769 fn clone(&self) -> Self {
1770 String {
1771 vec: self.vec.clone(),
1772 }
1773 }
1774
1775 fn clone_from(&mut self, source: &Self) {
1776 self.vec.clone_from(&source.vec);
1777 }
1778}
1779
1780impl<'bump> Extend<char> for String<'bump> {
1781 fn extend<I: IntoIterator<Item = char>>(&mut self, iter: I) {
1782 let iterator = iter.into_iter();
1783 let (lower_bound, _) = iterator.size_hint();
1784 self.reserve(lower_bound);
1785 for ch in iterator {
1786 self.push(ch)
1787 }
1788 }
1789}
1790
1791impl<'a, 'bump> Extend<&'a char> for String<'bump> {
1792 fn extend<I: IntoIterator<Item = &'a char>>(&mut self, iter: I) {
1793 self.extend(iter.into_iter().cloned());
1794 }
1795}
1796
1797impl<'a, 'bump> Extend<&'a str> for String<'bump> {
1798 fn extend<I: IntoIterator<Item = &'a str>>(&mut self, iter: I) {
1799 for s in iter {
1800 self.push_str(s)
1801 }
1802 }
1803}
1804
1805impl<'bump> Extend<String<'bump>> for String<'bump> {
1806 fn extend<I: IntoIterator<Item = String<'bump>>>(&mut self, iter: I) {
1807 for s in iter {
1808 self.push_str(&s)
1809 }
1810 }
1811}
1812
1813impl<'bump> Extend<core_alloc::string::String> for String<'bump> {
1814 fn extend<I: IntoIterator<Item = core_alloc::string::String>>(&mut self, iter: I) {
1815 for s in iter {
1816 self.push_str(&s)
1817 }
1818 }
1819}
1820
1821impl<'a, 'bump> Extend<Cow<'a, str>> for String<'bump> {
1822 fn extend<I: IntoIterator<Item = Cow<'a, str>>>(&mut self, iter: I) {
1823 for s in iter {
1824 self.push_str(&s)
1825 }
1826 }
1827}
1828
1829impl<'bump> PartialEq for String<'bump> {
1830 #[inline]
1831 fn eq(&self, other: &String) -> bool {
1832 PartialEq::eq(&self[..], &other[..])
1833 }
1834}
1835
1836macro_rules! impl_eq {
1837 ($lhs:ty, $rhs: ty) => {
1838 impl<'a, 'bump> PartialEq<$rhs> for $lhs {
1839 #[inline]
1840 fn eq(&self, other: &$rhs) -> bool {
1841 PartialEq::eq(&self[..], &other[..])
1842 }
1843 }
1844
1845 impl<'a, 'b, 'bump> PartialEq<$lhs> for $rhs {
1846 #[inline]
1847 fn eq(&self, other: &$lhs) -> bool {
1848 PartialEq::eq(&self[..], &other[..])
1849 }
1850 }
1851 };
1852}
1853
1854impl_eq! { String<'bump>, str }
1855impl_eq! { String<'bump>, &'a str }
1856impl_eq! { Cow<'a, str>, String<'bump> }
1857impl_eq! { core_alloc::string::String, String<'bump> }
1858
1859impl<'bump> fmt::Display for String<'bump> {
1860 #[inline]
1861 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1862 fmt::Display::fmt(&**self, f)
1863 }
1864}
1865
1866impl<'bump> fmt::Debug for String<'bump> {
1867 #[inline]
1868 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1869 fmt::Debug::fmt(&**self, f)
1870 }
1871}
1872
1873impl<'bump> hash::Hash for String<'bump> {
1874 #[inline]
1875 fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
1876 (**self).hash(hasher)
1877 }
1878}
1879
1880/// Implements the `+` operator for concatenating two strings.
1881///
1882/// This consumes the `String<'bump>` on the left-hand side and re-uses its buffer (growing it if
1883/// necessary). This is done to avoid allocating a new `String<'bump>` and copying the entire contents on
1884/// every operation, which would lead to `O(n^2)` running time when building an `n`-byte string by
1885/// repeated concatenation.
1886///
1887/// The string on the right-hand side is only borrowed; its contents are copied into the returned
1888/// `String<'bump>`.
1889///
1890/// # Examples
1891///
1892/// Concatenating two `String<'bump>`s takes the first by value and borrows the second:
1893///
1894/// ```
1895/// use bumpalo::{Bump, collections::String};
1896///
1897/// let bump = Bump::new();
1898///
1899/// let a = String::from_str_in("hello", &bump);
1900/// let b = String::from_str_in(" world", &bump);
1901/// let c = a + &b;
1902/// // `a` is moved and can no longer be used here.
1903/// ```
1904///
1905/// If you want to keep using the first `String`, you can clone it and append to the clone instead:
1906///
1907/// ```
1908/// use bumpalo::{Bump, collections::String};
1909///
1910/// let bump = Bump::new();
1911///
1912/// let a = String::from_str_in("hello", &bump);
1913/// let b = String::from_str_in(" world", &bump);
1914/// let c = a.clone() + &b;
1915/// // `a` is still valid here.
1916/// ```
1917///
1918/// Concatenating `&str` slices can be done by converting the first to a `String`:
1919///
1920/// ```
1921/// use bumpalo::{Bump, collections::String};
1922///
1923/// let bump = Bump::new();
1924///
1925/// let a = "hello";
1926/// let b = " world";
1927/// let c = String::from_str_in(a, &bump) + b;
1928/// ```
1929impl<'a, 'bump> Add<&'a str> for String<'bump> {
1930 type Output = String<'bump>;
1931
1932 #[inline]
1933 fn add(mut self, other: &str) -> String<'bump> {
1934 self.push_str(other);
1935 self
1936 }
1937}
1938
1939/// Implements the `+=` operator for appending to a `String<'bump>`.
1940///
1941/// This has the same behavior as the [`push_str`][String::push_str] method.
1942impl<'a, 'bump> AddAssign<&'a str> for String<'bump> {
1943 #[inline]
1944 fn add_assign(&mut self, other: &str) {
1945 self.push_str(other);
1946 }
1947}
1948
1949impl<'bump> ops::Index<ops::Range<usize>> for String<'bump> {
1950 type Output = str;
1951
1952 #[inline]
1953 fn index(&self, index: ops::Range<usize>) -> &str {
1954 &self[..][index]
1955 }
1956}
1957impl<'bump> ops::Index<ops::RangeTo<usize>> for String<'bump> {
1958 type Output = str;
1959
1960 #[inline]
1961 fn index(&self, index: ops::RangeTo<usize>) -> &str {
1962 &self[..][index]
1963 }
1964}
1965impl<'bump> ops::Index<ops::RangeFrom<usize>> for String<'bump> {
1966 type Output = str;
1967
1968 #[inline]
1969 fn index(&self, index: ops::RangeFrom<usize>) -> &str {
1970 &self[..][index]
1971 }
1972}
1973impl<'bump> ops::Index<ops::RangeFull> for String<'bump> {
1974 type Output = str;
1975
1976 #[inline]
1977 fn index(&self, _index: ops::RangeFull) -> &str {
1978 unsafe { str::from_utf8_unchecked(&self.vec) }
1979 }
1980}
1981impl<'bump> ops::Index<ops::RangeInclusive<usize>> for String<'bump> {
1982 type Output = str;
1983
1984 #[inline]
1985 fn index(&self, index: ops::RangeInclusive<usize>) -> &str {
1986 Index::index(&**self, index)
1987 }
1988}
1989impl<'bump> ops::Index<ops::RangeToInclusive<usize>> for String<'bump> {
1990 type Output = str;
1991
1992 #[inline]
1993 fn index(&self, index: ops::RangeToInclusive<usize>) -> &str {
1994 Index::index(&**self, index)
1995 }
1996}
1997
1998impl<'bump> ops::IndexMut<ops::Range<usize>> for String<'bump> {
1999 #[inline]
2000 fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str {
2001 &mut self[..][index]
2002 }
2003}
2004impl<'bump> ops::IndexMut<ops::RangeTo<usize>> for String<'bump> {
2005 #[inline]
2006 fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str {
2007 &mut self[..][index]
2008 }
2009}
2010impl<'bump> ops::IndexMut<ops::RangeFrom<usize>> for String<'bump> {
2011 #[inline]
2012 fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str {
2013 &mut self[..][index]
2014 }
2015}
2016impl<'bump> ops::IndexMut<ops::RangeFull> for String<'bump> {
2017 #[inline]
2018 fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
2019 unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) }
2020 }
2021}
2022impl<'bump> ops::IndexMut<ops::RangeInclusive<usize>> for String<'bump> {
2023 #[inline]
2024 fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str {
2025 IndexMut::index_mut(&mut **self, index)
2026 }
2027}
2028impl<'bump> ops::IndexMut<ops::RangeToInclusive<usize>> for String<'bump> {
2029 #[inline]
2030 fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str {
2031 IndexMut::index_mut(&mut **self, index)
2032 }
2033}
2034
2035impl<'bump> ops::Deref for String<'bump> {
2036 type Target = str;
2037
2038 #[inline]
2039 fn deref(&self) -> &str {
2040 unsafe { str::from_utf8_unchecked(&self.vec) }
2041 }
2042}
2043
2044impl<'bump> ops::DerefMut for String<'bump> {
2045 #[inline]
2046 fn deref_mut(&mut self) -> &mut str {
2047 unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) }
2048 }
2049}
2050
2051impl<'bump> AsRef<str> for String<'bump> {
2052 #[inline]
2053 fn as_ref(&self) -> &str {
2054 self
2055 }
2056}
2057
2058impl<'bump> AsRef<[u8]> for String<'bump> {
2059 #[inline]
2060 fn as_ref(&self) -> &[u8] {
2061 self.as_bytes()
2062 }
2063}
2064
2065impl<'bump> fmt::Write for String<'bump> {
2066 #[inline]
2067 fn write_str(&mut self, s: &str) -> fmt::Result {
2068 self.push_str(s);
2069 Ok(())
2070 }
2071
2072 #[inline]
2073 fn write_char(&mut self, c: char) -> fmt::Result {
2074 self.push(c);
2075 Ok(())
2076 }
2077}
2078
2079impl<'bump> Borrow<str> for String<'bump> {
2080 #[inline]
2081 fn borrow(&self) -> &str {
2082 &self[..]
2083 }
2084}
2085
2086impl<'bump> BorrowMut<str> for String<'bump> {
2087 #[inline]
2088 fn borrow_mut(&mut self) -> &mut str {
2089 &mut self[..]
2090 }
2091}
2092
2093/// A draining iterator for `String`.
2094///
2095/// This struct is created by the [`String::drain`] method. See its
2096/// documentation for more information.
2097pub struct Drain<'a, 'bump> {
2098 /// Will be used as &'a mut String in the destructor
2099 string: *mut String<'bump>,
2100 /// Start of part to remove
2101 start: usize,
2102 /// End of part to remove
2103 end: usize,
2104 /// Current remaining range to remove
2105 iter: Chars<'a>,
2106}
2107
2108impl<'a, 'bump> fmt::Debug for Drain<'a, 'bump> {
2109 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2110 f.pad("Drain { .. }")
2111 }
2112}
2113
// `Drain` stores a raw `*mut String<'bump>`, so it would not be `Sync` or
// `Send` automatically; these impls opt it in by hand.
// NOTE(review): confirm this is sound for a string whose storage belongs to
// a bump allocator — the justification is not visible in this part of the file.
unsafe impl<'a, 'bump> Sync for Drain<'a, 'bump> {}
unsafe impl<'a, 'bump> Send for Drain<'a, 'bump> {}
2116
2117impl<'a, 'bump> Drop for Drain<'a, 'bump> {
2118 fn drop(&mut self) {
2119 unsafe {
2120 // Use Vec::drain. "Reaffirm" the bounds checks to avoid
2121 // panic code being inserted again.
2122 let self_vec = (*self.string).as_mut_vec();
2123 if self.start <= self.end && self.end <= self_vec.len() {
2124 self_vec.drain(self.start..self.end);
2125 }
2126 }
2127 }
2128}
2129
2130// TODO: implement `AsRef<str/[u8]>` and `as_str`
2131
2132impl<'a, 'bump> Iterator for Drain<'a, 'bump> {
2133 type Item = char;
2134
2135 #[inline]
2136 fn next(&mut self) -> Option<char> {
2137 self.iter.next()
2138 }
2139
2140 fn size_hint(&self) -> (usize, Option<usize>) {
2141 self.iter.size_hint()
2142 }
2143}
2144
2145impl<'a, 'bump> DoubleEndedIterator for Drain<'a, 'bump> {
2146 #[inline]
2147 fn next_back(&mut self) -> Option<char> {
2148 self.iter.next_back()
2149 }
2150}
2151
// Marker impl: `Drain::next` simply forwards to the inner `Chars` iterator.
impl<'a, 'bump> FusedIterator for Drain<'a, 'bump> {}