bumpalo/collections/string.rs
1// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! A UTF-8 encoded, growable string.
12//!
13//! This module contains the [`String`] type and several error types that may
14//! result from working with [`String`]s.
15//!
16//! # Examples
17//!
18//! You can create a new [`String`] from a string literal with [`String::from_str_in`]:
19//!
20//! ```
21//! use bumpalo::{Bump, collections::String};
22//!
23//! let b = Bump::new();
24//!
25//! let s = String::from_str_in("world", &b);
26//! ```
27//!
28//! You can create a new [`String`] from an existing one by concatenating with
29//! `+`:
30//!
31//! [`String`]: struct.String.html
32//! [`String::from_str_in`]: struct.String.html#method.from_str_in
33//!
34//! ```
//! use bumpalo::{Bump, collections::String};
//!
//! let b = Bump::new();
//!
//! let s = String::from_str_in("Hello", &b);
//!
//! let message = s + " world!";
40//! ```
41//!
42//! If you have a vector of valid UTF-8 bytes, you can make a [`String`] out of
43//! it. You can do the reverse too.
44//!
45//! ```
46//! use bumpalo::{Bump, collections::String};
47//!
48//! let b = Bump::new();
49//!
50//! let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
51//!
52//! // We know these bytes are valid, so we'll use `unwrap()`.
53//! let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
54//!
55//! assert_eq!("💖", sparkle_heart);
56//!
57//! let bytes = sparkle_heart.into_bytes();
58//!
59//! assert_eq!(bytes, [240, 159, 146, 150]);
60//! ```
61
62use crate::collections::str::lossy;
63use crate::collections::vec::Vec;
64use crate::Bump;
65use core::char::decode_utf16;
66use core::fmt;
67use core::hash;
68use core::iter::FusedIterator;
69use core::mem;
70use core::ops::Bound::{Excluded, Included, Unbounded};
71use core::ops::{self, Add, AddAssign, Index, IndexMut, RangeBounds};
72use core::ptr;
73use core::str::{self, Chars, Utf8Error};
74use core_alloc::borrow::Cow;
75
/// Like the `format!` macro for creating `std::string::String`s but for
/// `bumpalo::collections::String`.
///
/// The arena comes first, introduced by `in`, followed by the format string
/// and any number of arguments (including none). A trailing comma is accepted.
///
/// Errors reported by `write!` are discarded; whatever was written up to that
/// point is returned.
///
/// # Examples
///
/// ```
/// use bumpalo::Bump;
///
/// let b = Bump::new();
///
/// let who = "World";
/// let s = bumpalo::format!(in &b, "Hello, {}!", who);
/// assert_eq!(s, "Hello, World!");
///
/// // A format string without arguments needs no trailing comma.
/// let s = bumpalo::format!(in &b, "Hello!");
/// assert_eq!(s, "Hello!");
/// ```
#[macro_export]
macro_rules! format {
    // One arm covers every arity: `$(, $args:expr)*` makes the comma after the
    // format string part of each argument, and `$(,)?` allows a trailing comma.
    ( in $bump:expr, $fmt:expr $(, $args:expr)* $(,)? ) => {{
        use std::fmt::Write;
        let bump = $bump;
        let mut s = $crate::collections::String::new_in(bump);
        // The result of `write!` is deliberately ignored (best effort).
        let _ = write!(&mut s, $fmt $(, $args)*);
        s
    }};
}
104
105/// A UTF-8 encoded, growable string.
106///
107/// The `String` type is the most common string type that has ownership over the
108/// contents of the string. It has a close relationship with its borrowed
109/// counterpart, the primitive [`str`].
110///
111/// [`str`]: https://doc.rust-lang.org/nightly/std/primitive.str.html
112///
113/// # Examples
114///
/// You can create a `String` from a literal string with [`String::from_str_in`](#method.from_str_in):
116///
117/// ```
118/// use bumpalo::{Bump, collections::String};
119///
120/// let b = Bump::new();
121///
122/// let hello = String::from_str_in("Hello, world!", &b);
123/// ```
124///
125/// You can append a [`char`] to a `String` with the [`push`] method, and
126/// append a [`&str`] with the [`push_str`] method:
127///
128/// ```
129/// use bumpalo::{Bump, collections::String};
130///
131/// let b = Bump::new();
132///
133/// let mut hello = String::from_str_in("Hello, ", &b);
134///
135/// hello.push('w');
136/// hello.push_str("orld!");
137/// ```
138///
139/// [`String::from_iter_in`]: #method.from_iter_in
140/// [`char`]: https://doc.rust-lang.org/nightly/std/primitive.char.html
141/// [`push`]: #method.push
142/// [`push_str`]: #method.push_str
143///
144/// If you have a vector of UTF-8 bytes, you can create a `String` from it with
145/// the [`from_utf8`] method:
146///
147/// ```
148/// use bumpalo::{Bump, collections::String};
149///
150/// let b = Bump::new();
151///
152/// // some bytes, in a vector
153/// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
154///
155/// // We know these bytes are valid, so we'll use `unwrap()`.
156/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
157///
158/// assert_eq!("💖", sparkle_heart);
159/// ```
160///
161/// [`from_utf8`]: #method.from_utf8
162///
163/// # UTF-8
164///
165/// `String`s are always valid UTF-8. This has a few implications, the first of
166/// which is that if you need a non-UTF-8 string, consider [`OsString`]. It is
167/// similar, but without the UTF-8 constraint. The second implication is that
168/// you cannot index into a `String`:
169///
170/// ```compile_fail,E0277
171/// let s = "hello";
172///
173/// println!("The first letter of s is {}", s[0]); // ERROR!!!
174/// ```
175///
176/// [`OsString`]: https://doc.rust-lang.org/nightly/std/ffi/struct.OsString.html
177///
178/// Indexing is intended to be a constant-time operation, but UTF-8 encoding
179/// does not allow us to do this. Furthermore, it's not clear what sort of
180/// thing the index should return: a byte, a codepoint, or a grapheme cluster.
181/// The [`bytes`] and [`chars`] methods return iterators over the first
182/// two, respectively.
183///
184/// [`bytes`]: #method.bytes
185/// [`chars`]: #method.chars
186///
187/// # Deref
188///
189/// `String`s implement [`Deref`]`<Target=str>`, and so inherit all of [`str`]'s
190/// methods. In addition, this means that you can pass a `String` to a
191/// function which takes a [`&str`] by using an ampersand (`&`):
192///
193/// ```
194/// use bumpalo::{Bump, collections::String};
195///
196/// let b = Bump::new();
197///
198/// fn takes_str(s: &str) { }
199///
200/// let s = String::from_str_in("Hello", &b);
201///
202/// takes_str(&s);
203/// ```
204///
205/// This will create a [`&str`] from the `String` and pass it in. This
206/// conversion is very inexpensive, and so generally, functions will accept
207/// [`&str`]s as arguments unless they need a `String` for some specific
208/// reason.
209///
210/// In certain cases Rust doesn't have enough information to make this
211/// conversion, known as [`Deref`] coercion. In the following example a string
212/// slice [`&'a str`][`&str`] implements the trait `TraitExample`, and the function
213/// `example_func` takes anything that implements the trait. In this case Rust
214/// would need to make two implicit conversions, which Rust doesn't have the
215/// means to do. For that reason, the following example will not compile.
216///
217/// ```compile_fail,E0277
218/// use bumpalo::{Bump, collections::String};
219///
220/// trait TraitExample {}
221///
222/// impl<'a> TraitExample for &'a str {}
223///
224/// fn example_func<A: TraitExample>(example_arg: A) {}
225///
226/// let b = Bump::new();
227/// let example_string = String::from_str_in("example_string", &b);
228/// example_func(&example_string);
229/// ```
230///
231/// There are two options that would work instead. The first would be to
232/// change the line `example_func(&example_string);` to
233/// `example_func(example_string.as_str());`, using the method [`as_str()`]
234/// to explicitly extract the string slice containing the string. The second
235/// way changes `example_func(&example_string);` to
236/// `example_func(&*example_string);`. In this case we are dereferencing a
237/// `String` to a [`str`][`&str`], then referencing the [`str`][`&str`] back to
238/// [`&str`]. The second way is more idiomatic, however both work to do the
239/// conversion explicitly rather than relying on the implicit conversion.
240///
241/// # Representation
242///
243/// A `String` is made up of three components: a pointer to some bytes, a
244/// length, and a capacity. The pointer points to an internal buffer `String`
245/// uses to store its data. The length is the number of bytes currently stored
246/// in the buffer, and the capacity is the size of the buffer in bytes. As such,
247/// the length will always be less than or equal to the capacity.
248///
249/// This buffer is always stored on the heap.
250///
251/// You can look at these with the [`as_ptr`], [`len`], and [`capacity`]
252/// methods:
253///
254/// ```
255/// use bumpalo::{Bump, collections::String};
256/// use std::mem;
257///
258/// let b = Bump::new();
259///
260/// let story = String::from_str_in("Once upon a time...", &b);
261///
262/// let ptr = story.as_ptr();
263/// let len = story.len();
264/// let capacity = story.capacity();
265///
266/// // story has nineteen bytes
267/// assert_eq!(19, len);
268///
269/// // Now that we have our parts, we throw the story away.
270/// mem::forget(story);
271///
272/// // We can re-build a String out of ptr, len, and capacity. This is all
273/// // unsafe because we are responsible for making sure the components are
274/// // valid:
275/// let s = unsafe { String::from_raw_parts_in(ptr as *mut _, len, capacity, &b) } ;
276///
277/// assert_eq!(String::from_str_in("Once upon a time...", &b), s);
278/// ```
279///
280/// [`as_ptr`]: #method.as_ptr
281/// [`len`]: #method.len
282/// [`capacity`]: #method.capacity
283///
284/// If a `String` has enough capacity, adding elements to it will not
285/// re-allocate. For example, consider this program:
286///
287/// ```
288/// use bumpalo::{Bump, collections::String};
289///
290/// let b = Bump::new();
291///
292/// let mut s = String::new_in(&b);
293///
294/// println!("{}", s.capacity());
295///
296/// for _ in 0..5 {
297/// s.push_str("hello");
298/// println!("{}", s.capacity());
299/// }
300/// ```
301///
302/// This will output the following:
303///
304/// ```text
305/// 0
306/// 5
307/// 10
308/// 20
309/// 20
310/// 40
311/// ```
312///
313/// At first, we have no memory allocated at all, but as we append to the
314/// string, it increases its capacity appropriately. If we instead use the
315/// [`with_capacity_in`] method to allocate the correct capacity initially:
316///
317/// ```
318/// use bumpalo::{Bump, collections::String};
319///
320/// let b = Bump::new();
321///
322/// let mut s = String::with_capacity_in(25, &b);
323///
324/// println!("{}", s.capacity());
325///
326/// for _ in 0..5 {
327/// s.push_str("hello");
328/// println!("{}", s.capacity());
329/// }
330/// ```
331///
332/// [`with_capacity_in`]: #method.with_capacity_in
333///
334/// We end up with a different output:
335///
336/// ```text
337/// 25
338/// 25
339/// 25
340/// 25
341/// 25
342/// 25
343/// ```
344///
345/// Here, there's no need to allocate more memory inside the loop.
346///
347/// [`&str`]: https://doc.rust-lang.org/nightly/std/primitive.str.html
348/// [`Deref`]: https://doc.rust-lang.org/nightly/std/ops/trait.Deref.html
349/// [`as_str()`]: struct.String.html#method.as_str
350#[derive(PartialOrd, Eq, Ord)]
351pub struct String<'bump> {
352 vec: Vec<'bump, u8>,
353}
354
355/// A possible error value when converting a `String` from a UTF-8 byte vector.
356///
357/// This type is the error type for the [`from_utf8`] method on [`String`]. It
358/// is designed in such a way to carefully avoid reallocations: the
359/// [`into_bytes`] method will give back the byte vector that was used in the
360/// conversion attempt.
361///
362/// [`from_utf8`]: struct.String.html#method.from_utf8
363/// [`String`]: struct.String.html
364/// [`into_bytes`]: struct.FromUtf8Error.html#method.into_bytes
365///
366/// The [`Utf8Error`] type provided by [`std::str`] represents an error that may
367/// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's
368/// an analogue to `FromUtf8Error`, and you can get one from a `FromUtf8Error`
369/// through the [`utf8_error`] method.
370///
371/// [`Utf8Error`]: https://doc.rust-lang.org/nightly/std/str/struct.Utf8Error.html
372/// [`std::str`]: https://doc.rust-lang.org/nightly/std/str/index.html
373/// [`u8`]: https://doc.rust-lang.org/nightly/std/primitive.u8.html
374/// [`&str`]: https://doc.rust-lang.org/nightly/std/primitive.str.html
375/// [`utf8_error`]: #method.utf8_error
376///
377/// # Examples
378///
379/// Basic usage:
380///
381/// ```
382/// use bumpalo::{Bump, collections::String};
383///
384/// let b = Bump::new();
385///
386/// // some invalid bytes, in a vector
387/// let bytes = bumpalo::vec![in &b; 0, 159];
388///
389/// let value = String::from_utf8(bytes);
390///
391/// assert!(value.is_err());
392/// assert_eq!(bumpalo::vec![in &b; 0, 159], value.unwrap_err().into_bytes());
393/// ```
#[derive(Debug)]
pub struct FromUtf8Error<'bump> {
    // The byte vector that was handed to `String::from_utf8`, kept so the
    // caller can get it back without a copy.
    bytes: Vec<'bump, u8>,
    // The error `str::from_utf8` reported for `bytes`.
    error: Utf8Error,
}
399
400/// A possible error value when converting a `String` from a UTF-16 byte slice.
401///
/// This type is the error type for the [`from_utf16_in`] method on [`String`].
///
/// [`from_utf16_in`]: struct.String.html#method.from_utf16_in
405/// [`String`]: struct.String.html
406///
407/// # Examples
408///
409/// Basic usage:
410///
411/// ```
412/// use bumpalo::{Bump, collections::String};
413///
414/// let b = Bump::new();
415///
416/// // 𝄞mu<invalid>ic
417/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
418/// 0xD800, 0x0069, 0x0063];
419///
420/// assert!(String::from_utf16_in(v, &b).is_err());
421/// ```
#[derive(Debug)]
// The single field is a private `()`: the error carries no data and cannot be
// constructed outside this module.
pub struct FromUtf16Error(());
424
425impl<'bump> String<'bump> {
426 /// Creates a new empty `String`.
427 ///
428 /// Given that the `String` is empty, this will not allocate any initial
429 /// buffer. While that means that this initial operation is very
430 /// inexpensive, it may cause excessive allocation later when you add
431 /// data. If you have an idea of how much data the `String` will hold,
432 /// consider the [`with_capacity_in`] method to prevent excessive
433 /// re-allocation.
434 ///
435 /// [`with_capacity_in`]: #method.with_capacity_in
436 ///
437 /// # Examples
438 ///
439 /// Basic usage:
440 ///
441 /// ```
442 /// use bumpalo::{Bump, collections::String};
443 ///
444 /// let b = Bump::new();
445 ///
446 /// let s = String::new_in(&b);
447 /// ```
448 #[inline]
449 pub fn new_in(bump: &'bump Bump) -> String<'bump> {
450 String {
451 vec: Vec::new_in(bump),
452 }
453 }
454
455 /// Creates a new empty `String` with a particular capacity.
456 ///
457 /// `String`s have an internal buffer to hold their data. The capacity is
458 /// the length of that buffer, and can be queried with the [`capacity`]
459 /// method. This method creates an empty `String`, but one with an initial
460 /// buffer that can hold `capacity` bytes. This is useful when you may be
461 /// appending a bunch of data to the `String`, reducing the number of
462 /// reallocations it needs to do.
463 ///
464 /// [`capacity`]: #method.capacity
465 ///
466 /// If the given capacity is `0`, no allocation will occur, and this method
467 /// is identical to the [`new_in`] method.
468 ///
    /// [`new_in`]: #method.new_in
470 ///
471 /// # Examples
472 ///
473 /// Basic usage:
474 ///
475 /// ```
476 /// use bumpalo::{Bump, collections::String};
477 ///
478 /// let b = Bump::new();
479 ///
480 /// let mut s = String::with_capacity_in(10, &b);
481 ///
482 /// // The String contains no chars, even though it has capacity for more
483 /// assert_eq!(s.len(), 0);
484 ///
485 /// // These are all done without reallocating...
486 /// let cap = s.capacity();
487 /// for _ in 0..10 {
488 /// s.push('a');
489 /// }
490 ///
491 /// assert_eq!(s.capacity(), cap);
492 ///
493 /// // ...but this may make the vector reallocate
494 /// s.push('a');
495 /// ```
496 #[inline]
497 pub fn with_capacity_in(capacity: usize, bump: &'bump Bump) -> String<'bump> {
498 String {
499 vec: Vec::with_capacity_in(capacity, bump),
500 }
501 }
502
503 /// Converts a vector of bytes to a `String`.
504 ///
505 /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a vector of bytes
506 /// ([`Vec<u8>`]) is made of bytes, so this function converts between the
507 /// two. Not all byte slices are valid `String`s, however: `String`
508 /// requires that it is valid UTF-8. `from_utf8()` checks to ensure that
509 /// the bytes are valid UTF-8, and then does the conversion.
510 ///
511 /// If you are sure that the byte slice is valid UTF-8, and you don't want
512 /// to incur the overhead of the validity check, there is an unsafe version
513 /// of this function, [`from_utf8_unchecked`], which has the same behavior
514 /// but skips the check.
515 ///
516 /// This method will take care to not copy the vector, for efficiency's
517 /// sake.
518 ///
519 /// If you need a [`&str`] instead of a `String`, consider
520 /// [`str::from_utf8`].
521 ///
522 /// The inverse of this method is [`as_bytes`].
523 ///
524 /// # Errors
525 ///
526 /// Returns [`Err`] if the slice is not UTF-8 with a description as to why the
527 /// provided bytes are not UTF-8. The vector you moved in is also included.
528 ///
529 /// # Examples
530 ///
531 /// Basic usage:
532 ///
533 /// ```
534 /// use bumpalo::{Bump, collections::String};
535 ///
536 /// let b = Bump::new();
537 ///
538 /// // some bytes, in a vector
539 /// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
540 ///
541 /// // We know these bytes are valid, so we'll use `unwrap()`.
542 /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
543 ///
544 /// assert_eq!("💖", sparkle_heart);
545 /// ```
546 ///
547 /// Incorrect bytes:
548 ///
549 /// ```
550 /// use bumpalo::{Bump, collections::String};
551 ///
552 /// let b = Bump::new();
553 ///
554 /// // some invalid bytes, in a vector
555 /// let sparkle_heart = bumpalo::vec![in &b; 0, 159, 146, 150];
556 ///
557 /// assert!(String::from_utf8(sparkle_heart).is_err());
558 /// ```
559 ///
560 /// See the docs for [`FromUtf8Error`] for more details on what you can do
561 /// with this error.
562 ///
563 /// [`from_utf8_unchecked`]: struct.String.html#method.from_utf8_unchecked
564 /// [`&str`]: https://doc.rust-lang.org/nightly/std/primitive.str.html
565 /// [`u8`]: https://doc.rust-lang.org/nightly/std/primitive.u8.html
566 /// [`Vec<u8>`]: ../vec/struct.Vec.html
567 /// [`str::from_utf8`]: https://doc.rust-lang.org/nightly/std/str/fn.from_utf8.html
568 /// [`as_bytes`]: struct.String.html#method.as_bytes
569 /// [`FromUtf8Error`]: struct.FromUtf8Error.html
570 /// [`Err`]: https://doc.rust-lang.org/nightly/std/result/enum.Result.html#variant.Err
571 #[inline]
572 pub fn from_utf8(vec: Vec<'bump, u8>) -> Result<String<'bump>, FromUtf8Error<'bump>> {
573 match str::from_utf8(&vec) {
574 Ok(..) => Ok(String { vec }),
575 Err(e) => Err(FromUtf8Error {
576 bytes: vec,
577 error: e,
578 }),
579 }
580 }
581
582 /// Converts a slice of bytes to a string, including invalid characters.
583 ///
584 /// Strings are made of bytes ([`u8`]), and a slice of bytes
585 /// ([`&[u8]`][byteslice]) is made of bytes, so this function converts
586 /// between the two. Not all byte slices are valid strings, however: strings
587 /// are required to be valid UTF-8. During this conversion,
    /// `from_utf8_lossy_in()` will replace any invalid UTF-8 sequences with
589 /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD], which looks like this: �
590 ///
591 /// [`u8`]: https://doc.rust-lang.org/nightly/std/primitive.u8.html
592 /// [byteslice]: https://doc.rust-lang.org/nightly/std/primitive.slice.html
593 /// [U+FFFD]: ../char/constant.REPLACEMENT_CHARACTER.html
594 ///
595 /// If you are sure that the byte slice is valid UTF-8, and you don't want
596 /// to incur the overhead of the conversion, there is an unsafe version
597 /// of this function, [`from_utf8_unchecked`], which has the same behavior
598 /// but skips the checks.
599 ///
600 /// [`from_utf8_unchecked`]: struct.String.html#method.from_utf8_unchecked
601 ///
602 /// # Examples
603 ///
604 /// Basic usage:
605 ///
606 /// ```
607 /// use bumpalo::{collections::String, Bump, vec};
608 ///
609 /// let b = Bump::new();
610 ///
611 /// // some bytes, in a vector
612 /// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
613 ///
614 /// let sparkle_heart = String::from_utf8_lossy_in(&sparkle_heart, &b);
615 ///
616 /// assert_eq!("💖", sparkle_heart);
617 /// ```
618 ///
619 /// Incorrect bytes:
620 ///
621 /// ```
622 /// use bumpalo::{collections::String, Bump, vec};
623 ///
624 /// let b = Bump::new();
625 ///
626 /// // some invalid bytes
627 /// let input = b"Hello \xF0\x90\x80World";
628 /// let output = String::from_utf8_lossy_in(input, &b);
629 ///
630 /// assert_eq!("Hello �World", output);
631 /// ```
632 pub fn from_utf8_lossy_in(v: &[u8], bump: &'bump Bump) -> String<'bump> {
633 let mut iter = lossy::Utf8Lossy::from_bytes(v).chunks();
634
635 let (first_valid, first_broken) = if let Some(chunk) = iter.next() {
636 let lossy::Utf8LossyChunk { valid, broken } = chunk;
637 if valid.len() == v.len() {
638 debug_assert!(broken.is_empty());
639 unsafe {
640 return String::from_utf8_unchecked(Vec::from_iter_in(v.iter().cloned(), bump));
641 }
642 }
643 (valid, broken)
644 } else {
645 return String::from_str_in("", bump);
646 };
647
648 const REPLACEMENT: &str = "\u{FFFD}";
649
650 let mut res = String::with_capacity_in(v.len(), bump);
651 res.push_str(first_valid);
652 if !first_broken.is_empty() {
653 res.push_str(REPLACEMENT);
654 }
655
656 for lossy::Utf8LossyChunk { valid, broken } in iter {
657 res.push_str(valid);
658 if !broken.is_empty() {
659 res.push_str(REPLACEMENT);
660 }
661 }
662
663 res
664 }
665
666 /// Decode a UTF-16 encoded vector `v` into a `String`, returning [`Err`]
667 /// if `v` contains any invalid data.
668 ///
669 /// [`Err`]: https://doc.rust-lang.org/nightly/std/result/enum.Result.html#variant.Err
670 ///
671 /// # Examples
672 ///
673 /// Basic usage:
674 ///
675 /// ```
676 /// use bumpalo::{Bump, collections::String};
677 ///
678 /// let b = Bump::new();
679 ///
680 /// // 𝄞music
681 /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
682 /// 0x0073, 0x0069, 0x0063];
683 /// assert_eq!(String::from_str_in("𝄞music", &b),
684 /// String::from_utf16_in(v, &b).unwrap());
685 ///
686 /// // 𝄞mu<invalid>ic
687 /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
688 /// 0xD800, 0x0069, 0x0063];
689 /// assert!(String::from_utf16_in(v, &b).is_err());
690 /// ```
691 pub fn from_utf16_in(v: &[u16], bump: &'bump Bump) -> Result<String<'bump>, FromUtf16Error> {
692 let mut ret = String::with_capacity_in(v.len(), bump);
693 for c in decode_utf16(v.iter().cloned()) {
694 if let Ok(c) = c {
695 ret.push(c);
696 } else {
697 return Err(FromUtf16Error(()));
698 }
699 }
700 Ok(ret)
701 }
702
    /// Construct a new `String<'bump>` by copying the contents of a string slice.
704 ///
705 /// # Examples
706 ///
707 /// ```
708 /// use bumpalo::{Bump, collections::String};
709 ///
710 /// let b = Bump::new();
711 ///
712 /// let s = String::from_str_in("hello", &b);
713 /// assert_eq!(s, "hello");
714 /// ```
715 pub fn from_str_in(s: &str, bump: &'bump Bump) -> String<'bump> {
716 let mut t = String::with_capacity_in(s.len(), bump);
717 t.push_str(s);
718 t
719 }
720
721 /// Construct a new `String<'bump>` from an iterator of `char`s.
722 ///
723 /// # Examples
724 ///
725 /// ```
726 /// use bumpalo::{Bump, collections::String};
727 ///
728 /// let b = Bump::new();
729 ///
    /// let s = String::from_iter_in("hello".chars(), &b);
731 /// assert_eq!(s, "hello");
732 /// ```
733 pub fn from_iter_in<I: IntoIterator<Item = char>>(iter: I, bump: &'bump Bump) -> String<'bump> {
734 let mut s = String::new_in(bump);
735 for c in iter {
736 s.push(c);
737 }
738 s
739 }
740
741 /// Creates a new `String` from a length, capacity, and pointer.
742 ///
743 /// # Safety
744 ///
745 /// This is highly unsafe, due to the number of invariants that aren't
746 /// checked:
747 ///
    /// * The memory at `ptr` needs to have been previously allocated from
    ///   the `bump` arena passed to this function.
    /// * The first `length` bytes at `ptr` need to be valid UTF-8.
750 /// * `length` needs to be less than or equal to `capacity`.
751 /// * `capacity` needs to be the correct value.
752 ///
753 /// Violating these may cause problems like corrupting the allocator's
754 /// internal data structures.
755 ///
756 /// The ownership of `ptr` is effectively transferred to the
757 /// `String` which may then deallocate, reallocate or change the
758 /// contents of memory pointed to by the pointer at will. Ensure
759 /// that nothing else uses the pointer after calling this
760 /// function.
761 ///
762 /// # Examples
763 ///
764 /// Basic usage:
765 ///
766 /// ```
767 /// use bumpalo::{Bump, collections::String};
768 /// use std::mem;
769 ///
770 /// let b = Bump::new();
771 ///
772 /// unsafe {
773 /// let s = String::from_str_in("hello", &b);
774 /// let ptr = s.as_ptr();
775 /// let len = s.len();
776 /// let capacity = s.capacity();
777 ///
778 /// mem::forget(s);
779 ///
780 /// let s = String::from_raw_parts_in(ptr as *mut _, len, capacity, &b);
781 ///
782 /// assert_eq!(String::from_str_in("hello", &b), s);
783 /// }
784 /// ```
785 #[inline]
786 pub unsafe fn from_raw_parts_in(
787 buf: *mut u8,
788 length: usize,
789 capacity: usize,
790 bump: &'bump Bump,
791 ) -> String<'bump> {
792 String {
793 vec: Vec::from_raw_parts_in(buf, length, capacity, bump),
794 }
795 }
796
797 /// Converts a vector of bytes to a `String` without checking that the
798 /// string contains valid UTF-8.
799 ///
800 /// See the safe version, [`from_utf8`], for more details.
801 ///
802 /// [`from_utf8`]: struct.String.html#method.from_utf8
803 ///
804 /// # Safety
805 ///
806 /// This function is unsafe because it does not check that the bytes passed
807 /// to it are valid UTF-8. If this constraint is violated, it may cause
808 /// memory unsafety issues with future users of the `String`, as the rest of
809 /// the standard library assumes that `String`s are valid UTF-8.
810 ///
811 /// # Examples
812 ///
813 /// Basic usage:
814 ///
815 /// ```
816 /// use bumpalo::{Bump, collections::String};
817 ///
818 /// let b = Bump::new();
819 ///
820 /// // some bytes, in a vector
821 /// let sparkle_heart = bumpalo::vec![in &b; 240, 159, 146, 150];
822 ///
823 /// let sparkle_heart = unsafe {
824 /// String::from_utf8_unchecked(sparkle_heart)
825 /// };
826 ///
827 /// assert_eq!("💖", sparkle_heart);
828 /// ```
829 #[inline]
830 pub unsafe fn from_utf8_unchecked(bytes: Vec<'bump, u8>) -> String<'bump> {
831 String { vec: bytes }
832 }
833
834 /// Converts a `String` into a byte vector.
835 ///
836 /// This consumes the `String`, so we do not need to copy its contents.
837 ///
838 /// # Examples
839 ///
840 /// Basic usage:
841 ///
842 /// ```
843 /// use bumpalo::{Bump, collections::String};
844 ///
845 /// let b = Bump::new();
846 ///
847 /// let s = String::from_str_in("hello", &b);
848 /// let bytes = s.into_bytes();
849 ///
850 /// assert_eq!(&[104, 101, 108, 108, 111][..], &bytes[..]);
851 /// ```
852 #[inline]
853 pub fn into_bytes(self) -> Vec<'bump, u8> {
854 self.vec
855 }
856
    /// Convert this `String<'bump>` into a `&'bump str`. This is analogous to
858 /// `std::string::String::into_boxed_str`.
859 ///
860 /// # Example
861 ///
862 /// ```
863 /// use bumpalo::{Bump, collections::String};
864 ///
865 /// let b = Bump::new();
866 ///
867 /// let s = String::from_str_in("foo", &b);
868 /// let t = s.into_bump_str();
869 /// assert_eq!("foo", t);
870 /// ```
871 pub fn into_bump_str(self) -> &'bump str {
872 let s = unsafe {
873 let s = self.as_str();
874 mem::transmute(s)
875 };
876 mem::forget(self);
877 s
878 }
879
880 /// Extracts a string slice containing the entire `String`.
881 ///
882 /// # Examples
883 ///
884 /// Basic usage:
885 ///
886 /// ```
887 /// use bumpalo::{Bump, collections::String};
888 ///
889 /// let b = Bump::new();
890 ///
891 /// let s = String::from_str_in("foo", &b);
892 ///
893 /// assert_eq!("foo", s.as_str());
894 /// ```
895 #[inline]
896 pub fn as_str(&self) -> &str {
897 self
898 }
899
900 /// Converts a `String` into a mutable string slice.
901 ///
902 /// # Examples
903 ///
904 /// Basic usage:
905 ///
906 /// ```
907 /// use bumpalo::{Bump, collections::String};
908 ///
909 /// let b = Bump::new();
910 ///
911 /// let mut s = String::from_str_in("foobar", &b);
912 /// let s_mut_str = s.as_mut_str();
913 ///
914 /// s_mut_str.make_ascii_uppercase();
915 ///
916 /// assert_eq!("FOOBAR", s_mut_str);
917 /// ```
918 #[inline]
919 pub fn as_mut_str(&mut self) -> &mut str {
920 self
921 }
922
923 /// Appends a given string slice onto the end of this `String`.
924 ///
925 /// # Examples
926 ///
927 /// Basic usage:
928 ///
929 /// ```
930 /// use bumpalo::{Bump, collections::String};
931 ///
932 /// let b = Bump::new();
933 ///
934 /// let mut s = String::from_str_in("foo", &b);
935 ///
936 /// s.push_str("bar");
937 ///
938 /// assert_eq!("foobar", s);
939 /// ```
940 #[inline]
941 pub fn push_str(&mut self, string: &str) {
942 self.vec.extend_from_slice(string.as_bytes())
943 }
944
945 /// Returns this `String`'s capacity, in bytes.
946 ///
947 /// # Examples
948 ///
949 /// Basic usage:
950 ///
951 /// ```
952 /// use bumpalo::{Bump, collections::String};
953 ///
954 /// let b = Bump::new();
955 ///
956 /// let s = String::with_capacity_in(10, &b);
957 ///
958 /// assert!(s.capacity() >= 10);
959 /// ```
960 #[inline]
961 pub fn capacity(&self) -> usize {
962 self.vec.capacity()
963 }
964
965 /// Ensures that this `String`'s capacity is at least `additional` bytes
966 /// larger than its length.
967 ///
968 /// The capacity may be increased by more than `additional` bytes if it
969 /// chooses, to prevent frequent reallocations.
970 ///
971 /// If you do not want this "at least" behavior, see the [`reserve_exact`]
972 /// method.
973 ///
974 /// # Panics
975 ///
976 /// Panics if the new capacity overflows [`usize`].
977 ///
978 /// [`reserve_exact`]: struct.String.html#method.reserve_exact
979 /// [`usize`]: https://doc.rust-lang.org/nightly/std/primitive.usize.html
980 ///
981 /// # Examples
982 ///
983 /// Basic usage:
984 ///
985 /// ```
986 /// use bumpalo::{Bump, collections::String};
987 ///
988 /// let b = Bump::new();
989 ///
990 /// let mut s = String::new_in(&b);
991 ///
992 /// s.reserve(10);
993 ///
994 /// assert!(s.capacity() >= 10);
995 /// ```
996 ///
997 /// This may not actually increase the capacity:
998 ///
999 /// ```
1000 /// use bumpalo::{Bump, collections::String};
1001 ///
1002 /// let b = Bump::new();
1003 ///
1004 /// let mut s = String::with_capacity_in(10, &b);
1005 /// s.push('a');
1006 /// s.push('b');
1007 ///
1008 /// // s now has a length of 2 and a capacity of 10
1009 /// assert_eq!(2, s.len());
1010 /// assert_eq!(10, s.capacity());
1011 ///
1012 /// // Since we already have an extra 8 capacity, calling this...
1013 /// s.reserve(8);
1014 ///
1015 /// // ... doesn't actually increase.
1016 /// assert_eq!(10, s.capacity());
1017 /// ```
1018 #[inline]
1019 pub fn reserve(&mut self, additional: usize) {
1020 self.vec.reserve(additional)
1021 }
1022
    /// Ensures that this `String`'s capacity is at least `additional` bytes
    /// larger than its length.
    ///
    /// Consider using the [`reserve`] method unless you absolutely know
    /// better than the allocator.
    ///
    /// [`reserve`]: #method.reserve
    ///
    /// # Panics
    ///
    /// Panics if the new capacity overflows `usize`.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use bumpalo::{Bump, collections::String};
    ///
    /// let b = Bump::new();
    ///
    /// let mut s = String::new_in(&b);
    ///
    /// s.reserve_exact(10);
    ///
    /// assert!(s.capacity() >= 10);
    /// ```
    ///
    /// This may not actually increase the capacity:
    ///
    /// ```
    /// use bumpalo::{Bump, collections::String};
    ///
    /// let b = Bump::new();
    ///
    /// let mut s = String::with_capacity_in(10, &b);
    /// s.push('a');
    /// s.push('b');
    ///
    /// // s now has a length of 2 and a capacity of 10
    /// assert_eq!(2, s.len());
    /// assert_eq!(10, s.capacity());
    ///
    /// // Since we already have an extra 8 capacity, calling this...
    /// s.reserve_exact(8);
    ///
    /// // ... doesn't actually increase.
    /// assert_eq!(10, s.capacity());
    /// ```
    #[inline]
    pub fn reserve_exact(&mut self, additional: usize) {
        // Forwarded to the backing byte vector, which owns the allocation.
        self.vec.reserve_exact(additional)
    }
1076
    /// Shrinks the capacity of this `String` to match its length.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use bumpalo::{Bump, collections::String};
    ///
    /// let b = Bump::new();
    ///
    /// let mut s = String::from_str_in("foo", &b);
    ///
    /// s.reserve(100);
    /// assert!(s.capacity() >= 100);
    ///
    /// s.shrink_to_fit();
    /// assert_eq!(3, s.capacity());
    /// ```
    #[inline]
    pub fn shrink_to_fit(&mut self) {
        // Forwarded to the backing byte vector, which owns the allocation.
        self.vec.shrink_to_fit()
    }
1100
1101 /// Appends the given [`char`] to the end of this `String`.
1102 ///
1103 /// [`char`]: https://doc.rust-lang.org/nightly/std/primitive.char.html
1104 ///
1105 /// # Examples
1106 ///
1107 /// Basic usage:
1108 ///
1109 /// ```
1110 /// use bumpalo::{Bump, collections::String};
1111 ///
1112 /// let b = Bump::new();
1113 ///
1114 /// let mut s = String::from_str_in("abc", &b);
1115 ///
1116 /// s.push('1');
1117 /// s.push('2');
1118 /// s.push('3');
1119 ///
1120 /// assert_eq!("abc123", s);
1121 /// ```
1122 #[inline]
1123 pub fn push(&mut self, ch: char) {
1124 match ch.len_utf8() {
1125 1 => self.vec.push(ch as u8),
1126 _ => self
1127 .vec
1128 .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()),
1129 }
1130 }
1131
    /// Returns a byte slice of this `String`'s contents.
    ///
    /// The inverse of this method is [`from_utf8`].
    ///
    /// [`from_utf8`]: #method.from_utf8
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use bumpalo::{Bump, collections::String};
    ///
    /// let b = Bump::new();
    ///
    /// let s = String::from_str_in("hello", &b);
    ///
    /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes());
    /// ```
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        // Borrow the backing vector's contents as a plain byte slice.
        &self.vec
    }
1155
1156 /// Shortens this `String` to the specified length.
1157 ///
1158 /// If `new_len` is greater than the string's current length, this has no
1159 /// effect.
1160 ///
1161 /// Note that this method has no effect on the allocated capacity
1162 /// of the string
1163 ///
1164 /// # Panics
1165 ///
1166 /// Panics if `new_len` does not lie on a [`char`] boundary.
1167 ///
1168 /// [`char`]: https://doc.rust-lang.org/nightly/std/primitive.char.html
1169 ///
1170 /// # Examples
1171 ///
1172 /// Basic usage:
1173 ///
1174 /// ```
1175 /// use bumpalo::{Bump, collections::String};
1176 ///
1177 /// let b = Bump::new();
1178 ///
1179 /// let mut s = String::from_str_in("hello", &b);
1180 ///
1181 /// s.truncate(2);
1182 ///
1183 /// assert_eq!("he", s);
1184 /// ```
1185 #[inline]
1186 pub fn truncate(&mut self, new_len: usize) {
1187 if new_len <= self.len() {
1188 assert!(self.is_char_boundary(new_len));
1189 self.vec.truncate(new_len)
1190 }
1191 }
1192
1193 /// Removes the last character from the string buffer and returns it.
1194 ///
1195 /// Returns [`None`] if this `String` is empty.
1196 ///
1197 /// [`None`]: https://doc.rust-lang.org/nightly/std/option/enum.Option.html#variant.None
1198 ///
1199 /// # Examples
1200 ///
1201 /// Basic usage:
1202 ///
1203 /// ```
1204 /// use bumpalo::{Bump, collections::String};
1205 ///
1206 /// let b = Bump::new();
1207 ///
1208 /// let mut s = String::from_str_in("foo", &b);
1209 ///
1210 /// assert_eq!(s.pop(), Some('o'));
1211 /// assert_eq!(s.pop(), Some('o'));
1212 /// assert_eq!(s.pop(), Some('f'));
1213 ///
1214 /// assert_eq!(s.pop(), None);
1215 /// ```
1216 #[inline]
1217 pub fn pop(&mut self) -> Option<char> {
1218 let ch = self.chars().rev().next()?;
1219 let newlen = self.len() - ch.len_utf8();
1220 unsafe {
1221 self.vec.set_len(newlen);
1222 }
1223 Some(ch)
1224 }
1225
1226 /// Removes a [`char`] from this `String` at a byte position and returns it.
1227 ///
1228 /// This is an `O(n)` operation, as it requires copying every element in the
1229 /// buffer.
1230 ///
1231 /// # Panics
1232 ///
1233 /// Panics if `idx` is larger than or equal to the `String`'s length,
1234 /// or if it does not lie on a [`char`] boundary.
1235 ///
1236 /// [`char`]: https://doc.rust-lang.org/nightly/std/primitive.char.html
1237 ///
1238 /// # Examples
1239 ///
1240 /// Basic usage:
1241 ///
1242 /// ```
1243 /// use bumpalo::{Bump, collections::String};
1244 ///
1245 /// let b = Bump::new();
1246 ///
1247 /// let mut s = String::from_str_in("foo", &b);
1248 ///
1249 /// assert_eq!(s.remove(0), 'f');
1250 /// assert_eq!(s.remove(1), 'o');
1251 /// assert_eq!(s.remove(0), 'o');
1252 /// ```
1253 #[inline]
1254 pub fn remove(&mut self, idx: usize) -> char {
1255 let ch = match self[idx..].chars().next() {
1256 Some(ch) => ch,
1257 None => panic!("cannot remove a char from the end of a string"),
1258 };
1259
1260 let next = idx + ch.len_utf8();
1261 let len = self.len();
1262 unsafe {
1263 ptr::copy(
1264 self.vec.as_ptr().add(next),
1265 self.vec.as_mut_ptr().add(idx),
1266 len - next,
1267 );
1268 self.vec.set_len(len - (next - idx));
1269 }
1270 ch
1271 }
1272
1273 /// Retains only the characters specified by the predicate.
1274 ///
1275 /// In other words, remove all characters `c` such that `f(c)` returns `false`.
1276 /// This method operates in place and preserves the order of the retained
1277 /// characters.
1278 ///
1279 /// # Examples
1280 ///
1281 /// ```
1282 /// use bumpalo::{Bump, collections::String};
1283 ///
1284 /// let b = Bump::new();
1285 ///
1286 /// let mut s = String::from_str_in("f_o_ob_ar", &b);
1287 ///
1288 /// s.retain(|c| c != '_');
1289 ///
1290 /// assert_eq!(s, "foobar");
1291 /// ```
1292 #[inline]
1293 pub fn retain<F>(&mut self, mut f: F)
1294 where
1295 F: FnMut(char) -> bool,
1296 {
1297 let len = self.len();
1298 let mut del_bytes = 0;
1299 let mut idx = 0;
1300
1301 while idx < len {
1302 let ch = unsafe { self.get_unchecked(idx..len).chars().next().unwrap() };
1303 let ch_len = ch.len_utf8();
1304
1305 if !f(ch) {
1306 del_bytes += ch_len;
1307 } else if del_bytes > 0 {
1308 unsafe {
1309 ptr::copy(
1310 self.vec.as_ptr().add(idx),
1311 self.vec.as_mut_ptr().add(idx - del_bytes),
1312 ch_len,
1313 );
1314 }
1315 }
1316
1317 // Point idx to the next char
1318 idx += ch_len;
1319 }
1320
1321 if del_bytes > 0 {
1322 unsafe {
1323 self.vec.set_len(len - del_bytes);
1324 }
1325 }
1326 }
1327
1328 /// Inserts a character into this `String` at a byte position.
1329 ///
1330 /// This is an `O(n)` operation as it requires copying every element in the
1331 /// buffer.
1332 ///
1333 /// # Panics
1334 ///
1335 /// Panics if `idx` is larger than the `String`'s length, or if it does not
1336 /// lie on a [`char`] boundary.
1337 ///
1338 /// [`char`]: https://doc.rust-lang.org/nightly/std/primitive.char.html
1339 ///
1340 /// # Examples
1341 ///
1342 /// Basic usage:
1343 ///
1344 /// ```
1345 /// use bumpalo::{Bump, collections::String};
1346 ///
1347 /// let b = Bump::new();
1348 ///
1349 /// let mut s = String::with_capacity_in(3, &b);
1350 ///
1351 /// s.insert(0, 'f');
1352 /// s.insert(1, 'o');
1353 /// s.insert(2, 'o');
1354 ///
1355 /// assert_eq!("foo", s);
1356 /// ```
1357 #[inline]
1358 pub fn insert(&mut self, idx: usize, ch: char) {
1359 assert!(self.is_char_boundary(idx));
1360 let mut bits = [0; 4];
1361 let bits = ch.encode_utf8(&mut bits).as_bytes();
1362
1363 unsafe {
1364 self.insert_bytes(idx, bits);
1365 }
1366 }
1367
    /// Inserts `bytes` into the backing vector at byte offset `idx`, shifting
    /// the existing tail to the right.
    ///
    /// # Safety
    ///
    /// This function performs no checks of its own. The caller must ensure
    /// that `idx <= self.len()` (otherwise `len - idx` underflows and the raw
    /// copies go out of bounds) and that the string is still valid UTF-8
    /// after the insertion; both callers in this file assert
    /// `is_char_boundary(idx)` and pass the bytes of a `char` or `&str`.
    unsafe fn insert_bytes(&mut self, idx: usize, bytes: &[u8]) {
        let len = self.len();
        let amt = bytes.len();
        // Make room first so the raw pointers taken below stay valid.
        self.vec.reserve(amt);

        // Move the tail `[idx, len)` up by `amt` bytes; source and destination
        // may overlap, hence `ptr::copy` rather than a non-overlapping copy.
        ptr::copy(
            self.vec.as_ptr().add(idx),
            self.vec.as_mut_ptr().add(idx + amt),
            len - idx,
        );
        // Fill the gap that was just opened with the new bytes.
        ptr::copy(bytes.as_ptr(), self.vec.as_mut_ptr().add(idx), amt);
        // Only now expose the longer length.
        self.vec.set_len(len + amt);
    }
1381
    /// Inserts a string slice into this `String` at a byte position.
    ///
    /// This is an `O(n)` operation as it requires copying every element in the
    /// buffer.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is larger than the `String`'s length, or if it does not
    /// lie on a [`char`] boundary.
    ///
    /// [`char`]: https://doc.rust-lang.org/nightly/std/primitive.char.html
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use bumpalo::{Bump, collections::String};
    ///
    /// let b = Bump::new();
    ///
    /// let mut s = String::from_str_in("bar", &b);
    ///
    /// s.insert_str(0, "foo");
    ///
    /// assert_eq!("foobar", s);
    /// ```
    #[inline]
    pub fn insert_str(&mut self, idx: usize, string: &str) {
        assert!(self.is_char_boundary(idx));

        // SAFETY: `idx` was just checked to be an in-bounds char boundary and
        // `string` is a `&str`, so the inserted bytes are valid UTF-8.
        unsafe {
            self.insert_bytes(idx, string.as_bytes());
        }
    }
1417
    /// Returns a mutable reference to the byte vector backing this `String`.
    ///
    /// # Safety
    ///
    /// This function is unsafe because the returned vector can be modified
    /// freely, with no check that its bytes are still valid UTF-8. If this
    /// constraint is violated, it may cause memory unsafety issues with
    /// future users of the `String`, as code working with strings assumes
    /// that they are valid UTF-8.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use bumpalo::{Bump, collections::String};
    ///
    /// let b = Bump::new();
    ///
    /// let mut s = String::from_str_in("hello", &b);
    ///
    /// unsafe {
    ///     let vec = s.as_mut_vec();
    ///     assert_eq!(&[104, 101, 108, 108, 111][..], &vec[..]);
    ///
    ///     vec.reverse();
    /// }
    /// assert_eq!(s, "olleh");
    /// ```
    #[inline]
    pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<'bump, u8> {
        // Hands out the raw storage; upholding the UTF-8 invariant is the
        // caller's responsibility from here on.
        &mut self.vec
    }
1450
    /// Returns the length of this `String`, in bytes.
    ///
    /// This is the number of bytes in the backing vector, not the number of
    /// `char`s.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use bumpalo::{Bump, collections::String};
    ///
    /// let b = Bump::new();
    ///
    /// let a = String::from_str_in("foo", &b);
    ///
    /// assert_eq!(a.len(), 3);
    /// ```
    #[inline]
    pub fn len(&self) -> usize {
        self.vec.len()
    }
1470
    /// Returns `true` if this `String` has a length of zero.
    ///
    /// Returns `false` otherwise.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use bumpalo::{Bump, collections::String};
    ///
    /// let b = Bump::new();
    ///
    /// let mut v = String::new_in(&b);
    /// assert!(v.is_empty());
    ///
    /// v.push('a');
    /// assert!(!v.is_empty());
    /// ```
    #[inline]
    pub fn is_empty(&self) -> bool {
        // Emptiness is defined purely in terms of the byte length.
        self.len() == 0
    }
1494
    /// Splits the string into two at the given index.
    ///
    /// Returns a newly allocated `String`. `self` contains bytes `[0, at)`, and
    /// the returned `String` contains bytes `[at, len)`. `at` must be on the
    /// boundary of a UTF-8 code point.
    ///
    /// Note that the capacity of `self` does not change.
    ///
    /// # Panics
    ///
    /// Panics if `at` is not on a `UTF-8` code point boundary, or if it is beyond the last
    /// code point of the string.
    ///
    /// # Examples
    ///
    /// ```
    /// use bumpalo::{Bump, collections::String};
    ///
    /// let b = Bump::new();
    ///
    /// let mut hello = String::from_str_in("Hello, World!", &b);
    /// let world = hello.split_off(7);
    /// assert_eq!(hello, "Hello, ");
    /// assert_eq!(world, "World!");
    /// ```
    #[inline]
    pub fn split_off(&mut self, at: usize) -> String<'bump> {
        assert!(self.is_char_boundary(at));
        let other = self.vec.split_off(at);
        // SAFETY: `at` was just checked to be a char boundary, so the bytes
        // split off from it onwards form valid UTF-8 by themselves.
        unsafe { String::from_utf8_unchecked(other) }
    }
1526
    /// Truncates this `String`, removing all contents.
    ///
    /// While this means the `String` will have a length of zero, it does not
    /// touch its capacity.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use bumpalo::{Bump, collections::String};
    ///
    /// let b = Bump::new();
    ///
    /// let mut s = String::from_str_in("foo", &b);
    ///
    /// s.clear();
    ///
    /// assert!(s.is_empty());
    /// assert_eq!(0, s.len());
    /// assert_eq!(3, s.capacity());
    /// ```
    #[inline]
    pub fn clear(&mut self) {
        // Forwarded to the backing byte vector.
        self.vec.clear()
    }
1553
1554 /// Creates a draining iterator that removes the specified range in the `String`
1555 /// and yields the removed `chars`.
1556 ///
1557 /// Note: The element range is removed even if the iterator is not
1558 /// consumed until the end.
1559 ///
1560 /// # Panics
1561 ///
1562 /// Panics if the starting point or end point do not lie on a [`char`]
1563 /// boundary, or if they're out of bounds.
1564 ///
1565 /// [`char`]: https://doc.rust-lang.org/nightly/std/primitive.char.html
1566 ///
1567 /// # Examples
1568 ///
1569 /// Basic usage:
1570 ///
1571 /// ```
1572 /// use bumpalo::{Bump, collections::String};
1573 ///
1574 /// let b = Bump::new();
1575 ///
1576 /// let mut s = String::from_str_in("α is alpha, β is beta", &b);
1577 /// let beta_offset = s.find('β').unwrap_or(s.len());
1578 ///
1579 /// // Remove the range up until the β from the string
1580 /// let t = String::from_iter_in(s.drain(..beta_offset), &b);
1581 /// assert_eq!(t, "α is alpha, ");
1582 /// assert_eq!(s, "β is beta");
1583 ///
1584 /// // A full range clears the string
1585 /// s.drain(..);
1586 /// assert_eq!(s, "");
1587 /// ```
1588 pub fn drain<'a, R>(&'a mut self, range: R) -> Drain<'a, 'bump>
1589 where
1590 R: RangeBounds<usize>,
1591 {
1592 // Memory safety
1593 //
1594 // The String version of Drain does not have the memory safety issues
1595 // of the vector version. The data is just plain bytes.
1596 // Because the range removal happens in Drop, if the Drain iterator is leaked,
1597 // the removal will not happen.
1598 let len = self.len();
1599 let start = match range.start_bound() {
1600 Included(&n) => n,
1601 Excluded(&n) => n + 1,
1602 Unbounded => 0,
1603 };
1604 let end = match range.end_bound() {
1605 Included(&n) => n + 1,
1606 Excluded(&n) => n,
1607 Unbounded => len,
1608 };
1609
1610 // Take out two simultaneous borrows. The &mut String won't be accessed
1611 // until iteration is over, in Drop.
1612 let self_ptr = self as *mut _;
1613 // slicing does the appropriate bounds checks
1614 let chars_iter = self[start..end].chars();
1615
1616 Drain {
1617 start,
1618 end,
1619 iter: chars_iter,
1620 string: self_ptr,
1621 }
1622 }
1623
1624 /// Removes the specified range in the string,
1625 /// and replaces it with the given string.
1626 /// The given string doesn't need to be the same length as the range.
1627 ///
1628 /// # Panics
1629 ///
1630 /// Panics if the starting point or end point do not lie on a [`char`]
1631 /// boundary, or if they're out of bounds.
1632 ///
1633 /// [`char`]: https://doc.rust-lang.org/nightly/std/primitive.char.html
1634 /// [`Vec::splice`]: ../vec/struct.Vec.html#method.splice
1635 ///
1636 /// # Examples
1637 ///
1638 /// Basic usage:
1639 ///
1640 /// ```
1641 /// use bumpalo::{Bump, collections::String};
1642 ///
1643 /// let b = Bump::new();
1644 ///
1645 /// let mut s = String::from_str_in("α is alpha, β is beta", &b);
1646 /// let beta_offset = s.find('β').unwrap_or(s.len());
1647 ///
1648 /// // Replace the range up until the β from the string
1649 /// s.replace_range(..beta_offset, "Α is capital alpha; ");
1650 /// assert_eq!(s, "Α is capital alpha; β is beta");
1651 /// ```
1652 pub fn replace_range<R>(&mut self, range: R, replace_with: &str)
1653 where
1654 R: RangeBounds<usize>,
1655 {
1656 // Memory safety
1657 //
1658 // Replace_range does not have the memory safety issues of a vector Splice.
1659 // of the vector version. The data is just plain bytes.
1660
1661 match range.start_bound() {
1662 Included(&n) => assert!(self.is_char_boundary(n)),
1663 Excluded(&n) => assert!(self.is_char_boundary(n + 1)),
1664 Unbounded => {}
1665 };
1666 match range.end_bound() {
1667 Included(&n) => assert!(self.is_char_boundary(n + 1)),
1668 Excluded(&n) => assert!(self.is_char_boundary(n)),
1669 Unbounded => {}
1670 };
1671
1672 unsafe { self.as_mut_vec() }.splice(range, replace_with.bytes());
1673 }
1674}
1675
impl<'bump> FromUtf8Error<'bump> {
    /// Returns a slice of the bytes that were attempted to be converted to a
    /// `String`.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use bumpalo::{Bump, collections::String};
    ///
    /// let b = Bump::new();
    ///
    /// // some invalid bytes, in a vector
    /// let bytes = bumpalo::vec![in &b; 0, 159];
    ///
    /// let value = String::from_utf8(bytes);
    ///
    /// assert_eq!(&[0, 159], value.unwrap_err().as_bytes());
    /// ```
    pub fn as_bytes(&self) -> &[u8] {
        &self.bytes[..]
    }

    /// Returns the bytes that were attempted to convert to a `String`.
    ///
    /// This method is carefully constructed to avoid allocation. It will
    /// consume the error, moving out the bytes, so that a copy of the bytes
    /// does not need to be made.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use bumpalo::{Bump, collections::String};
    ///
    /// let b = Bump::new();
    ///
    /// // some invalid bytes, in a vector
    /// let bytes = bumpalo::vec![in &b; 0, 159];
    ///
    /// let value = String::from_utf8(bytes);
    ///
    /// assert_eq!(bumpalo::vec![in &b; 0, 159], value.unwrap_err().into_bytes());
    /// ```
    pub fn into_bytes(self) -> Vec<'bump, u8> {
        self.bytes
    }

    /// Fetch a `Utf8Error` to get more details about the conversion failure.
    ///
    /// The [`Utf8Error`] type provided by [`std::str`] represents an error that may
    /// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's
    /// an analogue to `FromUtf8Error`. See its documentation for more details
    /// on using it.
    ///
    /// [`Utf8Error`]: https://doc.rust-lang.org/nightly/std/str/struct.Utf8Error.html
    /// [`std::str`]: https://doc.rust-lang.org/nightly/std/str/index.html
    /// [`u8`]: https://doc.rust-lang.org/nightly/std/primitive.u8.html
    /// [`&str`]: https://doc.rust-lang.org/nightly/std/primitive.str.html
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use bumpalo::{Bump, collections::String};
    ///
    /// let b = Bump::new();
    ///
    /// // some invalid bytes, in a vector
    /// let bytes = bumpalo::vec![in &b; 0, 159];
    ///
    /// let error = String::from_utf8(bytes).unwrap_err().utf8_error();
    ///
    /// // the first byte (0) is valid; decoding fails at the second byte
    /// assert_eq!(1, error.valid_up_to());
    /// ```
    pub fn utf8_error(&self) -> Utf8Error {
        // Returned by value out of `&self`, i.e. a copy of the stored error.
        self.error
    }
}
1758
/// Displays the error exactly as the wrapped UTF-8 error displays itself.
impl<'bump> fmt::Display for FromUtf8Error<'bump> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.error, f)
    }
}
1764
/// Displays a fixed message; the error carries no further detail to print.
impl fmt::Display for FromUtf16Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Going through `Display` for `str` (rather than `write_str`) honors
        // width/precision flags set on the formatter.
        fmt::Display::fmt("invalid utf-16: lone surrogate found", f)
    }
}
1770
/// Cloning a `String` clones its backing byte vector.
impl<'bump> Clone for String<'bump> {
    fn clone(&self) -> Self {
        String {
            vec: self.vec.clone(),
        }
    }

    // Forwarded to the vector's own `clone_from` instead of relying on the
    // default implementation, which would build a fresh clone and assign it.
    fn clone_from(&mut self, source: &Self) {
        self.vec.clone_from(&source.vec);
    }
}
1782
1783impl<'bump> Extend<char> for String<'bump> {
1784 fn extend<I: IntoIterator<Item = char>>(&mut self, iter: I) {
1785 let iterator = iter.into_iter();
1786 let (lower_bound, _) = iterator.size_hint();
1787 self.reserve(lower_bound);
1788 for ch in iterator {
1789 self.push(ch)
1790 }
1791 }
1792}
1793
1794impl<'a, 'bump> Extend<&'a char> for String<'bump> {
1795 fn extend<I: IntoIterator<Item = &'a char>>(&mut self, iter: I) {
1796 self.extend(iter.into_iter().cloned());
1797 }
1798}
1799
1800impl<'a, 'bump> Extend<&'a str> for String<'bump> {
1801 fn extend<I: IntoIterator<Item = &'a str>>(&mut self, iter: I) {
1802 for s in iter {
1803 self.push_str(s)
1804 }
1805 }
1806}
1807
1808impl<'bump> Extend<String<'bump>> for String<'bump> {
1809 fn extend<I: IntoIterator<Item = String<'bump>>>(&mut self, iter: I) {
1810 for s in iter {
1811 self.push_str(&s)
1812 }
1813 }
1814}
1815
1816impl<'bump> Extend<core_alloc::string::String> for String<'bump> {
1817 fn extend<I: IntoIterator<Item = core_alloc::string::String>>(&mut self, iter: I) {
1818 for s in iter {
1819 self.push_str(&s)
1820 }
1821 }
1822}
1823
1824impl<'a, 'bump> Extend<Cow<'a, str>> for String<'bump> {
1825 fn extend<I: IntoIterator<Item = Cow<'a, str>>>(&mut self, iter: I) {
1826 for s in iter {
1827 self.push_str(&s)
1828 }
1829 }
1830}
1831
/// Two `String`s are equal when their contents are equal as string slices.
impl<'bump> PartialEq for String<'bump> {
    #[inline]
    fn eq(&self, other: &String) -> bool {
        // `[..]` views each side as a `str`, so this is `str` equality.
        PartialEq::eq(&self[..], &other[..])
    }
}
1838
1839macro_rules! impl_eq {
1840 ($lhs:ty, $rhs: ty) => {
1841 impl<'a, 'bump> PartialEq<$rhs> for $lhs {
1842 #[inline]
1843 fn eq(&self, other: &$rhs) -> bool {
1844 PartialEq::eq(&self[..], &other[..])
1845 }
1846 }
1847
1848 impl<'a, 'b, 'bump> PartialEq<$lhs> for $rhs {
1849 #[inline]
1850 fn eq(&self, other: &$lhs) -> bool {
1851 PartialEq::eq(&self[..], &other[..])
1852 }
1853 }
1854 };
1855}
1856
1857impl_eq! { String<'bump>, str }
1858impl_eq! { String<'bump>, &'a str }
1859impl_eq! { Cow<'a, str>, String<'bump> }
1860impl_eq! { core_alloc::string::String, String<'bump> }
1861
/// Displays the string exactly as the underlying `str` would be displayed.
impl<'bump> fmt::Display for String<'bump> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // `&**self` goes `&String` -> `String` -> `str` (via `Deref`) -> `&str`.
        fmt::Display::fmt(&**self, f)
    }
}
1868
/// Debug-formats the string exactly as the underlying `str` would be.
impl<'bump> fmt::Debug for String<'bump> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // `&**self` goes `&String` -> `String` -> `str` (via `Deref`) -> `&str`.
        fmt::Debug::fmt(&**self, f)
    }
}
1875
/// Hashes the string by delegating to the `Hash` implementation of `str`.
impl<'bump> hash::Hash for String<'bump> {
    #[inline]
    fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
        // `**self` is the `str` behind the `Deref` impl.
        (**self).hash(hasher)
    }
}
1882
/// Implements the `+` operator for concatenating two strings.
///
/// This consumes the `String<'bump>` on the left-hand side and re-uses its buffer (growing it if
/// necessary). This is done to avoid allocating a new `String<'bump>` and copying the entire contents on
/// every operation, which would lead to `O(n^2)` running time when building an `n`-byte string by
/// repeated concatenation.
///
/// The string on the right-hand side is only borrowed; its contents are copied into the returned
/// `String<'bump>`.
///
/// # Examples
///
/// Concatenating two `String<'bump>`s takes the first by value and borrows the second:
///
/// ```
/// use bumpalo::{Bump, collections::String};
///
/// let bump = Bump::new();
///
/// let a = String::from_str_in("hello", &bump);
/// let b = String::from_str_in(" world", &bump);
/// let c = a + &b;
/// // `a` is moved and can no longer be used here.
/// ```
///
/// If you want to keep using the first `String`, you can clone it and append to the clone instead:
///
/// ```
/// use bumpalo::{Bump, collections::String};
///
/// let bump = Bump::new();
///
/// let a = String::from_str_in("hello", &bump);
/// let b = String::from_str_in(" world", &bump);
/// let c = a.clone() + &b;
/// // `a` is still valid here.
/// ```
///
/// Concatenating `&str` slices can be done by converting the first to a `String`:
///
/// ```
/// use bumpalo::{Bump, collections::String};
///
/// let bump = Bump::new();
///
/// let a = "hello";
/// let b = " world";
/// let c = String::from_str_in(a, &bump) + b;
/// ```
impl<'a, 'bump> Add<&'a str> for String<'bump> {
    type Output = String<'bump>;

    #[inline]
    fn add(mut self, other: &str) -> String<'bump> {
        // Append in place and hand the same (possibly grown) buffer back.
        self.push_str(other);
        self
    }
}
1941
/// Implements the `+=` operator for appending to a `String<'bump>`.
///
/// This has the same behavior as the [`push_str`][String::push_str] method:
/// the right-hand side is only borrowed and its contents are copied onto the
/// end of the left-hand side.
impl<'a, 'bump> AddAssign<&'a str> for String<'bump> {
    #[inline]
    fn add_assign(&mut self, other: &str) {
        self.push_str(other);
    }
}
1951
/// Indexes the string by a byte range `start..end`, yielding a `str`.
impl<'bump> ops::Index<ops::Range<usize>> for String<'bump> {
    type Output = str;

    #[inline]
    fn index(&self, index: ops::Range<usize>) -> &str {
        // View the whole string as `str` first, then let `str` indexing do
        // the range handling.
        &self[..][index]
    }
}
/// Indexes the string by a byte range `..end`, yielding a `str`.
impl<'bump> ops::Index<ops::RangeTo<usize>> for String<'bump> {
    type Output = str;

    #[inline]
    fn index(&self, index: ops::RangeTo<usize>) -> &str {
        // View the whole string as `str` first, then let `str` indexing do
        // the range handling.
        &self[..][index]
    }
}
/// Indexes the string by a byte range `start..`, yielding a `str`.
impl<'bump> ops::Index<ops::RangeFrom<usize>> for String<'bump> {
    type Output = str;

    #[inline]
    fn index(&self, index: ops::RangeFrom<usize>) -> &str {
        // View the whole string as `str` first, then let `str` indexing do
        // the range handling.
        &self[..][index]
    }
}
/// Indexes the string by the full range `..`, yielding the whole contents as
/// a `str`. The other range-based `Index` impls build on this one.
impl<'bump> ops::Index<ops::RangeFull> for String<'bump> {
    type Output = str;

    #[inline]
    fn index(&self, _index: ops::RangeFull) -> &str {
        // SAFETY: relies on the invariant that a `String`'s backing vector
        // always holds valid UTF-8.
        unsafe { str::from_utf8_unchecked(&self.vec) }
    }
}
/// Indexes the string by an inclusive byte range `start..=end`, yielding a
/// `str`.
impl<'bump> ops::Index<ops::RangeInclusive<usize>> for String<'bump> {
    type Output = str;

    #[inline]
    fn index(&self, index: ops::RangeInclusive<usize>) -> &str {
        // Delegate to the `Index` implementation of `str` (reached through
        // `Deref`).
        Index::index(&**self, index)
    }
}
/// Indexes the string by an inclusive byte range `..=end`, yielding a `str`.
impl<'bump> ops::Index<ops::RangeToInclusive<usize>> for String<'bump> {
    type Output = str;

    #[inline]
    fn index(&self, index: ops::RangeToInclusive<usize>) -> &str {
        // Delegate to the `Index` implementation of `str` (reached through
        // `Deref`).
        Index::index(&**self, index)
    }
}
2000
/// Mutably indexes the string by a byte range `start..end`.
impl<'bump> ops::IndexMut<ops::Range<usize>> for String<'bump> {
    #[inline]
    fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str {
        // Borrow the whole string as `&mut str`, then let `str` indexing do
        // the range handling.
        &mut self[..][index]
    }
}
/// Mutably indexes the string by a byte range `..end`.
impl<'bump> ops::IndexMut<ops::RangeTo<usize>> for String<'bump> {
    #[inline]
    fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str {
        // Borrow the whole string as `&mut str`, then let `str` indexing do
        // the range handling.
        &mut self[..][index]
    }
}
/// Mutably indexes the string by a byte range `start..`.
impl<'bump> ops::IndexMut<ops::RangeFrom<usize>> for String<'bump> {
    #[inline]
    fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str {
        // Borrow the whole string as `&mut str`, then let `str` indexing do
        // the range handling.
        &mut self[..][index]
    }
}
/// Mutably indexes the string by the full range `..`, yielding the whole
/// contents as a `&mut str`. The other range-based `IndexMut` impls build on
/// this one.
impl<'bump> ops::IndexMut<ops::RangeFull> for String<'bump> {
    #[inline]
    fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
        // SAFETY: relies on the invariant that a `String`'s backing vector
        // always holds valid UTF-8.
        unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) }
    }
}
/// Mutably indexes the string by an inclusive byte range `start..=end`.
impl<'bump> ops::IndexMut<ops::RangeInclusive<usize>> for String<'bump> {
    #[inline]
    fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str {
        // Delegate to the `IndexMut` implementation of `str` (reached through
        // `DerefMut`).
        IndexMut::index_mut(&mut **self, index)
    }
}
/// Mutably indexes the string by an inclusive byte range `..=end`.
impl<'bump> ops::IndexMut<ops::RangeToInclusive<usize>> for String<'bump> {
    #[inline]
    fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str {
        // Delegate to the `IndexMut` implementation of `str` (reached through
        // `DerefMut`).
        IndexMut::index_mut(&mut **self, index)
    }
}
2037
/// Lets a `String` be used wherever a `&str` is expected.
impl<'bump> ops::Deref for String<'bump> {
    type Target = str;

    #[inline]
    fn deref(&self) -> &str {
        // SAFETY: relies on the invariant that a `String`'s backing vector
        // always holds valid UTF-8.
        unsafe { str::from_utf8_unchecked(&self.vec) }
    }
}
2046
/// Lets a `String` be used wherever a `&mut str` is expected.
impl<'bump> ops::DerefMut for String<'bump> {
    #[inline]
    fn deref_mut(&mut self) -> &mut str {
        // SAFETY: relies on the invariant that a `String`'s backing vector
        // always holds valid UTF-8.
        unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) }
    }
}
2053
/// Borrows the string's contents as a `str`.
impl<'bump> AsRef<str> for String<'bump> {
    #[inline]
    fn as_ref(&self) -> &str {
        // `&String` coerces to `&str` through the `Deref` impl.
        self
    }
}
2060
/// Borrows the string's contents as raw bytes; same as `as_bytes`.
impl<'bump> AsRef<[u8]> for String<'bump> {
    #[inline]
    fn as_ref(&self) -> &[u8] {
        self.as_bytes()
    }
}
2067
/// Allows formatting directly into a `String`, e.g. with `write!`.
///
/// Both methods append to the end of the string and always return `Ok(())`.
impl<'bump> fmt::Write for String<'bump> {
    #[inline]
    fn write_str(&mut self, s: &str) -> fmt::Result {
        self.push_str(s);
        Ok(())
    }

    // Overridden so a single character goes through `push` directly.
    #[inline]
    fn write_char(&mut self, c: char) -> fmt::Result {
        self.push(c);
        Ok(())
    }
}
2081
/// A draining iterator for `String`.
///
/// This struct is created by the [`drain`] method on [`String`]. See its
/// documentation for more.
///
/// Iterating yields the `char`s of the drained range; the bytes of that
/// range are removed from the string when the `Drain` is dropped.
///
/// [`drain`]: struct.String.html#method.drain
/// [`String`]: struct.String.html
pub struct Drain<'a, 'bump> {
    /// Will be used as &'a mut String in the destructor
    string: *mut String<'bump>,
    /// Start of part to remove
    start: usize,
    /// End of part to remove
    end: usize,
    /// Current remaining range to remove
    iter: Chars<'a>,
}
2099
/// Opaque `Debug` output: always prints `Drain { .. }` without exposing the
/// raw pointer or the remaining characters.
impl<'a, 'bump> fmt::Debug for Drain<'a, 'bump> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // `pad` honors width/alignment flags set on the formatter.
        f.pad("Drain { .. }")
    }
}
2105
// `Drain` stores a raw `*mut String<'bump>`, which suppresses the automatic
// `Sync`/`Send` implementations, so they are asserted manually here.
// NOTE(review): both impls are unconditional. Confirm that `String<'bump>`
// (whose vector is tied to the bump allocator) may soundly be iterated and
// dropped from another thread — TODO verify against `Bump`'s thread-safety.
unsafe impl<'a, 'bump> Sync for Drain<'a, 'bump> {}
unsafe impl<'a, 'bump> Send for Drain<'a, 'bump> {}
2108
/// Dropping the iterator performs the actual removal of `start..end` from
/// the source string, whether or not the iterator was fully consumed.
impl<'a, 'bump> Drop for Drain<'a, 'bump> {
    fn drop(&mut self) {
        // `self.string` was created from the `&'a mut String` handed to
        // `drain`; it is dereferenced only here, once iteration is over.
        unsafe {
            // Use Vec::drain. "Reaffirm" the bounds checks to avoid
            // panic code being inserted again.
            let self_vec = (*self.string).as_mut_vec();
            if self.start <= self.end && self.end <= self_vec.len() {
                // The vector drain is dropped immediately, which removes the
                // byte range from the vector.
                self_vec.drain(self.start..self.end);
            }
        }
    }
}
2121
/// Yields the `char`s of the drained range from front to back.
impl<'a, 'bump> Iterator for Drain<'a, 'bump> {
    type Item = char;

    #[inline]
    fn next(&mut self) -> Option<char> {
        // Iteration only reads from the `Chars` over the drained slice; the
        // string itself is not modified until `Drop`.
        self.iter.next()
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}
2134
/// Yields the `char`s of the drained range from back to front.
impl<'a, 'bump> DoubleEndedIterator for Drain<'a, 'bump> {
    #[inline]
    fn next_back(&mut self) -> Option<char> {
        // Forwarded to the `Chars` over the drained slice.
        self.iter.next_back()
    }
}
2141
// Marker impl: `next`/`next_back` forward to the wrapped `Chars` iterator, so
// `Drain` is declared fused as well.
impl<'a, 'bump> FusedIterator for Drain<'a, 'bump> {}