flyweights/
lib.rs

1// Copyright 2022 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! Types implementing the [flyweight pattern](https://en.wikipedia.org/wiki/Flyweight_pattern)
6//! for reusing string allocations.
7//!
8//! # Features
9//!
10//! * Supports UTF-8 strings with [`FlyStr`] and UTF-8-ish bytestrings with [`FlyByteStr`].
11//! * Easy drop-in replacement for immutable strings without wiring up any additional function args.
12//! * Accessing the underlying string values has overhead similar to a `Box<str>`.
13//! * Cheap to clone.
14//! * Strings in the cache are freed when the last reference to them is dropped.
15//! * Heap allocations are avoided when possible with small string optimizations (SSO).
16//! * Hashing a flyweight and comparing it for equality against another flyweight are both O(1)
17//!   operations.
18//!
19//! # Tradeoffs
20//!
21//! This was originally written for [Fuchsia](https://fuchsia.dev) at a time when popular options
22//! didn't fit the needs of adding caching to an existing long-running multithreaded system service
23//! that holds an unbounded number of user-controlled strings. The above features are suited to this
24//! use case, but there are many (many) alternative [string caching crates] to choose from if you
25//! have different constraints.
26//!
27//! [string caching crates]: https://crates.io/search?q=intern
28
29#![warn(missing_docs, clippy::all)]
30
31mod raw;
32
33use ahash::AHashSet;
34use bstr::{BStr, BString};
35use std::borrow::Borrow;
36use std::fmt::{Debug, Display, Formatter, Result as FmtResult};
37use std::hash::{Hash, Hasher};
38use std::ops::Deref;
39use std::ptr::NonNull;
40use std::sync::Mutex;
41
42#[cfg(feature = "serde")]
43use serde::de::{Deserializer, Visitor};
44#[cfg(feature = "serde")]
45use serde::ser::Serializer;
46#[cfg(feature = "serde")]
47use serde::{Deserialize, Serialize};
48
49/// The global string cache for `FlyStr`.
50///
51/// If a live `FlyStr` contains an `Storage`, the `Storage` must also be in this cache and it must
52/// have a refcount of >= 2.
53static CACHE: std::sync::LazyLock<Mutex<AHashSet<Storage>>> =
54    std::sync::LazyLock::new(|| Mutex::new(AHashSet::new()));
55
56/// Wrapper type for stored strings that lets us query the cache without an owned value.
57#[repr(transparent)]
58struct Storage(NonNull<raw::Payload>);
59
60// SAFETY: FlyStr storage is always safe to send across threads.
61unsafe impl Send for Storage {}
62// SAFETY: FlyStr storage is always safe to share across threads.
63unsafe impl Sync for Storage {}
64
65impl PartialEq for Storage {
66    #[inline]
67    fn eq(&self, other: &Self) -> bool {
68        self.as_bytes() == other.as_bytes()
69    }
70}
71
72impl Eq for Storage {}
73
74impl Hash for Storage {
75    #[inline]
76    fn hash<H: Hasher>(&self, state: &mut H) {
77        self.as_bytes().hash(state)
78    }
79}
80
81impl Storage {
82    #[inline]
83    fn inc_ref(&self) -> usize {
84        // SAFETY: `Storage` always points to a valid `Payload`.
85        unsafe { raw::Payload::inc_ref(self.0.as_ptr()) }
86    }
87
88    #[inline]
89    fn as_bytes(&self) -> &[u8] {
90        // SAFETY: `Storage` always points to a valid `Payload`.
91        unsafe { &*raw::Payload::bytes(self.0.as_ptr()) }
92    }
93}
94
95impl Borrow<[u8]> for Storage {
96    #[inline]
97    fn borrow(&self) -> &[u8] {
98        self.as_bytes()
99    }
100}
101
102/// An immutable string type which only stores a single copy of each string allocated. Internally
103/// represented as a shared pointer to the backing allocation. Occupies a single pointer width.
104///
105/// # Small strings
106///
107/// Very short strings are stored inline in the pointer with bit-tagging, so no allocations are
108/// performed.
109///
110/// # Performance
111///
112/// It's slower to construct than a regular `String` but trades that for reduced standing memory
113/// usage by deduplicating strings. `PartialEq` and `Hash` are implemented on the underlying pointer
114/// value rather than the pointed-to data for faster equality comparisons and indexing, which is
115/// sound by virtue of the type guaranteeing that only one `FlyStr` pointer value will exist at
116/// any time for a given string's contents.
117///
118/// As with any performance optimization, you should only use this type if you can measure the
119/// benefit it provides to your program. Pay careful attention to creating `FlyStr`s in hot paths
120/// as it may regress runtime performance.
121///
122/// # Allocation lifecycle
123///
124/// Intended for long-running system services with user-provided values, `FlyStr`s are removed from
125/// the global cache when the last reference to them is dropped. While this incurs some overhead
126/// it is important to prevent the value cache from becoming a denial-of-service vector.
127#[derive(Clone, Eq, Hash, PartialEq)]
128pub struct FlyStr(RawRepr);
129
130#[cfg(feature = "json_schema")]
131impl schemars::JsonSchema for FlyStr {
132    fn schema_name() -> String {
133        str::schema_name()
134    }
135
136    fn json_schema(generator: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
137        str::json_schema(generator)
138    }
139
140    fn is_referenceable() -> bool {
141        false
142    }
143}
144
145static_assertions::assert_eq_size!(FlyStr, usize);
146
147// `Borrow` requires that:
148//
149// > Eq, Ord and Hash must be equivalent for borrowed and owned values: x.borrow() == y.borrow()
150// > should give the same result as x == y.
151//
152// The current implementation of `FlyStr` cannot satisfy `Borrow<str>` because the O(1) hashing
153// feature means that `s.hash() != s.borrow().hash()`.
154static_assertions::assert_not_impl_any!(FlyStr: Borrow<str>);
155
156impl FlyStr {
157    /// Create a `FlyStr`, allocating it in the cache if the value is not already cached.
158    ///
159    /// # Performance
160    ///
161    /// Creating an instance of this type for strings longer than the maximum inline size requires
162    /// accessing the global cache of strings, which involves taking a lock. When multiple threads
163    /// are allocating lots of strings there may be contention. Each string allocated is hashed for
164    /// lookup in the cache.
165    #[inline]
166    pub fn new(s: impl AsRef<str> + Into<String>) -> Self {
167        Self(RawRepr::new_str(s))
168    }
169
170    /// Returns the underlying string slice.
171    #[inline]
172    pub fn as_str(&self) -> &str {
173        // SAFETY: Every FlyStr is constructed from valid UTF-8 bytes.
174        unsafe { std::str::from_utf8_unchecked(self.0.as_bytes()) }
175    }
176}
177
178impl Default for FlyStr {
179    #[inline]
180    fn default() -> Self {
181        Self::new("")
182    }
183}
184
185impl From<&'_ str> for FlyStr {
186    #[inline]
187    fn from(s: &str) -> Self {
188        Self::new(s)
189    }
190}
191
192impl From<&'_ String> for FlyStr {
193    #[inline]
194    fn from(s: &String) -> Self {
195        Self::new(&**s)
196    }
197}
198
199impl From<String> for FlyStr {
200    #[inline]
201    fn from(s: String) -> Self {
202        Self::new(s)
203    }
204}
205
206impl From<Box<str>> for FlyStr {
207    #[inline]
208    fn from(s: Box<str>) -> Self {
209        Self::new(s)
210    }
211}
212
213impl From<&Box<str>> for FlyStr {
214    #[inline]
215    fn from(s: &Box<str>) -> Self {
216        Self::new(&**s)
217    }
218}
219
220impl TryFrom<FlyByteStr> for FlyStr {
221    type Error = std::str::Utf8Error;
222
223    #[inline]
224    fn try_from(b: FlyByteStr) -> Result<FlyStr, Self::Error> {
225        // The internals of both FlyStr and FlyByteStr are the same, but it's only sound to return
226        // a FlyStr if the RawRepr contains/points to valid UTF-8.
227        std::str::from_utf8(b.as_bytes())?;
228        Ok(FlyStr(b.0))
229    }
230}
231
232impl From<FlyStr> for String {
233    #[inline]
234    fn from(s: FlyStr) -> String {
235        s.as_str().to_owned()
236    }
237}
238
239impl From<&'_ FlyStr> for String {
240    #[inline]
241    fn from(s: &FlyStr) -> String {
242        s.as_str().to_owned()
243    }
244}
245
246impl Deref for FlyStr {
247    type Target = str;
248
249    #[inline]
250    fn deref(&self) -> &Self::Target {
251        self.as_str()
252    }
253}
254
255impl AsRef<str> for FlyStr {
256    #[inline]
257    fn as_ref(&self) -> &str {
258        self.as_str()
259    }
260}
261
262impl PartialOrd for FlyStr {
263    #[inline]
264    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
265        Some(self.cmp(other))
266    }
267}
268impl Ord for FlyStr {
269    #[inline]
270    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
271        self.as_str().cmp(other.as_str())
272    }
273}
274
275impl PartialEq<str> for FlyStr {
276    #[inline]
277    fn eq(&self, other: &str) -> bool {
278        self.as_str() == other
279    }
280}
281
282impl PartialEq<&'_ str> for FlyStr {
283    #[inline]
284    fn eq(&self, other: &&str) -> bool {
285        self.as_str() == *other
286    }
287}
288
289impl PartialEq<String> for FlyStr {
290    #[inline]
291    fn eq(&self, other: &String) -> bool {
292        self.as_str() == &**other
293    }
294}
295
296impl PartialEq<FlyByteStr> for FlyStr {
297    #[inline]
298    fn eq(&self, other: &FlyByteStr) -> bool {
299        self.0 == other.0
300    }
301}
302
303impl PartialEq<&'_ FlyByteStr> for FlyStr {
304    #[inline]
305    fn eq(&self, other: &&FlyByteStr) -> bool {
306        self.0 == other.0
307    }
308}
309
310impl PartialOrd<str> for FlyStr {
311    #[inline]
312    fn partial_cmp(&self, other: &str) -> Option<std::cmp::Ordering> {
313        self.as_str().partial_cmp(other)
314    }
315}
316
317impl PartialOrd<&str> for FlyStr {
318    #[inline]
319    fn partial_cmp(&self, other: &&str) -> Option<std::cmp::Ordering> {
320        self.as_str().partial_cmp(*other)
321    }
322}
323
324impl PartialOrd<FlyByteStr> for FlyStr {
325    #[inline]
326    fn partial_cmp(&self, other: &FlyByteStr) -> Option<std::cmp::Ordering> {
327        BStr::new(self.as_str()).partial_cmp(other.as_bstr())
328    }
329}
330
331impl PartialOrd<&'_ FlyByteStr> for FlyStr {
332    #[inline]
333    fn partial_cmp(&self, other: &&FlyByteStr) -> Option<std::cmp::Ordering> {
334        BStr::new(self.as_str()).partial_cmp(other.as_bstr())
335    }
336}
337
338impl Debug for FlyStr {
339    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
340        Debug::fmt(self.as_str(), f)
341    }
342}
343
344impl Display for FlyStr {
345    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
346        Display::fmt(self.as_str(), f)
347    }
348}
349
350#[cfg(feature = "serde")]
351impl Serialize for FlyStr {
352    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
353        serializer.serialize_str(self.as_str())
354    }
355}
356
357#[cfg(feature = "serde")]
358impl<'d> Deserialize<'d> for FlyStr {
359    fn deserialize<D: Deserializer<'d>>(deserializer: D) -> Result<Self, D::Error> {
360        deserializer.deserialize_str(FlyStrVisitor)
361    }
362}
363
364#[cfg(feature = "serde")]
365struct FlyStrVisitor;
366
367#[cfg(feature = "serde")]
368impl Visitor<'_> for FlyStrVisitor {
369    type Value = FlyStr;
370    fn expecting(&self, formatter: &mut Formatter<'_>) -> FmtResult {
371        formatter.write_str("a string")
372    }
373
374    fn visit_borrowed_str<E>(self, v: &str) -> Result<Self::Value, E>
375    where
376        E: serde::de::Error,
377    {
378        Ok(v.into())
379    }
380
381    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
382    where
383        E: serde::de::Error,
384    {
385        Ok(v.into())
386    }
387
388    fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
389    where
390        E: serde::de::Error,
391    {
392        Ok(v.into())
393    }
394}
395
396macro_rules! new_raw_repr {
397    ($borrowed_bytes:expr) => {
398        if $borrowed_bytes.len() <= MAX_INLINE_SIZE {
399            RawRepr::new_inline($borrowed_bytes)
400        } else {
401            let mut cache = CACHE.lock().unwrap();
402            if let Some(existing) = cache.get($borrowed_bytes) {
403                RawRepr::from_storage(existing)
404            } else {
405                RawRepr::new_for_storage(&mut cache, $borrowed_bytes)
406            }
407        }
408    };
409}
410
411/// An immutable bytestring type which only stores a single copy of each string allocated.
412/// Internally represented as a shared pointer to the backing allocation. Occupies a single pointer width.
413///
414/// # Small strings
415///
416/// Very short strings are stored inline in the pointer with bit-tagging, so no allocations are
417/// performed.
418///
419/// # Performance
420///
421/// It's slower to construct than a regular `BString` but trades that for reduced standing memory
422/// usage by deduplicating strings. `PartialEq` and `Hash` are implemented on the underlying pointer
423/// value rather than the pointed-to data for faster equality comparisons and indexing, which is
424/// sound by virtue of the type guaranteeing that only one `FlyByteStr` pointer value will exist at
425/// any time for a given string's contents.
426///
427/// As with any performance optimization, you should only use this type if you can measure the
428/// benefit it provides to your program. Pay careful attention to creating `FlyByteStr`s in hot
429/// paths as it may regress runtime performance.
430///
431/// # Allocation lifecycle
432///
433/// Intended for long-running system services with user-provided values, `FlyByteStr`s are removed
434/// from the global cache when the last reference to them is dropped. While this incurs some
435/// overhead it is important to prevent the value cache from becoming a denial-of-service vector.
436#[derive(Clone, Eq, Hash, PartialEq)]
437pub struct FlyByteStr(RawRepr);
438
439static_assertions::assert_eq_size!(FlyByteStr, usize);
440
441// `Borrow` requires that:
442//
443// > Eq, Ord and Hash must be equivalent for borrowed and owned values: x.borrow() == y.borrow()
444// > should give the same result as x == y.
445//
446// The current implementation of `FlyByteStr` cannot satisfy `Borrow<str>` because the O(1) hashing
447// feature means that `s.hash() != s.borrow().hash()`.
448static_assertions::assert_not_impl_any!(FlyByteStr: Borrow<str>);
449
450impl FlyByteStr {
451    /// Create a `FlyByteStr`, allocating it in the cache if the value is not already cached.
452    ///
453    /// # Performance
454    ///
455    /// Creating an instance of this type for strings longer than the maximum inline size requires
456    /// accessing the global cache of strings, which involves taking a lock. When multiple threads
457    /// are allocating lots of strings there may be contention. Each string allocated is hashed for
458    /// lookup in the cache.
459    pub fn new(s: impl AsRef<[u8]> + Into<Vec<u8>>) -> Self {
460        Self(RawRepr::new(s))
461    }
462
463    /// Returns the underlying bytestring slice.
464    #[inline]
465    pub fn as_bstr(&self) -> &BStr {
466        BStr::new(self.0.as_bytes())
467    }
468
469    /// Returns the underlying byte slice.
470    #[inline]
471    pub fn as_bytes(&self) -> &[u8] {
472        self.0.as_bytes()
473    }
474}
475
476impl Default for FlyByteStr {
477    #[inline]
478    fn default() -> Self {
479        Self::new(b"")
480    }
481}
482
483impl From<&'_ [u8]> for FlyByteStr {
484    #[inline]
485    fn from(s: &[u8]) -> Self {
486        Self::new(s)
487    }
488}
489
490impl<const N: usize> From<[u8; N]> for FlyByteStr {
491    #[inline]
492    fn from(s: [u8; N]) -> Self {
493        Self::new(s)
494    }
495}
496
497impl From<&'_ BStr> for FlyByteStr {
498    #[inline]
499    fn from(s: &BStr) -> Self {
500        let bytes: &[u8] = s.as_ref();
501        Self(new_raw_repr!(bytes))
502    }
503}
504
505impl From<&'_ str> for FlyByteStr {
506    #[inline]
507    fn from(s: &str) -> Self {
508        Self::new(s)
509    }
510}
511
512impl From<&'_ Vec<u8>> for FlyByteStr {
513    #[inline]
514    fn from(s: &Vec<u8>) -> Self {
515        Self(new_raw_repr!(&s[..]))
516    }
517}
518
519impl From<&'_ String> for FlyByteStr {
520    #[inline]
521    fn from(s: &String) -> Self {
522        Self::new(&**s)
523    }
524}
525
526impl From<Vec<u8>> for FlyByteStr {
527    #[inline]
528    fn from(s: Vec<u8>) -> Self {
529        Self::new(s)
530    }
531}
532
533impl From<String> for FlyByteStr {
534    #[inline]
535    fn from(s: String) -> Self {
536        Self::new(s)
537    }
538}
539
540impl From<BString> for FlyByteStr {
541    #[inline]
542    fn from(s: BString) -> Self {
543        Self::new(s)
544    }
545}
546
547impl From<Box<[u8]>> for FlyByteStr {
548    #[inline]
549    fn from(s: Box<[u8]>) -> Self {
550        Self::new(s)
551    }
552}
553
554impl From<Box<str>> for FlyByteStr {
555    #[inline]
556    fn from(s: Box<str>) -> Self {
557        Self(new_raw_repr!(s.as_bytes()))
558    }
559}
560
561impl From<&'_ Box<[u8]>> for FlyByteStr {
562    #[inline]
563    fn from(s: &'_ Box<[u8]>) -> Self {
564        Self(new_raw_repr!(&**s))
565    }
566}
567
568impl From<&Box<str>> for FlyByteStr {
569    #[inline]
570    fn from(s: &Box<str>) -> Self {
571        Self::new(&**s)
572    }
573}
574
575impl From<FlyByteStr> for BString {
576    #[inline]
577    fn from(s: FlyByteStr) -> BString {
578        s.as_bstr().to_owned()
579    }
580}
581
582impl From<FlyByteStr> for Vec<u8> {
583    #[inline]
584    fn from(s: FlyByteStr) -> Vec<u8> {
585        s.as_bytes().to_owned()
586    }
587}
588
589impl From<FlyStr> for FlyByteStr {
590    #[inline]
591    fn from(s: FlyStr) -> FlyByteStr {
592        Self(s.0)
593    }
594}
595
596impl TryInto<String> for FlyByteStr {
597    type Error = std::string::FromUtf8Error;
598
599    #[inline]
600    fn try_into(self) -> Result<String, Self::Error> {
601        String::from_utf8(self.into())
602    }
603}
604
605impl Deref for FlyByteStr {
606    type Target = BStr;
607
608    #[inline]
609    fn deref(&self) -> &Self::Target {
610        self.as_bstr()
611    }
612}
613
614impl AsRef<BStr> for FlyByteStr {
615    #[inline]
616    fn as_ref(&self) -> &BStr {
617        self.as_bstr()
618    }
619}
620
621impl PartialOrd for FlyByteStr {
622    #[inline]
623    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
624        Some(self.cmp(other))
625    }
626}
627impl Ord for FlyByteStr {
628    #[inline]
629    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
630        self.as_bstr().cmp(other.as_bstr())
631    }
632}
633
634impl PartialEq<[u8]> for FlyByteStr {
635    #[inline]
636    fn eq(&self, other: &[u8]) -> bool {
637        self.as_bytes() == other
638    }
639}
640
641impl PartialEq<BStr> for FlyByteStr {
642    #[inline]
643    fn eq(&self, other: &BStr) -> bool {
644        self.as_bytes() == other
645    }
646}
647
648impl PartialEq<str> for FlyByteStr {
649    #[inline]
650    fn eq(&self, other: &str) -> bool {
651        self.as_bytes() == other.as_bytes()
652    }
653}
654
655impl PartialEq<&'_ [u8]> for FlyByteStr {
656    #[inline]
657    fn eq(&self, other: &&[u8]) -> bool {
658        self.as_bytes() == *other
659    }
660}
661
662impl PartialEq<&'_ BStr> for FlyByteStr {
663    #[inline]
664    fn eq(&self, other: &&BStr) -> bool {
665        self.as_bstr() == *other
666    }
667}
668
669impl PartialEq<&'_ str> for FlyByteStr {
670    #[inline]
671    fn eq(&self, other: &&str) -> bool {
672        self.as_bytes() == other.as_bytes()
673    }
674}
675
676impl PartialEq<String> for FlyByteStr {
677    #[inline]
678    fn eq(&self, other: &String) -> bool {
679        self.as_bytes() == other.as_bytes()
680    }
681}
682
683impl PartialEq<FlyStr> for FlyByteStr {
684    #[inline]
685    fn eq(&self, other: &FlyStr) -> bool {
686        self.0 == other.0
687    }
688}
689
690impl PartialEq<&'_ FlyStr> for FlyByteStr {
691    #[inline]
692    fn eq(&self, other: &&FlyStr) -> bool {
693        self.0 == other.0
694    }
695}
696
697impl PartialOrd<str> for FlyByteStr {
698    #[inline]
699    fn partial_cmp(&self, other: &str) -> Option<std::cmp::Ordering> {
700        self.as_bstr().partial_cmp(other)
701    }
702}
703
704impl PartialOrd<&str> for FlyByteStr {
705    #[inline]
706    fn partial_cmp(&self, other: &&str) -> Option<std::cmp::Ordering> {
707        self.as_bstr().partial_cmp(other)
708    }
709}
710
711impl PartialOrd<FlyStr> for FlyByteStr {
712    #[inline]
713    fn partial_cmp(&self, other: &FlyStr) -> Option<std::cmp::Ordering> {
714        self.as_bstr().partial_cmp(other.as_str())
715    }
716}
717
718impl Debug for FlyByteStr {
719    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
720        Debug::fmt(self.as_bstr(), f)
721    }
722}
723
724impl Display for FlyByteStr {
725    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
726        Display::fmt(self.as_bstr(), f)
727    }
728}
729
730#[cfg(feature = "serde")]
731impl Serialize for FlyByteStr {
732    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
733        serializer.serialize_bytes(self.as_bytes())
734    }
735}
736
737#[cfg(feature = "serde")]
738impl<'d> Deserialize<'d> for FlyByteStr {
739    fn deserialize<D: Deserializer<'d>>(deserializer: D) -> Result<Self, D::Error> {
740        deserializer.deserialize_bytes(FlyByteStrVisitor)
741    }
742}
743
744#[cfg(feature = "serde")]
745struct FlyByteStrVisitor;
746
747#[cfg(feature = "serde")]
748impl<'de> Visitor<'de> for FlyByteStrVisitor {
749    type Value = FlyByteStr;
750    fn expecting(&self, formatter: &mut Formatter<'_>) -> FmtResult {
751        formatter.write_str("a string, a bytestring, or a sequence of bytes")
752    }
753
754    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
755    where
756        E: serde::de::Error,
757    {
758        Ok(FlyByteStr::from(v))
759    }
760
761    fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
762    where
763        E: serde::de::Error,
764    {
765        Ok(FlyByteStr::from(v))
766    }
767
768    fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
769    where
770        E: serde::de::Error,
771    {
772        Ok(FlyByteStr::from(v))
773    }
774
775    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
776    where
777        A: serde::de::SeqAccess<'de>,
778    {
779        let mut bytes = vec![];
780        while let Some(b) = seq.next_element::<u8>()? {
781            bytes.push(b);
782        }
783        Ok(FlyByteStr::from(bytes))
784    }
785}
786
787#[repr(C)] // Guarantee predictable field ordering.
788union RawRepr {
789    /// Strings longer than MAX_INLINE_SIZE are allocated using `raw::Payload`, which have a thin
790    /// pointer representation. This means that `heap` variants of `Storage` will always have the
791    /// pointer contents aligned and this variant will never have its least significant bit set.
792    ///
793    /// We store a `NonNull` so we can have guaranteed pointer layout.
794    heap: NonNull<raw::Payload>,
795
796    /// Strings shorter than or equal in length to MAX_INLINE_SIZE are stored in this union variant.
797    /// The first byte is reserved for the size of the inline string, and the remaining bytes are
798    /// used for the string itself. The first byte has its least significant bit set to 1 to
799    /// distinguish inline strings from heap-allocated ones, and the size is stored in the remaining
800    /// 7 bits.
801    inline: InlineRepr,
802}
803
804// The inline variant should not cause us to occupy more space than the heap variant alone.
805static_assertions::assert_eq_size!(NonNull<raw::Payload>, RawRepr);
806
807// Alignment of the payload pointers must be >1 in order to have space for the mask bit at the
808// bottom.
809static_assertions::const_assert!(std::mem::align_of::<raw::Payload>() > 1);
810
811// The short string optimization makes little-endian layout assumptions with the first byte being
812// the least significant.
813static_assertions::assert_type_eq_all!(byteorder::NativeEndian, byteorder::LittleEndian);
814
815/// An enum with an actual discriminant that allows us to limit the reach of unsafe code in the
816/// implementation without affecting the stored size of `RawRepr`.
817enum SafeRepr<'a> {
818    Heap(NonNull<raw::Payload>),
819    Inline(&'a InlineRepr),
820}
821
822// SAFETY: FlyStr can be dropped from any thread.
823unsafe impl Send for RawRepr {}
824// SAFETY: FlyStr has an immutable public API.
825unsafe impl Sync for RawRepr {}
826
827impl RawRepr {
828    fn new_str(s: impl AsRef<str>) -> Self {
829        let borrowed = s.as_ref();
830        new_raw_repr!(borrowed.as_bytes())
831    }
832
833    fn new(s: impl AsRef<[u8]>) -> Self {
834        let borrowed = s.as_ref();
835        new_raw_repr!(borrowed)
836    }
837
838    #[inline]
839    fn new_inline(s: &[u8]) -> Self {
840        assert!(s.len() <= MAX_INLINE_SIZE);
841        let new = Self {
842            inline: InlineRepr::new(s),
843        };
844        assert!(
845            new.is_inline(),
846            "least significant bit must be 1 for inline strings"
847        );
848        new
849    }
850
851    #[inline]
852    fn from_storage(storage: &Storage) -> Self {
853        if storage.inc_ref() == 0 {
854            // Another thread is trying to lock the cache and free this string. They already
855            // released their refcount, so give it back to them. This will prevent this thread
856            // and other threads from attempting to free the string if they drop the refcount back
857            // down.
858            storage.inc_ref();
859        }
860        Self { heap: storage.0 }
861    }
862
863    #[inline]
864    fn new_for_storage(cache: &mut AHashSet<Storage>, bytes: &[u8]) -> Self {
865        assert!(bytes.len() > MAX_INLINE_SIZE);
866        // `Payload::alloc` starts the refcount at 1.
867        let new_storage = raw::Payload::alloc(bytes);
868
869        let for_cache = Storage(new_storage);
870        let new = Self { heap: new_storage };
871        assert!(
872            !new.is_inline(),
873            "least significant bit must be 0 for heap strings"
874        );
875        cache.insert(for_cache);
876        new
877    }
878
879    #[inline]
880    fn is_inline(&self) -> bool {
881        // SAFETY: it is always OK to interpret a pointer as byte array as long as we don't expect
882        // to retain provenance.
883        (unsafe { self.inline.masked_len } & 1) == 1
884    }
885
886    #[inline]
887    fn project(&self) -> SafeRepr<'_> {
888        if self.is_inline() {
889            // SAFETY: Just checked that this is the inline variant.
890            SafeRepr::Inline(unsafe { &self.inline })
891        } else {
892            // SAFETY: Just checked that this is the heap variant.
893            SafeRepr::Heap(unsafe { self.heap })
894        }
895    }
896
897    #[inline]
898    fn as_bytes(&self) -> &[u8] {
899        match self.project() {
900            // SAFETY: FlyStr owns the payload stored as a NonNull, it is live as long as `FlyStr`.
901            SafeRepr::Heap(ptr) => unsafe { &*raw::Payload::bytes(ptr.as_ptr()) },
902            SafeRepr::Inline(i) => i.as_bytes(),
903        }
904    }
905}
906
907impl PartialEq for RawRepr {
908    #[inline]
909    fn eq(&self, other: &Self) -> bool {
910        // SAFETY: it is always OK to interpret a pointer as a byte array as long as we don't expect
911        // to retain provenance.
912        let lhs = unsafe { &self.inline };
913        // SAFETY: it is always OK to interpret a pointer as a byte array as long as we don't expect
914        // to retain provenance.
915        let rhs = unsafe { &other.inline };
916        lhs.eq(rhs)
917    }
918}
919impl Eq for RawRepr {}
920
921impl Hash for RawRepr {
922    fn hash<H: Hasher>(&self, h: &mut H) {
923        // SAFETY: it is always OK to interpret a pointer as a byte array as long as we don't expect
924        // to retain provenance.
925        let this = unsafe { &self.inline };
926        this.hash(h);
927    }
928}
929
930impl Clone for RawRepr {
931    fn clone(&self) -> Self {
932        match self.project() {
933            SafeRepr::Heap(ptr) => {
934                // SAFETY: We own this payload, we know it's live because we are.
935                unsafe {
936                    raw::Payload::inc_ref(ptr.as_ptr());
937                }
938                Self { heap: ptr }
939            }
940            SafeRepr::Inline(&inline) => Self { inline },
941        }
942    }
943}
944
945impl Drop for RawRepr {
946    fn drop(&mut self) {
947        if !self.is_inline() {
948            // SAFETY: We checked above that this is the heap repr.
949            let heap = unsafe { self.heap };
950
951            // Decrementing the refcount before locking the cache causes the following failure mode:
952            //
953            // 1. We drop the refcount to 0.
954            // 2. Another thread finds the string we're about to drop and increments the refcount
955            //    back up to 1.
956            // 3. That thread drops its refcount, and also sees the refcount drop to 0.
957            // 4. That thread locks the cache, removes the value, and drops the payload. This leaves
958            //    us with a dangling pointer to the dropped payload.
959            // 5. We lock the cache and go to look up our value in the cache. Our payload pointer is
960            //    dangling now, but we don't know that. If we try to read through our dangling
961            //    pointer, we cause UB.
962            //
963            // To account for this failure mode and still optimistically drop our refcount, we
964            // modify the procedure slightly:
965            //
966            // 1. We drop the refcount to 0.
967            // 2. Another thread finds the string we're about to drop and increments the refcount
968            //    back up to 1. It notices that the refcount incremented from 0 to 1, and so knows
969            //    that our thread will try to drop it. While still holding the cache lock, that
970            //    thread increments the refcount again from 1 to 2. This "gives back" the refcount
971            //    to our thread.
972            // 3. That thread drops its refcount, and sees the refcount drop to 1. It won't try to
973            //    drop the payload this time.
974            // 4. We lock the cache, and decrement the refcount a second time. If it decremented
975            //    from 0 or 1, then we know that no other threads are currently holding references
976            //    to it and we can safely drop it ourselves.
977
978            // SAFETY: The payload is live.
979            let prev_refcount = unsafe { raw::Payload::dec_ref(heap.as_ptr()) };
980
981            // If we held the final refcount outside of the cache, try to remove the string.
982            if prev_refcount == 1 {
983                let mut cache = CACHE.lock().unwrap();
984
985                let current_refcount = unsafe { raw::Payload::dec_ref(heap.as_ptr()) };
986                if current_refcount <= 1 {
987                    // If the refcount was still 0 after acquiring the cache lock, no other thread
988                    // looked up this payload between optimistically decrementing the refcount and
989                    // now. If the refcount was 1, then another thread did, but dropped its refcount
990                    // before we got the cache lock. Either way, we can safely remove the string
991                    // from the cache and free it.
992
993                    let bytes = unsafe { &*raw::Payload::bytes(heap.as_ptr()) };
994                    assert!(
995                        cache.remove(bytes),
996                        "cache did not contain bytes, but this thread didn't remove them yet",
997                    );
998
999                    // Get out of the critical section as soon as possible
1000                    drop(cache);
1001
1002                    // SAFETY: The payload is live.
1003                    unsafe { raw::Payload::dealloc(heap.as_ptr()) };
1004                } else {
1005                    // Another thread looked up this payload, made a reference to it, and gave our
1006                    // refcount back to us for a minmium refcount of 2. We re-removed our refcount,
1007                    // giving a minimum of one. This means it's no longer our responsibility to
1008                    // deallocate the string, so we ended up needlessly locking the cache.
1009                }
1010            }
1011        }
1012    }
1013}
1014
1015#[derive(Clone, Copy, Hash, PartialEq)]
1016#[repr(C)] // Preserve field ordering.
1017struct InlineRepr {
1018    /// The first byte, which corresponds to the LSB of a pointer in the other variant.
1019    ///
1020    /// When the first bit is `1` the rest of this byte stores the length of the inline string.
1021    masked_len: u8,
1022    /// Inline string contents.
1023    contents: [u8; MAX_INLINE_SIZE],
1024}
1025
1026/// We can store small strings up to 1 byte less than the size of the pointer to the heap-allocated
1027/// string.
1028const MAX_INLINE_SIZE: usize = std::mem::size_of::<NonNull<raw::Payload>>() - 1;
1029
1030// Guard rail to make sure we never end up with an incorrect inline size encoding. Ensure that
1031// MAX_INLINE_SIZE is always smaller than the maximum size we can represent in a byte with the LSB
1032// reserved.
1033static_assertions::const_assert!((u8::MAX >> 1) as usize >= MAX_INLINE_SIZE);
1034
1035impl InlineRepr {
1036    #[inline]
1037    fn new(s: &[u8]) -> Self {
1038        assert!(s.len() <= MAX_INLINE_SIZE);
1039
1040        // Set the first byte to the length of the inline string with LSB masked to 1.
1041        let masked_len = ((s.len() as u8) << 1) | 1;
1042
1043        let mut contents = [0u8; MAX_INLINE_SIZE];
1044        contents[..s.len()].copy_from_slice(s);
1045
1046        Self {
1047            masked_len,
1048            contents,
1049        }
1050    }
1051
1052    #[inline]
1053    fn as_bytes(&self) -> &[u8] {
1054        let len = self.masked_len >> 1;
1055        &self.contents[..len as usize]
1056    }
1057}
1058
1059#[cfg(test)]
1060mod tests {
1061    use super::*;
1062    use static_assertions::{const_assert, const_assert_eq};
1063    use std::collections::BTreeSet;
1064    use test_case::test_case;
1065
1066    // These tests all manipulate the process-global cache in the parent module. On target devices
1067    // we run each test case in its own process, so the test cases can't pollute each other. On
1068    // host, we run tests with a process for each suite (which is the Rust upstream default), and
1069    // we need to manually isolate the tests from each other.
1070    #[cfg(not(target_os = "fuchsia"))]
1071    use serial_test::serial;
1072
1073    fn reset_global_cache() {
1074        // We still want subsequent tests to be able to run if one in the same process panics.
1075        match CACHE.lock() {
1076            Ok(mut c) => *c = AHashSet::new(),
1077            Err(e) => *e.into_inner() = AHashSet::new(),
1078        }
1079    }
1080    fn num_strings_in_global_cache() -> usize {
1081        CACHE.lock().unwrap().len()
1082    }
1083
1084    impl RawRepr {
1085        fn refcount(&self) -> Option<usize> {
1086            match self.project() {
1087                SafeRepr::Heap(ptr) => {
1088                    // SAFETY: The payload is live as long as the repr is live.
1089                    let count = unsafe { raw::Payload::refcount(ptr.as_ptr()) };
1090                    Some(count)
1091                }
1092                SafeRepr::Inline(_) => None,
1093            }
1094        }
1095    }
1096
1097    const SHORT_STRING: &str = "hello";
1098    const_assert!(SHORT_STRING.len() < MAX_INLINE_SIZE);
1099
1100    const MAX_LEN_SHORT_STRING: &str = "hello!!";
1101    const_assert_eq!(MAX_LEN_SHORT_STRING.len(), MAX_INLINE_SIZE);
1102
1103    const MIN_LEN_LONG_STRING: &str = "hello!!!";
1104    const_assert_eq!(MIN_LEN_LONG_STRING.len(), MAX_INLINE_SIZE + 1);
1105
1106    const LONG_STRING: &str = "hello, world!!!!!!!!!!!!!!!!!!!!";
1107    const_assert!(LONG_STRING.len() > MAX_INLINE_SIZE);
1108
1109    const SHORT_NON_UTF8: &[u8] = b"\xF0\x28\x8C\x28";
1110    const_assert!(SHORT_NON_UTF8.len() < MAX_INLINE_SIZE);
1111
1112    const LONG_NON_UTF8: &[u8] = b"\xF0\x28\x8C\x28\xF0\x28\x8C\x28";
1113    const_assert!(LONG_NON_UTF8.len() > MAX_INLINE_SIZE);
1114
1115    #[test_case("" ; "empty string")]
1116    #[test_case(SHORT_STRING ; "short strings")]
1117    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1118    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1119    #[test_case(LONG_STRING ; "long strings")]
1120    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1121    fn string_formatting_is_equivalent_to_str(original: &str) {
1122        reset_global_cache();
1123
1124        let cached = FlyStr::new(original);
1125        assert_eq!(format!("{original}"), format!("{cached}"));
1126        assert_eq!(format!("{original:?}"), format!("{cached:?}"));
1127
1128        let cached = FlyByteStr::new(original);
1129        assert_eq!(format!("{original}"), format!("{cached}"));
1130        assert_eq!(format!("{original:?}"), format!("{cached:?}"));
1131    }
1132
1133    #[test_case("" ; "empty string")]
1134    #[test_case(SHORT_STRING ; "short strings")]
1135    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1136    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1137    #[test_case(LONG_STRING ; "long strings")]
1138    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1139    fn string_equality_works(contents: &str) {
1140        reset_global_cache();
1141
1142        let cached = FlyStr::new(contents);
1143        let bytes_cached = FlyByteStr::new(contents);
1144        assert_eq!(cached, cached.clone(), "must be equal to itself");
1145        assert_eq!(cached, contents, "must be equal to the original");
1146        assert_eq!(
1147            cached,
1148            contents.to_owned(),
1149            "must be equal to an owned copy of the original"
1150        );
1151        assert_eq!(cached, bytes_cached);
1152
1153        // test inequality too
1154        assert_ne!(cached, "goodbye");
1155        assert_ne!(bytes_cached, "goodbye");
1156    }
1157
1158    #[test_case("", SHORT_STRING ; "empty and short string")]
1159    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
1160    #[test_case(MAX_LEN_SHORT_STRING, MIN_LEN_LONG_STRING ; "short and long strings")]
1161    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
1162    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1163    fn string_comparison_works(lesser_contents: &str, greater_contents: &str) {
1164        reset_global_cache();
1165
1166        let lesser = FlyStr::new(lesser_contents);
1167        let lesser_bytes = FlyByteStr::from(lesser_contents);
1168        let greater = FlyStr::new(greater_contents);
1169        let greater_bytes = FlyByteStr::from(greater_contents);
1170
1171        // lesser as method receiver
1172        assert!(lesser < greater);
1173        assert!(lesser < greater_bytes);
1174        assert!(lesser_bytes < greater);
1175        assert!(lesser_bytes < greater_bytes);
1176        assert!(lesser <= greater);
1177        assert!(lesser <= greater_bytes);
1178        assert!(lesser_bytes <= greater);
1179        assert!(lesser_bytes <= greater_bytes);
1180
1181        // greater as method receiver
1182        assert!(greater > lesser);
1183        assert!(greater > lesser_bytes);
1184        assert!(greater_bytes > lesser);
1185        assert!(greater >= lesser);
1186        assert!(greater >= lesser_bytes);
1187        assert!(greater_bytes >= lesser);
1188        assert!(greater_bytes >= lesser_bytes);
1189    }
1190
1191    #[test_case("" ; "empty string")]
1192    #[test_case(SHORT_STRING ; "short strings")]
1193    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1194    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1195    fn no_allocations_for_short_strings(contents: &str) {
1196        reset_global_cache();
1197        assert_eq!(num_strings_in_global_cache(), 0);
1198
1199        let original = FlyStr::new(contents);
1200        assert_eq!(num_strings_in_global_cache(), 0);
1201        assert_eq!(original.0.refcount(), None);
1202
1203        let cloned = original.clone();
1204        assert_eq!(num_strings_in_global_cache(), 0);
1205        assert_eq!(cloned.0.refcount(), None);
1206
1207        let deduped = FlyStr::new(contents);
1208        assert_eq!(num_strings_in_global_cache(), 0);
1209        assert_eq!(deduped.0.refcount(), None);
1210    }
1211
1212    #[test_case("" ; "empty string")]
1213    #[test_case(SHORT_STRING ; "short strings")]
1214    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1215    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1216    fn no_allocations_for_short_bytestrings(contents: &str) {
1217        reset_global_cache();
1218        assert_eq!(num_strings_in_global_cache(), 0);
1219
1220        let original = FlyByteStr::new(contents);
1221        assert_eq!(num_strings_in_global_cache(), 0);
1222        assert_eq!(original.0.refcount(), None);
1223
1224        let cloned = original.clone();
1225        assert_eq!(num_strings_in_global_cache(), 0);
1226        assert_eq!(cloned.0.refcount(), None);
1227
1228        let deduped = FlyByteStr::new(contents);
1229        assert_eq!(num_strings_in_global_cache(), 0);
1230        assert_eq!(deduped.0.refcount(), None);
1231    }
1232
1233    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1234    #[test_case(LONG_STRING ; "long strings")]
1235    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1236    fn only_one_copy_allocated_for_long_strings(contents: &str) {
1237        reset_global_cache();
1238
1239        assert_eq!(num_strings_in_global_cache(), 0);
1240
1241        let original = FlyStr::new(contents);
1242        assert_eq!(
1243            num_strings_in_global_cache(),
1244            1,
1245            "only one string allocated"
1246        );
1247        assert_eq!(original.0.refcount(), Some(1), "one copy on stack");
1248
1249        let cloned = original.clone();
1250        assert_eq!(
1251            num_strings_in_global_cache(),
1252            1,
1253            "cloning just incremented refcount"
1254        );
1255        assert_eq!(cloned.0.refcount(), Some(2), "two copies on stack");
1256
1257        let deduped = FlyStr::new(contents);
1258        assert_eq!(num_strings_in_global_cache(), 1, "new string was deduped");
1259        assert_eq!(deduped.0.refcount(), Some(3), "three copies on stack");
1260    }
1261
1262    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1263    #[test_case(LONG_STRING ; "long strings")]
1264    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1265    fn only_one_copy_allocated_for_long_bytestrings(contents: &str) {
1266        reset_global_cache();
1267
1268        assert_eq!(num_strings_in_global_cache(), 0);
1269
1270        let original = FlyByteStr::new(contents);
1271        assert_eq!(
1272            num_strings_in_global_cache(),
1273            1,
1274            "only one string allocated"
1275        );
1276        assert_eq!(original.0.refcount(), Some(1), "one copy on stack");
1277
1278        let cloned = original.clone();
1279        assert_eq!(
1280            num_strings_in_global_cache(),
1281            1,
1282            "cloning just incremented refcount"
1283        );
1284        assert_eq!(cloned.0.refcount(), Some(2), "two copies on stack");
1285
1286        let deduped = FlyByteStr::new(contents);
1287        assert_eq!(num_strings_in_global_cache(), 1, "new string was deduped");
1288        assert_eq!(deduped.0.refcount(), Some(3), "three copies on stack");
1289    }
1290
1291    #[test]
1292    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1293    fn utf8_and_bytestrings_share_the_cache() {
1294        reset_global_cache();
1295
1296        assert_eq!(num_strings_in_global_cache(), 0, "cache is empty");
1297
1298        let _utf8 = FlyStr::from(MIN_LEN_LONG_STRING);
1299        assert_eq!(num_strings_in_global_cache(), 1, "string was allocated");
1300
1301        let _bytes = FlyByteStr::from(MIN_LEN_LONG_STRING);
1302        assert_eq!(
1303            num_strings_in_global_cache(),
1304            1,
1305            "bytestring was pulled from cache"
1306        );
1307    }
1308
1309    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1310    #[test_case(LONG_STRING ; "long strings")]
1311    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1312    fn cached_strings_dropped_when_refs_dropped(contents: &str) {
1313        reset_global_cache();
1314
1315        let alloced = FlyStr::new(contents);
1316        assert_eq!(
1317            num_strings_in_global_cache(),
1318            1,
1319            "only one string allocated"
1320        );
1321        drop(alloced);
1322        assert_eq!(num_strings_in_global_cache(), 0, "last reference dropped");
1323    }
1324
1325    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1326    #[test_case(LONG_STRING ; "long strings")]
1327    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1328    fn cached_bytestrings_dropped_when_refs_dropped(contents: &str) {
1329        reset_global_cache();
1330
1331        let alloced = FlyByteStr::new(contents);
1332        assert_eq!(
1333            num_strings_in_global_cache(),
1334            1,
1335            "only one string allocated"
1336        );
1337        drop(alloced);
1338        assert_eq!(num_strings_in_global_cache(), 0, "last reference dropped");
1339    }
1340
1341    #[test_case("", SHORT_STRING ; "empty and short string")]
1342    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
1343    #[test_case(SHORT_STRING, LONG_STRING ; "short and long strings")]
1344    #[test_case(LONG_STRING, MAX_LEN_SHORT_STRING ; "long and max-len-short strings")]
1345    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
1346    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1347    fn equality_and_hashing_with_pointer_value_works_correctly(first: &str, second: &str) {
1348        reset_global_cache();
1349
1350        let first = FlyStr::new(first);
1351        let second = FlyStr::new(second);
1352
1353        let mut set = AHashSet::new();
1354        set.insert(first.clone());
1355        assert!(set.contains(&first));
1356        assert!(!set.contains(&second));
1357
1358        // re-insert the same cmstring
1359        set.insert(first);
1360        assert_eq!(
1361            set.len(),
1362            1,
1363            "set did not grow because the same string was inserted as before"
1364        );
1365
1366        set.insert(second.clone());
1367        assert_eq!(
1368            set.len(),
1369            2,
1370            "inserting a different string must mutate the set"
1371        );
1372        assert!(set.contains(&second));
1373
1374        // re-insert the second cmstring
1375        set.insert(second);
1376        assert_eq!(set.len(), 2);
1377    }
1378
1379    #[test_case("", SHORT_STRING ; "empty and short string")]
1380    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
1381    #[test_case(SHORT_STRING, LONG_STRING ; "short and long strings")]
1382    #[test_case(LONG_STRING, MAX_LEN_SHORT_STRING ; "long and max-len-short strings")]
1383    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
1384    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1385    fn byte_equality_and_hashing_with_pointer_value_works_correctly(first: &str, second: &str) {
1386        reset_global_cache();
1387
1388        let first = FlyByteStr::new(first);
1389        let second = FlyByteStr::new(second);
1390
1391        let mut set = AHashSet::new();
1392        set.insert(first.clone());
1393        assert!(set.contains(&first));
1394        assert!(!set.contains(&second));
1395
1396        // re-insert the same string
1397        set.insert(first);
1398        assert_eq!(
1399            set.len(),
1400            1,
1401            "set did not grow because the same string was inserted as before"
1402        );
1403
1404        set.insert(second.clone());
1405        assert_eq!(
1406            set.len(),
1407            2,
1408            "inserting a different string must mutate the set"
1409        );
1410        assert!(set.contains(&second));
1411
1412        // re-insert the second string
1413        set.insert(second);
1414        assert_eq!(set.len(), 2);
1415    }
1416
1417    #[test_case("", SHORT_STRING ; "empty and short string")]
1418    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
1419    #[test_case(SHORT_STRING, LONG_STRING ; "short and long strings")]
1420    #[test_case(LONG_STRING, MAX_LEN_SHORT_STRING ; "long and max-len-short strings")]
1421    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
1422    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1423    fn comparison_for_btree_storage_works(first: &str, second: &str) {
1424        reset_global_cache();
1425
1426        let first = FlyStr::new(first);
1427        let second = FlyStr::new(second);
1428
1429        let mut set = BTreeSet::new();
1430        set.insert(first.clone());
1431        assert!(set.contains(&first));
1432        assert!(!set.contains(&second));
1433
1434        // re-insert the same cmstring
1435        set.insert(first);
1436        assert_eq!(
1437            set.len(),
1438            1,
1439            "set did not grow because the same string was inserted as before"
1440        );
1441
1442        set.insert(second.clone());
1443        assert_eq!(
1444            set.len(),
1445            2,
1446            "inserting a different string must mutate the set"
1447        );
1448        assert!(set.contains(&second));
1449
1450        // re-insert the second cmstring
1451        set.insert(second);
1452        assert_eq!(set.len(), 2);
1453    }
1454
1455    #[test_case("", SHORT_STRING ; "empty and short string")]
1456    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
1457    #[test_case(SHORT_STRING, LONG_STRING ; "short and long strings")]
1458    #[test_case(LONG_STRING, MAX_LEN_SHORT_STRING ; "long and max-len-short strings")]
1459    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
1460    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1461    fn byte_comparison_for_btree_storage_works(first: &str, second: &str) {
1462        reset_global_cache();
1463
1464        let first = FlyByteStr::new(first);
1465        let second = FlyByteStr::new(second);
1466
1467        let mut set = BTreeSet::new();
1468        set.insert(first.clone());
1469        assert!(set.contains(&first));
1470        assert!(!set.contains(&second));
1471
1472        // re-insert the same string
1473        set.insert(first);
1474        assert_eq!(
1475            set.len(),
1476            1,
1477            "set did not grow because the same string was inserted as before"
1478        );
1479
1480        set.insert(second.clone());
1481        assert_eq!(
1482            set.len(),
1483            2,
1484            "inserting a different string must mutate the set"
1485        );
1486        assert!(set.contains(&second));
1487
1488        // re-insert the second string
1489        set.insert(second);
1490        assert_eq!(set.len(), 2);
1491    }
1492
1493    #[cfg(feature = "serde")]
1494    #[test_case("" ; "empty string")]
1495    #[test_case(SHORT_STRING ; "short strings")]
1496    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1497    #[test_case(MIN_LEN_LONG_STRING ; "min len long strings")]
1498    #[test_case(LONG_STRING ; "long strings")]
1499    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1500    fn serde_works(contents: &str) {
1501        reset_global_cache();
1502
1503        let s = FlyStr::new(contents);
1504        let as_json = serde_json::to_string(&s).unwrap();
1505        assert_eq!(as_json, format!("\"{contents}\""));
1506        assert_eq!(s, serde_json::from_str::<FlyStr>(&as_json).unwrap());
1507    }
1508
1509    #[cfg(feature = "serde")]
1510    #[test_case("" ; "empty string")]
1511    #[test_case(SHORT_STRING ; "short strings")]
1512    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1513    #[test_case(MIN_LEN_LONG_STRING ; "min len long strings")]
1514    #[test_case(LONG_STRING ; "long strings")]
1515    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1516    fn serde_works_bytestring(contents: &str) {
1517        reset_global_cache();
1518
1519        let s = FlyByteStr::new(contents);
1520        let as_json = serde_json::to_string(&s).unwrap();
1521        assert_eq!(s, serde_json::from_str::<FlyByteStr>(&as_json).unwrap());
1522    }
1523
1524    #[test_case(SHORT_NON_UTF8 ; "short non-utf8 bytestring")]
1525    #[test_case(LONG_NON_UTF8 ; "long non-utf8 bytestring")]
1526    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1527    fn non_utf8_works(contents: &[u8]) {
1528        reset_global_cache();
1529
1530        let res: Result<FlyStr, _> = FlyByteStr::from(contents).try_into();
1531        res.unwrap_err();
1532    }
1533
1534    #[test_case("" ; "empty string")]
1535    #[test_case(SHORT_STRING ; "short strings")]
1536    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1537    #[test_case(MIN_LEN_LONG_STRING ; "min len long strings")]
1538    #[test_case(LONG_STRING ; "long strings")]
1539    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1540    fn flystr_to_flybytestr_and_back(contents: &str) {
1541        reset_global_cache();
1542
1543        let bytestr = FlyByteStr::from(contents);
1544        let flystr = FlyStr::try_from(bytestr.clone()).unwrap();
1545        assert_eq!(bytestr, flystr);
1546        let bytestr2 = FlyByteStr::from(flystr.clone());
1547        assert_eq!(bytestr, bytestr2);
1548    }
1549}