flyweights/
lib.rs

1// Copyright 2022 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! Types implementing the [flyweight pattern](https://en.wikipedia.org/wiki/Flyweight_pattern)
6//! for reusing object allocations.
7
8#![warn(missing_docs)]
9
10mod raw;
11
12use ahash::AHashSet;
13use bstr::{BStr, BString};
14use serde::de::{Deserializer, Visitor};
15use serde::ser::Serializer;
16use serde::{Deserialize, Serialize};
17use std::borrow::Borrow;
18use std::fmt::{Debug, Display, Formatter, Result as FmtResult};
19use std::hash::{Hash, Hasher};
20use std::ops::Deref;
21use std::ptr::NonNull;
22use std::sync::Mutex;
23
24/// The global string cache for `FlyStr`.
25///
26/// If a live `FlyStr` contains an `Storage`, the `Storage` must also be in this cache and it must
27/// have a refcount of >= 2.
28static CACHE: std::sync::LazyLock<Mutex<AHashSet<Storage>>> =
29    std::sync::LazyLock::new(|| Mutex::new(AHashSet::new()));
30
31/// Wrapper type for stored strings that lets us query the cache without an owned value.
32#[repr(transparent)]
33struct Storage(NonNull<raw::Payload>);
34
35// SAFETY: FlyStr storage is always safe to send across threads.
36unsafe impl Send for Storage {}
37// SAFETY: FlyStr storage is always safe to share across threads.
38unsafe impl Sync for Storage {}
39
40impl PartialEq for Storage {
41    #[inline]
42    fn eq(&self, other: &Self) -> bool {
43        self.as_bytes() == other.as_bytes()
44    }
45}
46
47impl Eq for Storage {}
48
49impl Hash for Storage {
50    #[inline]
51    fn hash<H: Hasher>(&self, state: &mut H) {
52        self.as_bytes().hash(state)
53    }
54}
55
56impl Storage {
57    #[inline]
58    fn inc_ref(&self) -> usize {
59        // SAFETY: `Storage` always points to a valid `Payload`.
60        unsafe { raw::Payload::inc_ref(self.0.as_ptr()) }
61    }
62
63    #[inline]
64    fn as_bytes(&self) -> &[u8] {
65        // SAFETY: `Storage` always points to a valid `Payload`.
66        unsafe { &*raw::Payload::bytes(self.0.as_ptr()) }
67    }
68}
69
70impl Borrow<[u8]> for Storage {
71    #[inline]
72    fn borrow(&self) -> &[u8] {
73        self.as_bytes()
74    }
75}
76
77/// An immutable string type which only stores a single copy of each string allocated. Internally
78/// represented as a shared pointer to the backing allocation. Occupies a single pointer width.
79///
80/// # Small strings
81///
82/// Very short strings are stored inline in the pointer with bit-tagging, so no allocations are
83/// performed.
84///
85/// # Performance
86///
87/// It's slower to construct than a regular `String` but trades that for reduced standing memory
88/// usage by deduplicating strings. `PartialEq` and `Hash` are implemented on the underlying pointer
89/// value rather than the pointed-to data for faster equality comparisons and indexing, which is
90/// sound by virtue of the type guaranteeing that only one `FlyStr` pointer value will exist at
91/// any time for a given string's contents.
92///
93/// As with any performance optimization, you should only use this type if you can measure the
94/// benefit it provides to your program. Pay careful attention to creating `FlyStr`s in hot paths
95/// as it may regress runtime performance.
96///
97/// # Allocation lifecycle
98///
99/// Intended for long-running system services with user-provided values, `FlyStr`s are removed from
100/// the global cache when the last reference to them is dropped. While this incurs some overhead
101/// it is important to prevent the value cache from becoming a denial-of-service vector.
102#[derive(Clone, Eq, Hash, PartialEq)]
103pub struct FlyStr(RawRepr);
104
105#[cfg(feature = "json_schema")]
106impl schemars::JsonSchema for FlyStr {
107    fn schema_name() -> String {
108        str::schema_name()
109    }
110
111    fn json_schema(generator: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
112        str::json_schema(generator)
113    }
114
115    fn is_referenceable() -> bool {
116        false
117    }
118}
119
120static_assertions::assert_eq_size!(FlyStr, usize);
121
122impl FlyStr {
123    /// Create a `FlyStr`, allocating it in the cache if the value is not already cached.
124    ///
125    /// # Performance
126    ///
127    /// Creating an instance of this type requires accessing the global cache of strings, which
128    /// involves taking a lock. When multiple threads are allocating lots of strings there may be
129    /// contention.
130    ///
131    /// Each string allocated is hashed for lookup in the cache.
132    #[inline]
133    pub fn new(s: impl AsRef<str> + Into<String>) -> Self {
134        Self(RawRepr::new_str(s))
135    }
136
137    /// Returns the underlying string slice.
138    #[inline]
139    pub fn as_str(&self) -> &str {
140        // SAFETY: Every FlyStr is constructed from valid UTF-8 bytes.
141        unsafe { std::str::from_utf8_unchecked(self.0.as_bytes()) }
142    }
143}
144
145impl Default for FlyStr {
146    #[inline]
147    fn default() -> Self {
148        Self::new("")
149    }
150}
151
152impl From<&'_ str> for FlyStr {
153    #[inline]
154    fn from(s: &str) -> Self {
155        Self::new(s)
156    }
157}
158
159impl From<&'_ String> for FlyStr {
160    #[inline]
161    fn from(s: &String) -> Self {
162        Self::new(&**s)
163    }
164}
165
166impl From<String> for FlyStr {
167    #[inline]
168    fn from(s: String) -> Self {
169        Self::new(s)
170    }
171}
172
173impl From<Box<str>> for FlyStr {
174    #[inline]
175    fn from(s: Box<str>) -> Self {
176        Self::new(s)
177    }
178}
179
180impl From<&Box<str>> for FlyStr {
181    #[inline]
182    fn from(s: &Box<str>) -> Self {
183        Self::new(&**s)
184    }
185}
186
187impl TryFrom<FlyByteStr> for FlyStr {
188    type Error = std::str::Utf8Error;
189
190    #[inline]
191    fn try_from(b: FlyByteStr) -> Result<FlyStr, Self::Error> {
192        // The internals of both FlyStr and FlyByteStr are the same, but it's only sound to return
193        // a FlyStr if the RawRepr contains/points to valid UTF-8.
194        std::str::from_utf8(b.as_bytes())?;
195        Ok(FlyStr(b.0))
196    }
197}
198
199impl Into<String> for FlyStr {
200    #[inline]
201    fn into(self) -> String {
202        self.as_str().to_owned()
203    }
204}
205
206impl Into<String> for &'_ FlyStr {
207    #[inline]
208    fn into(self) -> String {
209        self.as_str().to_owned()
210    }
211}
212
213impl Deref for FlyStr {
214    type Target = str;
215
216    #[inline]
217    fn deref(&self) -> &Self::Target {
218        self.as_str()
219    }
220}
221
222impl AsRef<str> for FlyStr {
223    #[inline]
224    fn as_ref(&self) -> &str {
225        self.as_str()
226    }
227}
228
229impl PartialOrd for FlyStr {
230    #[inline]
231    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
232        Some(self.cmp(other))
233    }
234}
235impl Ord for FlyStr {
236    #[inline]
237    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
238        self.as_str().cmp(other.as_str())
239    }
240}
241
242impl PartialEq<str> for FlyStr {
243    #[inline]
244    fn eq(&self, other: &str) -> bool {
245        self.as_str() == other
246    }
247}
248
249impl PartialEq<&'_ str> for FlyStr {
250    #[inline]
251    fn eq(&self, other: &&str) -> bool {
252        self.as_str() == *other
253    }
254}
255
256impl PartialEq<String> for FlyStr {
257    #[inline]
258    fn eq(&self, other: &String) -> bool {
259        self.as_str() == &**other
260    }
261}
262
263impl PartialEq<FlyByteStr> for FlyStr {
264    #[inline]
265    fn eq(&self, other: &FlyByteStr) -> bool {
266        self.0 == other.0
267    }
268}
269
270impl PartialEq<&'_ FlyByteStr> for FlyStr {
271    #[inline]
272    fn eq(&self, other: &&FlyByteStr) -> bool {
273        self.0 == other.0
274    }
275}
276
277impl PartialOrd<str> for FlyStr {
278    #[inline]
279    fn partial_cmp(&self, other: &str) -> Option<std::cmp::Ordering> {
280        self.as_str().partial_cmp(other)
281    }
282}
283
284impl PartialOrd<&str> for FlyStr {
285    #[inline]
286    fn partial_cmp(&self, other: &&str) -> Option<std::cmp::Ordering> {
287        self.as_str().partial_cmp(*other)
288    }
289}
290
291impl PartialOrd<FlyByteStr> for FlyStr {
292    #[inline]
293    fn partial_cmp(&self, other: &FlyByteStr) -> Option<std::cmp::Ordering> {
294        BStr::new(self.as_str()).partial_cmp(other.as_bstr())
295    }
296}
297
298impl PartialOrd<&'_ FlyByteStr> for FlyStr {
299    #[inline]
300    fn partial_cmp(&self, other: &&FlyByteStr) -> Option<std::cmp::Ordering> {
301        BStr::new(self.as_str()).partial_cmp(other.as_bstr())
302    }
303}
304
305impl Debug for FlyStr {
306    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
307        Debug::fmt(self.as_str(), f)
308    }
309}
310
311impl Display for FlyStr {
312    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
313        Display::fmt(self.as_str(), f)
314    }
315}
316
317impl Serialize for FlyStr {
318    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
319        serializer.serialize_str(self.as_str())
320    }
321}
322
323impl<'d> Deserialize<'d> for FlyStr {
324    fn deserialize<D: Deserializer<'d>>(deserializer: D) -> Result<Self, D::Error> {
325        deserializer.deserialize_str(FlyStrVisitor)
326    }
327}
328
329struct FlyStrVisitor;
330
331impl Visitor<'_> for FlyStrVisitor {
332    type Value = FlyStr;
333    fn expecting(&self, formatter: &mut Formatter<'_>) -> FmtResult {
334        formatter.write_str("a string")
335    }
336
337    fn visit_borrowed_str<'de, E>(self, v: &'de str) -> Result<Self::Value, E>
338    where
339        E: serde::de::Error,
340    {
341        Ok(v.into())
342    }
343
344    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
345    where
346        E: serde::de::Error,
347    {
348        Ok(v.into())
349    }
350
351    fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
352    where
353        E: serde::de::Error,
354    {
355        Ok(v.into())
356    }
357}
358
359macro_rules! new_raw_repr {
360    ($borrowed_bytes:expr) => {
361        if $borrowed_bytes.len() <= MAX_INLINE_SIZE {
362            RawRepr::new_inline($borrowed_bytes)
363        } else {
364            let mut cache = CACHE.lock().unwrap();
365            if let Some(existing) = cache.get($borrowed_bytes) {
366                RawRepr::from_storage(existing)
367            } else {
368                RawRepr::new_for_storage(&mut cache, $borrowed_bytes)
369            }
370        }
371    };
372}
373
374/// An immutable bytestring type which only stores a single copy of each string allocated.
375/// Internally represented as a shared pointer to the backing allocation. Occupies a single pointer width.
376///
377/// # Small strings
378///
379/// Very short strings are stored inline in the pointer with bit-tagging, so no allocations are
380/// performed.
381///
382/// # Performance
383///
384/// It's slower to construct than a regular `BString` but trades that for reduced standing memory
385/// usage by deduplicating strings. `PartialEq` and `Hash` are implemented on the underlying pointer
386/// value rather than the pointed-to data for faster equality comparisons and indexing, which is
387/// sound by virtue of the type guaranteeing that only one `FlyByteStr` pointer value will exist at
388/// any time for a given string's contents.
389///
390/// As with any performance optimization, you should only use this type if you can measure the
391/// benefit it provides to your program. Pay careful attention to creating `FlyByteStr`s in hot
392/// paths as it may regress runtime performance.
393///
394/// # Allocation lifecycle
395///
396/// Intended for long-running system services with user-provided values, `FlyByteStr`s are removed
397/// from the global cache when the last reference to them is dropped. While this incurs some
398/// overhead it is important to prevent the value cache from becoming a denial-of-service vector.
399#[derive(Clone, Eq, Hash, PartialEq)]
400pub struct FlyByteStr(RawRepr);
401
402static_assertions::assert_eq_size!(FlyByteStr, usize);
403
404impl FlyByteStr {
405    /// Create a `FlyByteStr`, allocating it in the cache if the value is not already cached.
406    ///
407    /// # Performance
408    ///
409    /// Creating an instance of this type requires accessing the global cache of strings, which
410    /// involves taking a lock. When multiple threads are allocating lots of strings there may be
411    /// contention.
412    ///
413    /// Each string allocated is hashed for lookup in the cache.
414    pub fn new(s: impl AsRef<[u8]> + Into<Vec<u8>>) -> Self {
415        Self(RawRepr::new(s))
416    }
417
418    /// Returns the underlying bytestring slice.
419    #[inline]
420    pub fn as_bstr(&self) -> &BStr {
421        BStr::new(self.0.as_bytes())
422    }
423
424    /// Returns the underlying byte slice.
425    #[inline]
426    pub fn as_bytes(&self) -> &[u8] {
427        self.0.as_bytes()
428    }
429}
430
431impl Default for FlyByteStr {
432    #[inline]
433    fn default() -> Self {
434        Self::new(b"")
435    }
436}
437
438impl From<&'_ [u8]> for FlyByteStr {
439    #[inline]
440    fn from(s: &[u8]) -> Self {
441        Self::new(s)
442    }
443}
444
445impl From<&'_ BStr> for FlyByteStr {
446    #[inline]
447    fn from(s: &BStr) -> Self {
448        let bytes: &[u8] = s.as_ref();
449        Self(new_raw_repr!(bytes))
450    }
451}
452
453impl From<&'_ str> for FlyByteStr {
454    #[inline]
455    fn from(s: &str) -> Self {
456        Self::new(s)
457    }
458}
459
460impl From<&'_ Vec<u8>> for FlyByteStr {
461    #[inline]
462    fn from(s: &Vec<u8>) -> Self {
463        Self(new_raw_repr!(&s[..]))
464    }
465}
466
467impl From<&'_ String> for FlyByteStr {
468    #[inline]
469    fn from(s: &String) -> Self {
470        Self::new(&**s)
471    }
472}
473
474impl From<Vec<u8>> for FlyByteStr {
475    #[inline]
476    fn from(s: Vec<u8>) -> Self {
477        Self::new(s)
478    }
479}
480
481impl From<String> for FlyByteStr {
482    #[inline]
483    fn from(s: String) -> Self {
484        Self::new(s)
485    }
486}
487
488impl From<BString> for FlyByteStr {
489    #[inline]
490    fn from(s: BString) -> Self {
491        Self::new(s)
492    }
493}
494
495impl From<Box<[u8]>> for FlyByteStr {
496    #[inline]
497    fn from(s: Box<[u8]>) -> Self {
498        Self::new(s)
499    }
500}
501
502impl From<Box<str>> for FlyByteStr {
503    #[inline]
504    fn from(s: Box<str>) -> Self {
505        Self(new_raw_repr!(s.as_bytes()))
506    }
507}
508
509impl From<&'_ Box<[u8]>> for FlyByteStr {
510    #[inline]
511    fn from(s: &'_ Box<[u8]>) -> Self {
512        Self(new_raw_repr!(&**s))
513    }
514}
515
516impl From<&Box<str>> for FlyByteStr {
517    #[inline]
518    fn from(s: &Box<str>) -> Self {
519        Self::new(&**s)
520    }
521}
522
523impl Into<BString> for FlyByteStr {
524    #[inline]
525    fn into(self) -> BString {
526        self.as_bstr().to_owned()
527    }
528}
529
530impl Into<Vec<u8>> for FlyByteStr {
531    #[inline]
532    fn into(self) -> Vec<u8> {
533        self.as_bytes().to_owned()
534    }
535}
536
537impl From<FlyStr> for FlyByteStr {
538    #[inline]
539    fn from(s: FlyStr) -> FlyByteStr {
540        Self(s.0)
541    }
542}
543
544impl TryInto<String> for FlyByteStr {
545    type Error = std::string::FromUtf8Error;
546
547    #[inline]
548    fn try_into(self) -> Result<String, Self::Error> {
549        String::from_utf8(self.into())
550    }
551}
552
553impl Deref for FlyByteStr {
554    type Target = BStr;
555
556    #[inline]
557    fn deref(&self) -> &Self::Target {
558        self.as_bstr()
559    }
560}
561
562impl AsRef<BStr> for FlyByteStr {
563    #[inline]
564    fn as_ref(&self) -> &BStr {
565        self.as_bstr()
566    }
567}
568
569impl PartialOrd for FlyByteStr {
570    #[inline]
571    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
572        Some(self.cmp(other))
573    }
574}
575impl Ord for FlyByteStr {
576    #[inline]
577    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
578        self.as_bstr().cmp(other.as_bstr())
579    }
580}
581
582impl PartialEq<[u8]> for FlyByteStr {
583    #[inline]
584    fn eq(&self, other: &[u8]) -> bool {
585        self.as_bytes() == other
586    }
587}
588
589impl PartialEq<BStr> for FlyByteStr {
590    #[inline]
591    fn eq(&self, other: &BStr) -> bool {
592        self.as_bytes() == other
593    }
594}
595
596impl PartialEq<str> for FlyByteStr {
597    #[inline]
598    fn eq(&self, other: &str) -> bool {
599        self.as_bytes() == other.as_bytes()
600    }
601}
602
603impl PartialEq<&'_ [u8]> for FlyByteStr {
604    #[inline]
605    fn eq(&self, other: &&[u8]) -> bool {
606        self.as_bytes() == *other
607    }
608}
609
610impl PartialEq<&'_ BStr> for FlyByteStr {
611    #[inline]
612    fn eq(&self, other: &&BStr) -> bool {
613        self.as_bstr() == *other
614    }
615}
616
617impl PartialEq<&'_ str> for FlyByteStr {
618    #[inline]
619    fn eq(&self, other: &&str) -> bool {
620        self.as_bytes() == other.as_bytes()
621    }
622}
623
624impl PartialEq<String> for FlyByteStr {
625    #[inline]
626    fn eq(&self, other: &String) -> bool {
627        self.as_bytes() == other.as_bytes()
628    }
629}
630
631impl PartialEq<FlyStr> for FlyByteStr {
632    #[inline]
633    fn eq(&self, other: &FlyStr) -> bool {
634        self.0 == other.0
635    }
636}
637
638impl PartialEq<&'_ FlyStr> for FlyByteStr {
639    #[inline]
640    fn eq(&self, other: &&FlyStr) -> bool {
641        self.0 == other.0
642    }
643}
644
645impl PartialOrd<str> for FlyByteStr {
646    #[inline]
647    fn partial_cmp(&self, other: &str) -> Option<std::cmp::Ordering> {
648        self.as_bstr().partial_cmp(other)
649    }
650}
651
652impl PartialOrd<&str> for FlyByteStr {
653    #[inline]
654    fn partial_cmp(&self, other: &&str) -> Option<std::cmp::Ordering> {
655        self.as_bstr().partial_cmp(other)
656    }
657}
658
659impl PartialOrd<FlyStr> for FlyByteStr {
660    #[inline]
661    fn partial_cmp(&self, other: &FlyStr) -> Option<std::cmp::Ordering> {
662        self.as_bstr().partial_cmp(other.as_str())
663    }
664}
665
666impl Debug for FlyByteStr {
667    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
668        Debug::fmt(self.as_bstr(), f)
669    }
670}
671
672impl Display for FlyByteStr {
673    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
674        Display::fmt(self.as_bstr(), f)
675    }
676}
677
678impl Serialize for FlyByteStr {
679    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
680        serializer.serialize_bytes(self.as_bytes())
681    }
682}
683
684impl<'d> Deserialize<'d> for FlyByteStr {
685    fn deserialize<D: Deserializer<'d>>(deserializer: D) -> Result<Self, D::Error> {
686        deserializer.deserialize_bytes(FlyByteStrVisitor)
687    }
688}
689
690struct FlyByteStrVisitor;
691
692impl<'de> Visitor<'de> for FlyByteStrVisitor {
693    type Value = FlyByteStr;
694    fn expecting(&self, formatter: &mut Formatter<'_>) -> FmtResult {
695        formatter.write_str("a string, a bytestring, or a sequence of bytes")
696    }
697
698    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
699    where
700        E: serde::de::Error,
701    {
702        Ok(FlyByteStr::from(v))
703    }
704
705    fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
706    where
707        E: serde::de::Error,
708    {
709        Ok(FlyByteStr::from(v))
710    }
711
712    fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
713    where
714        E: serde::de::Error,
715    {
716        Ok(FlyByteStr::from(v))
717    }
718
719    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
720    where
721        A: serde::de::SeqAccess<'de>,
722    {
723        let mut bytes = vec![];
724        while let Some(b) = seq.next_element::<u8>()? {
725            bytes.push(b);
726        }
727        Ok(FlyByteStr::from(bytes))
728    }
729}
730
731#[repr(C)] // Guarantee predictable field ordering.
732union RawRepr {
733    /// Strings longer than MAX_INLINE_SIZE are allocated using `raw::Payload`, which have a thin
734    /// pointer representation. This means that `heap` variants of `Storage` will always have the
735    /// pointer contents aligned and this variant will never have its least significant bit set.
736    ///
737    /// We store a `NonNull` so we can have guaranteed pointer layout.
738    heap: NonNull<raw::Payload>,
739
740    /// Strings shorter than or equal in length to MAX_INLINE_SIZE are stored in this union variant.
741    /// The first byte is reserved for the size of the inline string, and the remaining bytes are
742    /// used for the string itself. The first byte has its least significant bit set to 1 to
743    /// distinguish inline strings from heap-allocated ones, and the size is stored in the remaining
744    /// 7 bits.
745    inline: InlineRepr,
746}
747
748// The inline variant should not cause us to occupy more space than the heap variant alone.
749static_assertions::assert_eq_size!(NonNull<raw::Payload>, RawRepr);
750
751// Alignment of the payload pointers must be >1 in order to have space for the mask bit at the
752// bottom.
753static_assertions::const_assert!(std::mem::align_of::<raw::Payload>() > 1);
754
755// The short string optimization makes little-endian layout assumptions with the first byte being
756// the least significant.
757static_assertions::assert_type_eq_all!(byteorder::NativeEndian, byteorder::LittleEndian);
758
759/// An enum with an actual discriminant that allows us to limit the reach of unsafe code in the
760/// implementation without affecting the stored size of `RawRepr`.
761enum SafeRepr<'a> {
762    Heap(NonNull<raw::Payload>),
763    Inline(&'a InlineRepr),
764}
765
766// SAFETY: FlyStr can be dropped from any thread.
767unsafe impl Send for RawRepr {}
768// SAFETY: FlyStr has an immutable public API.
769unsafe impl Sync for RawRepr {}
770
771impl RawRepr {
772    fn new_str(s: impl AsRef<str>) -> Self {
773        let borrowed = s.as_ref();
774        new_raw_repr!(borrowed.as_bytes())
775    }
776
777    fn new(s: impl AsRef<[u8]>) -> Self {
778        let borrowed = s.as_ref();
779        new_raw_repr!(borrowed)
780    }
781
782    #[inline]
783    fn new_inline(s: &[u8]) -> Self {
784        assert!(s.len() <= MAX_INLINE_SIZE);
785        let new = Self { inline: InlineRepr::new(s) };
786        assert!(new.is_inline(), "least significant bit must be 1 for inline strings");
787        new
788    }
789
790    #[inline]
791    fn from_storage(storage: &Storage) -> Self {
792        if storage.inc_ref() == 0 {
793            // Another thread is trying to lock the cache and free this string. They already
794            // released their refcount, so give it back to them. This will prevent this thread
795            // and other threads from attempting to free the string if they drop the refcount back
796            // down.
797            storage.inc_ref();
798        }
799        Self { heap: storage.0 }
800    }
801
802    #[inline]
803    fn new_for_storage(cache: &mut AHashSet<Storage>, bytes: &[u8]) -> Self {
804        assert!(bytes.len() > MAX_INLINE_SIZE);
805        // `Payload::alloc` starts the refcount at 1.
806        let new_storage = raw::Payload::alloc(bytes);
807
808        let for_cache = Storage(new_storage);
809        let new = Self { heap: new_storage };
810        assert!(!new.is_inline(), "least significant bit must be 0 for heap strings");
811        cache.insert(for_cache);
812        new
813    }
814
815    #[inline]
816    fn is_inline(&self) -> bool {
817        // SAFETY: it is always OK to interpret a pointer as byte array as long as we don't expect
818        // to retain provenance.
819        (unsafe { self.inline.masked_len } & 1) == 1
820    }
821
822    #[inline]
823    fn project(&self) -> SafeRepr<'_> {
824        if self.is_inline() {
825            // SAFETY: Just checked that this is the inline variant.
826            SafeRepr::Inline(unsafe { &self.inline })
827        } else {
828            // SAFETY: Just checked that this is the heap variant.
829            SafeRepr::Heap(unsafe { self.heap })
830        }
831    }
832
833    #[inline]
834    fn as_bytes(&self) -> &[u8] {
835        match self.project() {
836            // SAFETY: FlyStr owns the payload stored as a NonNull, it is live as long as `FlyStr`.
837            SafeRepr::Heap(ptr) => unsafe { &*raw::Payload::bytes(ptr.as_ptr()) },
838            SafeRepr::Inline(i) => i.as_bytes(),
839        }
840    }
841}
842
843impl PartialEq for RawRepr {
844    #[inline]
845    fn eq(&self, other: &Self) -> bool {
846        // SAFETY: it is always OK to interpret a pointer as a byte array as long as we don't expect
847        // to retain provenance.
848        let lhs = unsafe { &self.inline };
849        // SAFETY: it is always OK to interpret a pointer as a byte array as long as we don't expect
850        // to retain provenance.
851        let rhs = unsafe { &other.inline };
852        lhs.eq(rhs)
853    }
854}
855impl Eq for RawRepr {}
856
857impl Hash for RawRepr {
858    fn hash<H: Hasher>(&self, h: &mut H) {
859        // SAFETY: it is always OK to interpret a pointer as a byte array as long as we don't expect
860        // to retain provenance.
861        let this = unsafe { &self.inline };
862        this.hash(h);
863    }
864}
865
866impl Clone for RawRepr {
867    fn clone(&self) -> Self {
868        match self.project() {
869            SafeRepr::Heap(ptr) => {
870                // SAFETY: We own this payload, we know it's live because we are.
871                unsafe {
872                    raw::Payload::inc_ref(ptr.as_ptr());
873                }
874                Self { heap: ptr }
875            }
876            SafeRepr::Inline(&inline) => Self { inline },
877        }
878    }
879}
880
881impl Drop for RawRepr {
882    fn drop(&mut self) {
883        if !self.is_inline() {
884            // SAFETY: We checked above that this is the heap repr.
885            let heap = unsafe { self.heap };
886
887            // Decrementing the refcount before locking the cache causes the following failure mode:
888            //
889            // 1. We drop the refcount to 0.
890            // 2. Another thread finds the string we're about to drop and increments the refcount
891            //    back up to 1.
892            // 3. That thread drops its refcount, and also sees the refcount drop to 0.
893            // 4. That thread locks the cache, removes the value, and drops the payload. This leaves
894            //    us with a dangling pointer to the dropped payload.
895            // 5. We lock the cache and go to look up our value in the cache. Our payload pointer is
896            //    dangling now, but we don't know that. If we try to read through our dangling
897            //    pointer, we cause UB.
898            //
899            // To account for this failure mode and still optimistically drop our refcount, we
900            // modify the procedure slightly:
901            //
902            // 1. We drop the refcount to 0.
903            // 2. Another thread finds the string we're about to drop and increments the refcount
904            //    back up to 1. It notices that the refcount incremented from 0 to 1, and so knows
905            //    that our thread will try to drop it. While still holding the cache lock, that
906            //    thread increments the refcount again from 1 to 2. This "gives back" the refcount
907            //    to our thread.
908            // 3. That thread drops its refcount, and sees the refcount drop to 1. It won't try to
909            //    drop the payload this time.
910            // 4. We lock the cache, and decrement the refcount a second time. If it decremented
911            //    from 0 or 1, then we know that no other threads are currently holding references
912            //    to it and we can safely drop it ourselves.
913
914            // SAFETY: The payload is live.
915            let prev_refcount = unsafe { raw::Payload::dec_ref(heap.as_ptr()) };
916
917            // If we held the final refcount outside of the cache, try to remove the string.
918            if prev_refcount == 1 {
919                let mut cache = CACHE.lock().unwrap();
920
921                let current_refcount = unsafe { raw::Payload::dec_ref(heap.as_ptr()) };
922                if current_refcount <= 1 {
923                    // If the refcount was still 0 after acquiring the cache lock, no other thread
924                    // looked up this payload between optimistically decrementing the refcount and
925                    // now. If the refcount was 1, then another thread did, but dropped its refcount
926                    // before we got the cache lock. Either way, we can safely remove the string
927                    // from the cache and free it.
928
929                    let bytes = unsafe { &*raw::Payload::bytes(heap.as_ptr()) };
930                    assert!(
931                        cache.remove(bytes),
932                        "cache did not contain bytes, but this thread didn't remove them yet",
933                    );
934
935                    // Get out of the critical section as soon as possible
936                    drop(cache);
937
938                    // SAFETY: The payload is live.
939                    unsafe { raw::Payload::dealloc(heap.as_ptr()) };
940                } else {
941                    // Another thread looked up this payload, made a reference to it, and gave our
942                    // refcount back to us for a minmium refcount of 2. We re-removed our refcount,
943                    // giving a minimum of one. This means it's no longer our responsibility to
944                    // deallocate the string, so we ended up needlessly locking the cache.
945                }
946            }
947        }
948    }
949}
950
951#[derive(Clone, Copy, Hash, PartialEq)]
952#[repr(C)] // Preserve field ordering.
953struct InlineRepr {
954    /// The first byte, which corresponds to the LSB of a pointer in the other variant.
955    ///
956    /// When the first bit is `1` the rest of this byte stores the length of the inline string.
957    masked_len: u8,
958    /// Inline string contents.
959    contents: [u8; MAX_INLINE_SIZE],
960}
961
962/// We can store small strings up to 1 byte less than the size of the pointer to the heap-allocated
963/// string.
964const MAX_INLINE_SIZE: usize = std::mem::size_of::<NonNull<raw::Payload>>() - 1;
965
966// Guard rail to make sure we never end up with an incorrect inline size encoding. Ensure that
967// MAX_INLINE_SIZE is always smaller than the maximum size we can represent in a byte with the LSB
968// reserved.
969static_assertions::const_assert!((std::u8::MAX >> 1) as usize >= MAX_INLINE_SIZE);
970
971impl InlineRepr {
972    #[inline]
973    fn new(s: &[u8]) -> Self {
974        assert!(s.len() <= MAX_INLINE_SIZE);
975
976        // Set the first byte to the length of the inline string with LSB masked to 1.
977        let masked_len = ((s.len() as u8) << 1) | 1;
978
979        let mut contents = [0u8; MAX_INLINE_SIZE];
980        contents[..s.len()].copy_from_slice(s);
981
982        Self { masked_len, contents }
983    }
984
985    #[inline]
986    fn as_bytes(&self) -> &[u8] {
987        let len = self.masked_len >> 1;
988        &self.contents[..len as usize]
989    }
990}
991
992#[cfg(test)]
993mod tests {
994    use super::*;
995    use static_assertions::{const_assert, const_assert_eq};
996    use std::collections::BTreeSet;
997    use test_case::test_case;
998
999    // These tests all manipulate the process-global cache in the parent module. On target devices
1000    // we run each test case in its own process, so the test cases can't pollute each other. On
1001    // host, we run tests with a process for each suite (which is the Rust upstream default), and
1002    // we need to manually isolate the tests from each other.
1003    #[cfg(not(target_os = "fuchsia"))]
1004    use serial_test::serial;
1005
1006    fn reset_global_cache() {
1007        // We still want subsequent tests to be able to run if one in the same process panics.
1008        match CACHE.lock() {
1009            Ok(mut c) => *c = AHashSet::new(),
1010            Err(e) => *e.into_inner() = AHashSet::new(),
1011        }
1012    }
1013    fn num_strings_in_global_cache() -> usize {
1014        CACHE.lock().unwrap().len()
1015    }
1016
1017    impl RawRepr {
1018        fn refcount(&self) -> Option<usize> {
1019            match self.project() {
1020                SafeRepr::Heap(ptr) => {
1021                    // SAFETY: The payload is live as long as the repr is live.
1022                    let count = unsafe { raw::Payload::refcount(ptr.as_ptr()) };
1023                    Some(count)
1024                }
1025                SafeRepr::Inline(_) => None,
1026            }
1027        }
1028    }
1029
1030    const SHORT_STRING: &str = "hello";
1031    const_assert!(SHORT_STRING.len() < MAX_INLINE_SIZE);
1032
1033    const MAX_LEN_SHORT_STRING: &str = "hello!!";
1034    const_assert_eq!(MAX_LEN_SHORT_STRING.len(), MAX_INLINE_SIZE);
1035
1036    const MIN_LEN_LONG_STRING: &str = "hello!!!";
1037    const_assert_eq!(MIN_LEN_LONG_STRING.len(), MAX_INLINE_SIZE + 1);
1038
1039    const LONG_STRING: &str = "hello, world!!!!!!!!!!!!!!!!!!!!";
1040    const_assert!(LONG_STRING.len() > MAX_INLINE_SIZE);
1041
1042    const SHORT_NON_UTF8: &[u8] = b"\xF0\x28\x8C\x28";
1043    const_assert!(SHORT_NON_UTF8.len() < MAX_INLINE_SIZE);
1044
1045    const LONG_NON_UTF8: &[u8] = b"\xF0\x28\x8C\x28\xF0\x28\x8C\x28";
1046    const_assert!(LONG_NON_UTF8.len() > MAX_INLINE_SIZE);
1047
1048    #[test_case("" ; "empty string")]
1049    #[test_case(SHORT_STRING ; "short strings")]
1050    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1051    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1052    #[test_case(LONG_STRING ; "long strings")]
1053    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1054    fn string_formatting_is_equivalent_to_str(original: &str) {
1055        reset_global_cache();
1056
1057        let cached = FlyStr::new(original);
1058        assert_eq!(format!("{original}"), format!("{cached}"));
1059        assert_eq!(format!("{original:?}"), format!("{cached:?}"));
1060
1061        let cached = FlyByteStr::new(original);
1062        assert_eq!(format!("{original}"), format!("{cached}"));
1063        assert_eq!(format!("{original:?}"), format!("{cached:?}"));
1064    }
1065
1066    #[test_case("" ; "empty string")]
1067    #[test_case(SHORT_STRING ; "short strings")]
1068    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1069    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1070    #[test_case(LONG_STRING ; "long strings")]
1071    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1072    fn string_equality_works(contents: &str) {
1073        reset_global_cache();
1074
1075        let cached = FlyStr::new(contents);
1076        let bytes_cached = FlyByteStr::new(contents);
1077        assert_eq!(cached, cached.clone(), "must be equal to itself");
1078        assert_eq!(cached, contents, "must be equal to the original");
1079        assert_eq!(cached, contents.to_owned(), "must be equal to an owned copy of the original");
1080        assert_eq!(cached, bytes_cached);
1081
1082        // test inequality too
1083        assert_ne!(cached, "goodbye");
1084        assert_ne!(bytes_cached, "goodbye");
1085    }
1086
1087    #[test_case("", SHORT_STRING ; "empty and short string")]
1088    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
1089    #[test_case(MAX_LEN_SHORT_STRING, MIN_LEN_LONG_STRING ; "short and long strings")]
1090    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
1091    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1092    fn string_comparison_works(lesser_contents: &str, greater_contents: &str) {
1093        reset_global_cache();
1094
1095        let lesser = FlyStr::new(lesser_contents);
1096        let lesser_bytes = FlyByteStr::from(lesser_contents);
1097        let greater = FlyStr::new(greater_contents);
1098        let greater_bytes = FlyByteStr::from(greater_contents);
1099
1100        // lesser as method receiver
1101        assert!(lesser < greater);
1102        assert!(lesser < greater_bytes);
1103        assert!(lesser_bytes < greater);
1104        assert!(lesser_bytes < greater_bytes);
1105        assert!(lesser <= greater);
1106        assert!(lesser <= greater_bytes);
1107        assert!(lesser_bytes <= greater);
1108        assert!(lesser_bytes <= greater_bytes);
1109
1110        // greater as method receiver
1111        assert!(greater > lesser);
1112        assert!(greater > lesser_bytes);
1113        assert!(greater_bytes > lesser);
1114        assert!(greater >= lesser);
1115        assert!(greater >= lesser_bytes);
1116        assert!(greater_bytes >= lesser);
1117        assert!(greater_bytes >= lesser_bytes);
1118    }
1119
1120    #[test_case("" ; "empty string")]
1121    #[test_case(SHORT_STRING ; "short strings")]
1122    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1123    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1124    fn no_allocations_for_short_strings(contents: &str) {
1125        reset_global_cache();
1126        assert_eq!(num_strings_in_global_cache(), 0);
1127
1128        let original = FlyStr::new(contents);
1129        assert_eq!(num_strings_in_global_cache(), 0);
1130        assert_eq!(original.0.refcount(), None);
1131
1132        let cloned = original.clone();
1133        assert_eq!(num_strings_in_global_cache(), 0);
1134        assert_eq!(cloned.0.refcount(), None);
1135
1136        let deduped = FlyStr::new(contents);
1137        assert_eq!(num_strings_in_global_cache(), 0);
1138        assert_eq!(deduped.0.refcount(), None);
1139    }
1140
1141    #[test_case("" ; "empty string")]
1142    #[test_case(SHORT_STRING ; "short strings")]
1143    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1144    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1145    fn no_allocations_for_short_bytestrings(contents: &str) {
1146        reset_global_cache();
1147        assert_eq!(num_strings_in_global_cache(), 0);
1148
1149        let original = FlyByteStr::new(contents);
1150        assert_eq!(num_strings_in_global_cache(), 0);
1151        assert_eq!(original.0.refcount(), None);
1152
1153        let cloned = original.clone();
1154        assert_eq!(num_strings_in_global_cache(), 0);
1155        assert_eq!(cloned.0.refcount(), None);
1156
1157        let deduped = FlyByteStr::new(contents);
1158        assert_eq!(num_strings_in_global_cache(), 0);
1159        assert_eq!(deduped.0.refcount(), None);
1160    }
1161
1162    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1163    #[test_case(LONG_STRING ; "long strings")]
1164    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1165    fn only_one_copy_allocated_for_long_strings(contents: &str) {
1166        reset_global_cache();
1167
1168        assert_eq!(num_strings_in_global_cache(), 0);
1169
1170        let original = FlyStr::new(contents);
1171        assert_eq!(num_strings_in_global_cache(), 1, "only one string allocated");
1172        assert_eq!(original.0.refcount(), Some(1), "one copy on stack");
1173
1174        let cloned = original.clone();
1175        assert_eq!(num_strings_in_global_cache(), 1, "cloning just incremented refcount");
1176        assert_eq!(cloned.0.refcount(), Some(2), "two copies on stack");
1177
1178        let deduped = FlyStr::new(contents);
1179        assert_eq!(num_strings_in_global_cache(), 1, "new string was deduped");
1180        assert_eq!(deduped.0.refcount(), Some(3), "three copies on stack");
1181    }
1182
1183    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1184    #[test_case(LONG_STRING ; "long strings")]
1185    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1186    fn only_one_copy_allocated_for_long_bytestrings(contents: &str) {
1187        reset_global_cache();
1188
1189        assert_eq!(num_strings_in_global_cache(), 0);
1190
1191        let original = FlyByteStr::new(contents);
1192        assert_eq!(num_strings_in_global_cache(), 1, "only one string allocated");
1193        assert_eq!(original.0.refcount(), Some(1), "one copy on stack");
1194
1195        let cloned = original.clone();
1196        assert_eq!(num_strings_in_global_cache(), 1, "cloning just incremented refcount");
1197        assert_eq!(cloned.0.refcount(), Some(2), "two copies on stack");
1198
1199        let deduped = FlyByteStr::new(contents);
1200        assert_eq!(num_strings_in_global_cache(), 1, "new string was deduped");
1201        assert_eq!(deduped.0.refcount(), Some(3), "three copies on stack");
1202    }
1203
1204    #[test]
1205    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1206    fn utf8_and_bytestrings_share_the_cache() {
1207        reset_global_cache();
1208
1209        assert_eq!(num_strings_in_global_cache(), 0, "cache is empty");
1210
1211        let _utf8 = FlyStr::from(MIN_LEN_LONG_STRING);
1212        assert_eq!(num_strings_in_global_cache(), 1, "string was allocated");
1213
1214        let _bytes = FlyByteStr::from(MIN_LEN_LONG_STRING);
1215        assert_eq!(num_strings_in_global_cache(), 1, "bytestring was pulled from cache");
1216    }
1217
1218    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1219    #[test_case(LONG_STRING ; "long strings")]
1220    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1221    fn cached_strings_dropped_when_refs_dropped(contents: &str) {
1222        reset_global_cache();
1223
1224        let alloced = FlyStr::new(contents);
1225        assert_eq!(num_strings_in_global_cache(), 1, "only one string allocated");
1226        drop(alloced);
1227        assert_eq!(num_strings_in_global_cache(), 0, "last reference dropped");
1228    }
1229
1230    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1231    #[test_case(LONG_STRING ; "long strings")]
1232    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1233    fn cached_bytestrings_dropped_when_refs_dropped(contents: &str) {
1234        reset_global_cache();
1235
1236        let alloced = FlyByteStr::new(contents);
1237        assert_eq!(num_strings_in_global_cache(), 1, "only one string allocated");
1238        drop(alloced);
1239        assert_eq!(num_strings_in_global_cache(), 0, "last reference dropped");
1240    }
1241
1242    #[test_case("", SHORT_STRING ; "empty and short string")]
1243    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
1244    #[test_case(SHORT_STRING, LONG_STRING ; "short and long strings")]
1245    #[test_case(LONG_STRING, MAX_LEN_SHORT_STRING ; "long and max-len-short strings")]
1246    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
1247    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1248    fn equality_and_hashing_with_pointer_value_works_correctly(first: &str, second: &str) {
1249        reset_global_cache();
1250
1251        let first = FlyStr::new(first);
1252        let second = FlyStr::new(second);
1253
1254        let mut set = AHashSet::new();
1255        set.insert(first.clone());
1256        assert!(set.contains(&first));
1257        assert!(!set.contains(&second));
1258
1259        // re-insert the same cmstring
1260        set.insert(first);
1261        assert_eq!(set.len(), 1, "set did not grow because the same string was inserted as before");
1262
1263        set.insert(second.clone());
1264        assert_eq!(set.len(), 2, "inserting a different string must mutate the set");
1265        assert!(set.contains(&second));
1266
1267        // re-insert the second cmstring
1268        set.insert(second);
1269        assert_eq!(set.len(), 2);
1270    }
1271
1272    #[test_case("", SHORT_STRING ; "empty and short string")]
1273    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
1274    #[test_case(SHORT_STRING, LONG_STRING ; "short and long strings")]
1275    #[test_case(LONG_STRING, MAX_LEN_SHORT_STRING ; "long and max-len-short strings")]
1276    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
1277    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1278    fn byte_equality_and_hashing_with_pointer_value_works_correctly(first: &str, second: &str) {
1279        reset_global_cache();
1280
1281        let first = FlyByteStr::new(first);
1282        let second = FlyByteStr::new(second);
1283
1284        let mut set = AHashSet::new();
1285        set.insert(first.clone());
1286        assert!(set.contains(&first));
1287        assert!(!set.contains(&second));
1288
1289        // re-insert the same string
1290        set.insert(first);
1291        assert_eq!(set.len(), 1, "set did not grow because the same string was inserted as before");
1292
1293        set.insert(second.clone());
1294        assert_eq!(set.len(), 2, "inserting a different string must mutate the set");
1295        assert!(set.contains(&second));
1296
1297        // re-insert the second string
1298        set.insert(second);
1299        assert_eq!(set.len(), 2);
1300    }
1301
1302    #[test_case("", SHORT_STRING ; "empty and short string")]
1303    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
1304    #[test_case(SHORT_STRING, LONG_STRING ; "short and long strings")]
1305    #[test_case(LONG_STRING, MAX_LEN_SHORT_STRING ; "long and max-len-short strings")]
1306    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
1307    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1308    fn comparison_for_btree_storage_works(first: &str, second: &str) {
1309        reset_global_cache();
1310
1311        let first = FlyStr::new(first);
1312        let second = FlyStr::new(second);
1313
1314        let mut set = BTreeSet::new();
1315        set.insert(first.clone());
1316        assert!(set.contains(&first));
1317        assert!(!set.contains(&second));
1318
1319        // re-insert the same cmstring
1320        set.insert(first);
1321        assert_eq!(set.len(), 1, "set did not grow because the same string was inserted as before");
1322
1323        set.insert(second.clone());
1324        assert_eq!(set.len(), 2, "inserting a different string must mutate the set");
1325        assert!(set.contains(&second));
1326
1327        // re-insert the second cmstring
1328        set.insert(second);
1329        assert_eq!(set.len(), 2);
1330    }
1331
1332    #[test_case("", SHORT_STRING ; "empty and short string")]
1333    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
1334    #[test_case(SHORT_STRING, LONG_STRING ; "short and long strings")]
1335    #[test_case(LONG_STRING, MAX_LEN_SHORT_STRING ; "long and max-len-short strings")]
1336    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
1337    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1338    fn byte_comparison_for_btree_storage_works(first: &str, second: &str) {
1339        reset_global_cache();
1340
1341        let first = FlyByteStr::new(first);
1342        let second = FlyByteStr::new(second);
1343
1344        let mut set = BTreeSet::new();
1345        set.insert(first.clone());
1346        assert!(set.contains(&first));
1347        assert!(!set.contains(&second));
1348
1349        // re-insert the same string
1350        set.insert(first);
1351        assert_eq!(set.len(), 1, "set did not grow because the same string was inserted as before");
1352
1353        set.insert(second.clone());
1354        assert_eq!(set.len(), 2, "inserting a different string must mutate the set");
1355        assert!(set.contains(&second));
1356
1357        // re-insert the second string
1358        set.insert(second);
1359        assert_eq!(set.len(), 2);
1360    }
1361
1362    #[test_case("" ; "empty string")]
1363    #[test_case(SHORT_STRING ; "short strings")]
1364    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1365    #[test_case(MIN_LEN_LONG_STRING ; "min len long strings")]
1366    #[test_case(LONG_STRING ; "long strings")]
1367    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1368    fn serde_works(contents: &str) {
1369        reset_global_cache();
1370
1371        let s = FlyStr::new(contents);
1372        let as_json = serde_json::to_string(&s).unwrap();
1373        assert_eq!(as_json, format!("\"{contents}\""));
1374        assert_eq!(s, serde_json::from_str::<FlyStr>(&as_json).unwrap());
1375    }
1376
1377    #[test_case("" ; "empty string")]
1378    #[test_case(SHORT_STRING ; "short strings")]
1379    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1380    #[test_case(MIN_LEN_LONG_STRING ; "min len long strings")]
1381    #[test_case(LONG_STRING ; "long strings")]
1382    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1383    fn serde_works_bytestring(contents: &str) {
1384        reset_global_cache();
1385
1386        let s = FlyByteStr::new(contents);
1387        let as_json = serde_json::to_string(&s).unwrap();
1388        assert_eq!(s, serde_json::from_str::<FlyByteStr>(&as_json).unwrap());
1389    }
1390
1391    #[test_case(SHORT_NON_UTF8 ; "short non-utf8 bytestring")]
1392    #[test_case(LONG_NON_UTF8 ; "long non-utf8 bytestring")]
1393    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1394    fn non_utf8_works(contents: &[u8]) {
1395        reset_global_cache();
1396
1397        let res: Result<FlyStr, _> = FlyByteStr::from(contents).try_into();
1398        res.unwrap_err();
1399    }
1400
1401    #[test_case("" ; "empty string")]
1402    #[test_case(SHORT_STRING ; "short strings")]
1403    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1404    #[test_case(MIN_LEN_LONG_STRING ; "min len long strings")]
1405    #[test_case(LONG_STRING ; "long strings")]
1406    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1407    fn flystr_to_flybytestr_and_back(contents: &str) {
1408        reset_global_cache();
1409
1410        let bytestr = FlyByteStr::from(contents);
1411        let flystr = FlyStr::try_from(bytestr.clone()).unwrap();
1412        assert_eq!(bytestr, flystr);
1413        let bytestr2 = FlyByteStr::from(flystr.clone());
1414        assert_eq!(bytestr, bytestr2);
1415    }
1416}