flyweights/
lib.rs

// Copyright 2022 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! Types implementing the [flyweight pattern](https://en.wikipedia.org/wiki/Flyweight_pattern)
//! for reusing object allocations.

#![warn(missing_docs)]

mod raw;

use ahash::AHashSet;
use bstr::{BStr, BString};
use serde::de::{Deserializer, Visitor};
use serde::ser::Serializer;
use serde::{Deserialize, Serialize};
use std::borrow::Borrow;
use std::fmt::{Debug, Display, Formatter, Result as FmtResult};
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use std::ptr::NonNull;
use std::sync::Mutex;

/// The global string cache for `FlyStr`.
///
/// If a live `FlyStr` contains a `Storage`, the `Storage` must also be in this cache and it must
/// have a refcount of >= 1.
static CACHE: std::sync::LazyLock<Mutex<AHashSet<Storage>>> =
    std::sync::LazyLock::new(|| Mutex::new(AHashSet::new()));

/// Wrapper type for stored strings that lets us query the cache without an owned value.
#[repr(transparent)]
struct Storage(NonNull<raw::Payload>);

// SAFETY: FlyStr storage is always safe to send across threads.
unsafe impl Send for Storage {}
// SAFETY: FlyStr storage is always safe to share across threads.
unsafe impl Sync for Storage {}

impl PartialEq for Storage {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.as_bytes() == other.as_bytes()
    }
}

impl Eq for Storage {}

impl Hash for Storage {
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.as_bytes().hash(state)
    }
}

impl Storage {
    #[inline]
    fn inc_ref(&self) -> usize {
        // SAFETY: `Storage` always points to a valid `Payload`.
        unsafe { raw::Payload::inc_ref(self.0.as_ptr()) }
    }

    #[inline]
    fn as_bytes(&self) -> &[u8] {
        // SAFETY: `Storage` always points to a valid `Payload`.
        unsafe { &*raw::Payload::bytes(self.0.as_ptr()) }
    }
}

impl Borrow<[u8]> for Storage {
    #[inline]
    fn borrow(&self) -> &[u8] {
        self.as_bytes()
    }
}

/// An immutable string type which only stores a single copy of each string allocated. Internally
/// represented as a shared pointer to the backing allocation. Occupies a single pointer width.
///
/// # Small strings
///
/// Very short strings are stored inline in the pointer with bit-tagging, so no allocations are
/// performed.
///
/// # Performance
///
/// It's slower to construct than a regular `String` but trades that for reduced standing memory
/// usage by deduplicating strings. `PartialEq` and `Hash` are implemented on the underlying pointer
/// value rather than the pointed-to data for faster equality comparisons and indexing, which is
/// sound by virtue of the type guaranteeing that only one `FlyStr` pointer value will exist at
/// any time for a given string's contents.
///
/// As with any performance optimization, you should only use this type if you can measure the
/// benefit it provides to your program. Pay careful attention to creating `FlyStr`s in hot paths
/// as it may regress runtime performance.
///
/// # Allocation lifecycle
///
/// Intended for long-running system services with user-provided values, `FlyStr`s are removed from
/// the global cache when the last reference to them is dropped. While this incurs some overhead,
/// it is important to prevent the value cache from becoming a denial-of-service vector.
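///
/// # Example
///
/// A minimal usage sketch. It assumes the crate is available under the name `flyweights`; adjust
/// the `use` path to match how the library is imported in your build.
///
/// ```
/// use flyweights::FlyStr;
///
/// // Strings with identical contents share a single cached allocation.
/// let a = FlyStr::new("a string long enough to be heap allocated");
/// let b = FlyStr::from("a string long enough to be heap allocated");
/// assert_eq!(a, b);
///
/// // The contents are available as a plain `&str`.
/// assert_eq!(a.as_str(), "a string long enough to be heap allocated");
/// ```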
#[derive(Clone, Eq, Hash, PartialEq)]
pub struct FlyStr(RawRepr);

#[cfg(feature = "json_schema")]
impl schemars::JsonSchema for FlyStr {
    fn schema_name() -> String {
        str::schema_name()
    }

    fn json_schema(generator: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
        str::json_schema(generator)
    }

    fn is_referenceable() -> bool {
        false
    }
}

static_assertions::assert_eq_size!(FlyStr, usize);

impl FlyStr {
    /// Create a `FlyStr`, allocating it in the cache if the value is not already cached.
    ///
    /// # Performance
    ///
    /// Creating an instance of this type requires accessing the global cache of strings, which
    /// involves taking a lock. When multiple threads are allocating lots of strings there may be
    /// contention.
    ///
    /// Each string allocated is hashed for lookup in the cache.
    #[inline]
    pub fn new(s: impl AsRef<str> + Into<String>) -> Self {
        Self(RawRepr::new_str(s))
    }

    /// Returns the underlying string slice.
    #[inline]
    pub fn as_str(&self) -> &str {
        // SAFETY: Every FlyStr is constructed from valid UTF-8 bytes.
        unsafe { std::str::from_utf8_unchecked(self.0.as_bytes()) }
    }
}

impl Default for FlyStr {
    #[inline]
    fn default() -> Self {
        Self::new("")
    }
}

impl From<&'_ str> for FlyStr {
    #[inline]
    fn from(s: &str) -> Self {
        Self::new(s)
    }
}

impl From<&'_ String> for FlyStr {
    #[inline]
    fn from(s: &String) -> Self {
        Self::new(&**s)
    }
}

impl From<String> for FlyStr {
    #[inline]
    fn from(s: String) -> Self {
        Self::new(s)
    }
}

impl From<Box<str>> for FlyStr {
    #[inline]
    fn from(s: Box<str>) -> Self {
        Self::new(s)
    }
}

impl From<&Box<str>> for FlyStr {
    #[inline]
    fn from(s: &Box<str>) -> Self {
        Self::new(&**s)
    }
}

impl TryFrom<FlyByteStr> for FlyStr {
    type Error = std::str::Utf8Error;

    #[inline]
    fn try_from(b: FlyByteStr) -> Result<FlyStr, Self::Error> {
        // The internals of both FlyStr and FlyByteStr are the same, but it's only sound to return
        // a FlyStr if the RawRepr contains/points to valid UTF-8.
        std::str::from_utf8(b.as_bytes())?;
        Ok(FlyStr(b.0))
    }
}

impl Into<String> for FlyStr {
    #[inline]
    fn into(self) -> String {
        self.as_str().to_owned()
    }
}

impl Into<String> for &'_ FlyStr {
    #[inline]
    fn into(self) -> String {
        self.as_str().to_owned()
    }
}

impl Deref for FlyStr {
    type Target = str;

    #[inline]
    fn deref(&self) -> &Self::Target {
        self.as_str()
    }
}

impl AsRef<str> for FlyStr {
    #[inline]
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}

impl PartialOrd for FlyStr {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for FlyStr {
    #[inline]
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.as_str().cmp(other.as_str())
    }
}

impl PartialEq<str> for FlyStr {
    #[inline]
    fn eq(&self, other: &str) -> bool {
        self.as_str() == other
    }
}

impl PartialEq<&'_ str> for FlyStr {
    #[inline]
    fn eq(&self, other: &&str) -> bool {
        self.as_str() == *other
    }
}

impl PartialEq<String> for FlyStr {
    #[inline]
    fn eq(&self, other: &String) -> bool {
        self.as_str() == &**other
    }
}

impl PartialEq<FlyByteStr> for FlyStr {
    #[inline]
    fn eq(&self, other: &FlyByteStr) -> bool {
        self.0 == other.0
    }
}

impl PartialEq<&'_ FlyByteStr> for FlyStr {
    #[inline]
    fn eq(&self, other: &&FlyByteStr) -> bool {
        self.0 == other.0
    }
}

impl PartialOrd<str> for FlyStr {
    #[inline]
    fn partial_cmp(&self, other: &str) -> Option<std::cmp::Ordering> {
        self.as_str().partial_cmp(other)
    }
}

impl PartialOrd<&str> for FlyStr {
    #[inline]
    fn partial_cmp(&self, other: &&str) -> Option<std::cmp::Ordering> {
        self.as_str().partial_cmp(*other)
    }
}

impl PartialOrd<FlyByteStr> for FlyStr {
    #[inline]
    fn partial_cmp(&self, other: &FlyByteStr) -> Option<std::cmp::Ordering> {
        BStr::new(self.as_str()).partial_cmp(other.as_bstr())
    }
}

impl PartialOrd<&'_ FlyByteStr> for FlyStr {
    #[inline]
    fn partial_cmp(&self, other: &&FlyByteStr) -> Option<std::cmp::Ordering> {
        BStr::new(self.as_str()).partial_cmp(other.as_bstr())
    }
}

impl Debug for FlyStr {
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        Debug::fmt(self.as_str(), f)
    }
}

impl Display for FlyStr {
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        Display::fmt(self.as_str(), f)
    }
}

impl Serialize for FlyStr {
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        serializer.serialize_str(self.as_str())
    }
}

impl<'d> Deserialize<'d> for FlyStr {
    fn deserialize<D: Deserializer<'d>>(deserializer: D) -> Result<Self, D::Error> {
        deserializer.deserialize_str(FlyStrVisitor)
    }
}

struct FlyStrVisitor;

impl<'de> Visitor<'de> for FlyStrVisitor {
    type Value = FlyStr;
    fn expecting(&self, formatter: &mut Formatter<'_>) -> FmtResult {
        formatter.write_str("a string")
    }

    fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(v.into())
    }

    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(v.into())
    }

    fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(v.into())
    }
}

macro_rules! new_raw_repr {
    ($borrowed_bytes:expr) => {
        if $borrowed_bytes.len() <= MAX_INLINE_SIZE {
            RawRepr::new_inline($borrowed_bytes)
        } else {
            let mut cache = CACHE.lock().unwrap();
            if let Some(existing) = cache.get($borrowed_bytes) {
                RawRepr::from_storage(existing)
            } else {
                RawRepr::new_for_storage(&mut cache, $borrowed_bytes)
            }
        }
    };
}

/// An immutable bytestring type which only stores a single copy of each string allocated.
/// Internally represented as a shared pointer to the backing allocation. Occupies a single pointer width.
///
/// # Small strings
///
/// Very short strings are stored inline in the pointer with bit-tagging, so no allocations are
/// performed.
///
/// # Performance
///
/// It's slower to construct than a regular `BString` but trades that for reduced standing memory
/// usage by deduplicating strings. `PartialEq` and `Hash` are implemented on the underlying pointer
/// value rather than the pointed-to data for faster equality comparisons and indexing, which is
/// sound by virtue of the type guaranteeing that only one `FlyByteStr` pointer value will exist at
/// any time for a given string's contents.
///
/// As with any performance optimization, you should only use this type if you can measure the
/// benefit it provides to your program. Pay careful attention to creating `FlyByteStr`s in hot
/// paths as it may regress runtime performance.
///
/// # Allocation lifecycle
///
/// Intended for long-running system services with user-provided values, `FlyByteStr`s are removed
/// from the global cache when the last reference to them is dropped. While this incurs some
/// overhead, it is important to prevent the value cache from becoming a denial-of-service vector.
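///
/// # Example
///
/// A minimal usage sketch. It assumes the crate is available under the name `flyweights`; adjust
/// the `use` path to match how the library is imported in your build.
///
/// ```
/// use flyweights::{FlyByteStr, FlyStr};
///
/// // Bytestrings do not need to be valid UTF-8.
/// let bytes = FlyByteStr::from(&b"\xF0\x28\x8C\x28"[..]);
///
/// // Converting to a `FlyStr` validates UTF-8, which fails for this value.
/// assert!(FlyStr::try_from(bytes).is_err());
///
/// // Valid UTF-8 contents convert losslessly.
/// let text = FlyByteStr::from("plain text");
/// assert_eq!(FlyStr::try_from(text).unwrap(), "plain text");
/// ```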
#[derive(Clone, Eq, Hash, PartialEq)]
pub struct FlyByteStr(RawRepr);

static_assertions::assert_eq_size!(FlyByteStr, usize);

impl FlyByteStr {
    /// Create a `FlyByteStr`, allocating it in the cache if the value is not already cached.
    ///
    /// # Performance
    ///
    /// Creating an instance of this type requires accessing the global cache of strings, which
    /// involves taking a lock. When multiple threads are allocating lots of strings there may be
    /// contention.
    ///
    /// Each string allocated is hashed for lookup in the cache.
    pub fn new(s: impl AsRef<[u8]> + Into<Vec<u8>>) -> Self {
        Self(RawRepr::new(s))
    }

    /// Returns the underlying bytestring slice.
    #[inline]
    pub fn as_bstr(&self) -> &BStr {
        BStr::new(self.0.as_bytes())
    }

    /// Returns the underlying byte slice.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        self.0.as_bytes()
    }
}

impl Default for FlyByteStr {
    #[inline]
    fn default() -> Self {
        Self::new(b"")
    }
}

impl From<&'_ [u8]> for FlyByteStr {
    #[inline]
    fn from(s: &[u8]) -> Self {
        Self::new(s)
    }
}

impl<const N: usize> From<[u8; N]> for FlyByteStr {
    #[inline]
    fn from(s: [u8; N]) -> Self {
        Self::new(&s)
    }
}

impl From<&'_ BStr> for FlyByteStr {
    #[inline]
    fn from(s: &BStr) -> Self {
        let bytes: &[u8] = s.as_ref();
        Self(new_raw_repr!(bytes))
    }
}

impl From<&'_ str> for FlyByteStr {
    #[inline]
    fn from(s: &str) -> Self {
        Self::new(s)
    }
}

impl From<&'_ Vec<u8>> for FlyByteStr {
    #[inline]
    fn from(s: &Vec<u8>) -> Self {
        Self(new_raw_repr!(&s[..]))
    }
}

impl From<&'_ String> for FlyByteStr {
    #[inline]
    fn from(s: &String) -> Self {
        Self::new(&**s)
    }
}

impl From<Vec<u8>> for FlyByteStr {
    #[inline]
    fn from(s: Vec<u8>) -> Self {
        Self::new(s)
    }
}

impl From<String> for FlyByteStr {
    #[inline]
    fn from(s: String) -> Self {
        Self::new(s)
    }
}

impl From<BString> for FlyByteStr {
    #[inline]
    fn from(s: BString) -> Self {
        Self::new(s)
    }
}

impl From<Box<[u8]>> for FlyByteStr {
    #[inline]
    fn from(s: Box<[u8]>) -> Self {
        Self::new(s)
    }
}

impl From<Box<str>> for FlyByteStr {
    #[inline]
    fn from(s: Box<str>) -> Self {
        Self(new_raw_repr!(s.as_bytes()))
    }
}

impl From<&'_ Box<[u8]>> for FlyByteStr {
    #[inline]
    fn from(s: &'_ Box<[u8]>) -> Self {
        Self(new_raw_repr!(&**s))
    }
}

impl From<&Box<str>> for FlyByteStr {
    #[inline]
    fn from(s: &Box<str>) -> Self {
        Self::new(&**s)
    }
}

impl Into<BString> for FlyByteStr {
    #[inline]
    fn into(self) -> BString {
        self.as_bstr().to_owned()
    }
}

impl Into<Vec<u8>> for FlyByteStr {
    #[inline]
    fn into(self) -> Vec<u8> {
        self.as_bytes().to_owned()
    }
}

impl From<FlyStr> for FlyByteStr {
    #[inline]
    fn from(s: FlyStr) -> FlyByteStr {
        Self(s.0)
    }
}

impl TryInto<String> for FlyByteStr {
    type Error = std::string::FromUtf8Error;

    #[inline]
    fn try_into(self) -> Result<String, Self::Error> {
        String::from_utf8(self.into())
    }
}

impl Deref for FlyByteStr {
    type Target = BStr;

    #[inline]
    fn deref(&self) -> &Self::Target {
        self.as_bstr()
    }
}

impl AsRef<BStr> for FlyByteStr {
    #[inline]
    fn as_ref(&self) -> &BStr {
        self.as_bstr()
    }
}

impl PartialOrd for FlyByteStr {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for FlyByteStr {
    #[inline]
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.as_bstr().cmp(other.as_bstr())
    }
}

impl PartialEq<[u8]> for FlyByteStr {
    #[inline]
    fn eq(&self, other: &[u8]) -> bool {
        self.as_bytes() == other
    }
}

impl PartialEq<BStr> for FlyByteStr {
    #[inline]
    fn eq(&self, other: &BStr) -> bool {
        self.as_bytes() == other
    }
}

impl PartialEq<str> for FlyByteStr {
    #[inline]
    fn eq(&self, other: &str) -> bool {
        self.as_bytes() == other.as_bytes()
    }
}

impl PartialEq<&'_ [u8]> for FlyByteStr {
    #[inline]
    fn eq(&self, other: &&[u8]) -> bool {
        self.as_bytes() == *other
    }
}

impl PartialEq<&'_ BStr> for FlyByteStr {
    #[inline]
    fn eq(&self, other: &&BStr) -> bool {
        self.as_bstr() == *other
    }
}

impl PartialEq<&'_ str> for FlyByteStr {
    #[inline]
    fn eq(&self, other: &&str) -> bool {
        self.as_bytes() == other.as_bytes()
    }
}

impl PartialEq<String> for FlyByteStr {
    #[inline]
    fn eq(&self, other: &String) -> bool {
        self.as_bytes() == other.as_bytes()
    }
}

impl PartialEq<FlyStr> for FlyByteStr {
    #[inline]
    fn eq(&self, other: &FlyStr) -> bool {
        self.0 == other.0
    }
}

impl PartialEq<&'_ FlyStr> for FlyByteStr {
    #[inline]
    fn eq(&self, other: &&FlyStr) -> bool {
        self.0 == other.0
    }
}

impl PartialOrd<str> for FlyByteStr {
    #[inline]
    fn partial_cmp(&self, other: &str) -> Option<std::cmp::Ordering> {
        self.as_bstr().partial_cmp(other)
    }
}

impl PartialOrd<&str> for FlyByteStr {
    #[inline]
    fn partial_cmp(&self, other: &&str) -> Option<std::cmp::Ordering> {
        self.as_bstr().partial_cmp(other)
    }
}

impl PartialOrd<FlyStr> for FlyByteStr {
    #[inline]
    fn partial_cmp(&self, other: &FlyStr) -> Option<std::cmp::Ordering> {
        self.as_bstr().partial_cmp(other.as_str())
    }
}

impl Debug for FlyByteStr {
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        Debug::fmt(self.as_bstr(), f)
    }
}

impl Display for FlyByteStr {
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        Display::fmt(self.as_bstr(), f)
    }
}

impl Serialize for FlyByteStr {
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        serializer.serialize_bytes(self.as_bytes())
    }
}

impl<'d> Deserialize<'d> for FlyByteStr {
    fn deserialize<D: Deserializer<'d>>(deserializer: D) -> Result<Self, D::Error> {
        deserializer.deserialize_bytes(FlyByteStrVisitor)
    }
}

struct FlyByteStrVisitor;

impl<'de> Visitor<'de> for FlyByteStrVisitor {
    type Value = FlyByteStr;
    fn expecting(&self, formatter: &mut Formatter<'_>) -> FmtResult {
        formatter.write_str("a string, a bytestring, or a sequence of bytes")
    }

    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(FlyByteStr::from(v))
    }

    fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(FlyByteStr::from(v))
    }

    fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(FlyByteStr::from(v))
    }

    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
    where
        A: serde::de::SeqAccess<'de>,
    {
        let mut bytes = vec![];
        while let Some(b) = seq.next_element::<u8>()? {
            bytes.push(b);
        }
        Ok(FlyByteStr::from(bytes))
    }
}

#[repr(C)] // Guarantee predictable field ordering.
union RawRepr {
    /// Strings longer than MAX_INLINE_SIZE are allocated using `raw::Payload`, which has a thin
    /// pointer representation. This means that `heap` variants of `RawRepr` will always have the
    /// pointer contents aligned and this variant will never have its least significant bit set.
    ///
    /// We store a `NonNull` so we can have guaranteed pointer layout.
    heap: NonNull<raw::Payload>,

    /// Strings shorter than or equal in length to MAX_INLINE_SIZE are stored in this union variant.
    /// The first byte is reserved for the size of the inline string, and the remaining bytes are
    /// used for the string itself. The first byte has its least significant bit set to 1 to
    /// distinguish inline strings from heap-allocated ones, and the size is stored in the remaining
    /// 7 bits.
    inline: InlineRepr,
}
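
// Illustrative sketch of the tagging scheme (not normative; assumes a 64-bit little-endian
// target, where MAX_INLINE_SIZE == 7):
//
//   heap:   [ pointer to raw::Payload .......................... ]  LSB of first byte == 0
//   inline: [ (len << 1) | 1 ][ up to 7 bytes of string contents  ]  LSB of first byte == 1
//
// Because `raw::Payload` is aligned to more than one byte, a real pointer can never have its
// least significant bit set, so inspecting that single bit is enough to tell the variants apart.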

// The inline variant should not cause us to occupy more space than the heap variant alone.
static_assertions::assert_eq_size!(NonNull<raw::Payload>, RawRepr);

// Alignment of the payload pointers must be >1 in order to have space for the mask bit at the
// bottom.
static_assertions::const_assert!(std::mem::align_of::<raw::Payload>() > 1);

// The short string optimization makes little-endian layout assumptions with the first byte being
// the least significant.
static_assertions::assert_type_eq_all!(byteorder::NativeEndian, byteorder::LittleEndian);

/// An enum with an actual discriminant that allows us to limit the reach of unsafe code in the
/// implementation without affecting the stored size of `RawRepr`.
enum SafeRepr<'a> {
    Heap(NonNull<raw::Payload>),
    Inline(&'a InlineRepr),
}

// SAFETY: FlyStr can be dropped from any thread.
unsafe impl Send for RawRepr {}
// SAFETY: FlyStr has an immutable public API.
unsafe impl Sync for RawRepr {}

impl RawRepr {
    fn new_str(s: impl AsRef<str>) -> Self {
        let borrowed = s.as_ref();
        new_raw_repr!(borrowed.as_bytes())
    }

    fn new(s: impl AsRef<[u8]>) -> Self {
        let borrowed = s.as_ref();
        new_raw_repr!(borrowed)
    }

    #[inline]
    fn new_inline(s: &[u8]) -> Self {
        assert!(s.len() <= MAX_INLINE_SIZE);
        let new = Self { inline: InlineRepr::new(s) };
        assert!(new.is_inline(), "least significant bit must be 1 for inline strings");
        new
    }

    #[inline]
    fn from_storage(storage: &Storage) -> Self {
        if storage.inc_ref() == 0 {
            // Another thread is trying to lock the cache and free this string. They already
            // released their refcount, so give it back to them. This will prevent this thread
            // and other threads from attempting to free the string if they drop the refcount back
            // down.
            storage.inc_ref();
        }
        Self { heap: storage.0 }
    }

    #[inline]
    fn new_for_storage(cache: &mut AHashSet<Storage>, bytes: &[u8]) -> Self {
        assert!(bytes.len() > MAX_INLINE_SIZE);
        // `Payload::alloc` starts the refcount at 1.
        let new_storage = raw::Payload::alloc(bytes);

        let for_cache = Storage(new_storage);
        let new = Self { heap: new_storage };
        assert!(!new.is_inline(), "least significant bit must be 0 for heap strings");
        cache.insert(for_cache);
        new
    }

    #[inline]
    fn is_inline(&self) -> bool {
        // SAFETY: it is always OK to interpret a pointer as byte array as long as we don't expect
        // to retain provenance.
        (unsafe { self.inline.masked_len } & 1) == 1
    }

    #[inline]
    fn project(&self) -> SafeRepr<'_> {
        if self.is_inline() {
            // SAFETY: Just checked that this is the inline variant.
            SafeRepr::Inline(unsafe { &self.inline })
        } else {
            // SAFETY: Just checked that this is the heap variant.
            SafeRepr::Heap(unsafe { self.heap })
        }
    }

    #[inline]
    fn as_bytes(&self) -> &[u8] {
        match self.project() {
            // SAFETY: FlyStr owns the payload stored as a NonNull, it is live as long as `FlyStr`.
            SafeRepr::Heap(ptr) => unsafe { &*raw::Payload::bytes(ptr.as_ptr()) },
            SafeRepr::Inline(i) => i.as_bytes(),
        }
    }
}

impl PartialEq for RawRepr {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        // SAFETY: it is always OK to interpret a pointer as a byte array as long as we don't expect
        // to retain provenance.
        let lhs = unsafe { &self.inline };
        // SAFETY: it is always OK to interpret a pointer as a byte array as long as we don't expect
        // to retain provenance.
        let rhs = unsafe { &other.inline };
        lhs.eq(rhs)
    }
}
impl Eq for RawRepr {}

impl Hash for RawRepr {
    fn hash<H: Hasher>(&self, h: &mut H) {
        // SAFETY: it is always OK to interpret a pointer as a byte array as long as we don't expect
        // to retain provenance.
        let this = unsafe { &self.inline };
        this.hash(h);
    }
}

impl Clone for RawRepr {
    fn clone(&self) -> Self {
        match self.project() {
            SafeRepr::Heap(ptr) => {
                // SAFETY: We own this payload, we know it's live because we are.
                unsafe {
                    raw::Payload::inc_ref(ptr.as_ptr());
                }
                Self { heap: ptr }
            }
            SafeRepr::Inline(&inline) => Self { inline },
        }
    }
}

impl Drop for RawRepr {
    fn drop(&mut self) {
        if !self.is_inline() {
            // SAFETY: We checked above that this is the heap repr.
            let heap = unsafe { self.heap };

            // Decrementing the refcount before locking the cache causes the following failure mode:
            //
            // 1. We drop the refcount to 0.
            // 2. Another thread finds the string we're about to drop and increments the refcount
            //    back up to 1.
            // 3. That thread drops its refcount, and also sees the refcount drop to 0.
            // 4. That thread locks the cache, removes the value, and drops the payload. This leaves
            //    us with a dangling pointer to the dropped payload.
            // 5. We lock the cache and go to look up our value in the cache. Our payload pointer is
            //    dangling now, but we don't know that. If we try to read through our dangling
            //    pointer, we cause UB.
            //
            // To account for this failure mode and still optimistically drop our refcount, we
            // modify the procedure slightly:
            //
            // 1. We drop the refcount to 0.
            // 2. Another thread finds the string we're about to drop and increments the refcount
            //    back up to 1. It notices that the refcount incremented from 0 to 1, and so knows
            //    that our thread will try to drop it. While still holding the cache lock, that
            //    thread increments the refcount again from 1 to 2. This "gives back" the refcount
            //    to our thread.
            // 3. That thread drops its refcount, and sees the refcount drop to 1. It won't try to
            //    drop the payload this time.
            // 4. We lock the cache, and decrement the refcount a second time. If it decremented
            //    from 0 or 1, then we know that no other threads are currently holding references
            //    to it and we can safely drop it ourselves.

            // SAFETY: The payload is live.
            let prev_refcount = unsafe { raw::Payload::dec_ref(heap.as_ptr()) };

            // If we held the final refcount outside of the cache, try to remove the string.
            if prev_refcount == 1 {
                let mut cache = CACHE.lock().unwrap();

                let current_refcount = unsafe { raw::Payload::dec_ref(heap.as_ptr()) };
                if current_refcount <= 1 {
                    // If the refcount was still 0 after acquiring the cache lock, no other thread
                    // looked up this payload between optimistically decrementing the refcount and
                    // now. If the refcount was 1, then another thread did, but dropped its refcount
                    // before we got the cache lock. Either way, we can safely remove the string
                    // from the cache and free it.

                    let bytes = unsafe { &*raw::Payload::bytes(heap.as_ptr()) };
                    assert!(
                        cache.remove(bytes),
                        "cache did not contain bytes, but this thread didn't remove them yet",
                    );

                    // Get out of the critical section as soon as possible
                    drop(cache);

                    // SAFETY: The payload is live.
                    unsafe { raw::Payload::dealloc(heap.as_ptr()) };
                } else {
                    // Another thread looked up this payload, made a reference to it, and gave our
                    // refcount back to us for a minimum refcount of 2. We then removed our refcount
                    // again, leaving a minimum of 1. This means it's no longer our responsibility
                    // to deallocate the string, so we ended up needlessly locking the cache.
                }
            }
        }
    }
}

#[derive(Clone, Copy, Hash, PartialEq)]
#[repr(C)] // Preserve field ordering.
struct InlineRepr {
    /// The first byte, which corresponds to the LSB of a pointer in the other variant.
    ///
    /// When the first bit is `1` the rest of this byte stores the length of the inline string.
    masked_len: u8,
    /// Inline string contents.
    contents: [u8; MAX_INLINE_SIZE],
}

/// We can store small strings up to 1 byte less than the size of the pointer to the heap-allocated
/// string.
const MAX_INLINE_SIZE: usize = std::mem::size_of::<NonNull<raw::Payload>>() - 1;

// Guard rail to make sure we never end up with an incorrect inline size encoding. Ensure that
// MAX_INLINE_SIZE is always smaller than the maximum size we can represent in a byte with the LSB
// reserved.
static_assertions::const_assert!((std::u8::MAX >> 1) as usize >= MAX_INLINE_SIZE);
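
// Worked example of the length encoding (assuming a 64-bit target, so MAX_INLINE_SIZE == 7):
// storing b"hi" inline sets `masked_len = (2 << 1) | 1 = 0b0000_0101`, and `as_bytes` recovers the
// length as `0b0000_0101 >> 1 == 2`. A heap pointer can never collide with this encoding because
// its least significant bit is always 0 (see the alignment assertion above).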

impl InlineRepr {
    #[inline]
    fn new(s: &[u8]) -> Self {
        assert!(s.len() <= MAX_INLINE_SIZE);

        // Set the first byte to the length of the inline string with LSB masked to 1.
        let masked_len = ((s.len() as u8) << 1) | 1;

        let mut contents = [0u8; MAX_INLINE_SIZE];
        contents[..s.len()].copy_from_slice(s);

        Self { masked_len, contents }
    }

    #[inline]
    fn as_bytes(&self) -> &[u8] {
        let len = self.masked_len >> 1;
        &self.contents[..len as usize]
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use static_assertions::{const_assert, const_assert_eq};
    use std::collections::BTreeSet;
    use test_case::test_case;

    // These tests all manipulate the process-global cache in the parent module. On target devices
    // we run each test case in its own process, so the test cases can't pollute each other. On
    // host, we run tests with a process for each suite (which is the Rust upstream default), and
    // we need to manually isolate the tests from each other.
    #[cfg(not(target_os = "fuchsia"))]
    use serial_test::serial;

    fn reset_global_cache() {
        // We still want subsequent tests to be able to run if one in the same process panics.
        match CACHE.lock() {
            Ok(mut c) => *c = AHashSet::new(),
            Err(e) => *e.into_inner() = AHashSet::new(),
        }
    }
    fn num_strings_in_global_cache() -> usize {
        CACHE.lock().unwrap().len()
    }

    impl RawRepr {
        fn refcount(&self) -> Option<usize> {
            match self.project() {
                SafeRepr::Heap(ptr) => {
                    // SAFETY: The payload is live as long as the repr is live.
                    let count = unsafe { raw::Payload::refcount(ptr.as_ptr()) };
                    Some(count)
                }
                SafeRepr::Inline(_) => None,
            }
        }
    }

    const SHORT_STRING: &str = "hello";
    const_assert!(SHORT_STRING.len() < MAX_INLINE_SIZE);

    const MAX_LEN_SHORT_STRING: &str = "hello!!";
    const_assert_eq!(MAX_LEN_SHORT_STRING.len(), MAX_INLINE_SIZE);

    const MIN_LEN_LONG_STRING: &str = "hello!!!";
    const_assert_eq!(MIN_LEN_LONG_STRING.len(), MAX_INLINE_SIZE + 1);

    const LONG_STRING: &str = "hello, world!!!!!!!!!!!!!!!!!!!!";
    const_assert!(LONG_STRING.len() > MAX_INLINE_SIZE);

    const SHORT_NON_UTF8: &[u8] = b"\xF0\x28\x8C\x28";
    const_assert!(SHORT_NON_UTF8.len() < MAX_INLINE_SIZE);

    const LONG_NON_UTF8: &[u8] = b"\xF0\x28\x8C\x28\xF0\x28\x8C\x28";
    const_assert!(LONG_NON_UTF8.len() > MAX_INLINE_SIZE);

    #[test_case("" ; "empty string")]
    #[test_case(SHORT_STRING ; "short strings")]
    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
    #[test_case(LONG_STRING ; "long strings")]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn string_formatting_is_equivalent_to_str(original: &str) {
        reset_global_cache();

        let cached = FlyStr::new(original);
        assert_eq!(format!("{original}"), format!("{cached}"));
        assert_eq!(format!("{original:?}"), format!("{cached:?}"));

        let cached = FlyByteStr::new(original);
        assert_eq!(format!("{original}"), format!("{cached}"));
        assert_eq!(format!("{original:?}"), format!("{cached:?}"));
    }

    #[test_case("" ; "empty string")]
    #[test_case(SHORT_STRING ; "short strings")]
    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
    #[test_case(LONG_STRING ; "long strings")]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn string_equality_works(contents: &str) {
        reset_global_cache();

        let cached = FlyStr::new(contents);
        let bytes_cached = FlyByteStr::new(contents);
        assert_eq!(cached, cached.clone(), "must be equal to itself");
        assert_eq!(cached, contents, "must be equal to the original");
        assert_eq!(cached, contents.to_owned(), "must be equal to an owned copy of the original");
        assert_eq!(cached, bytes_cached);

        // test inequality too
        assert_ne!(cached, "goodbye");
        assert_ne!(bytes_cached, "goodbye");
    }

    #[test_case("", SHORT_STRING ; "empty and short string")]
    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
    #[test_case(MAX_LEN_SHORT_STRING, MIN_LEN_LONG_STRING ; "short and long strings")]
    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn string_comparison_works(lesser_contents: &str, greater_contents: &str) {
        reset_global_cache();

        let lesser = FlyStr::new(lesser_contents);
        let lesser_bytes = FlyByteStr::from(lesser_contents);
        let greater = FlyStr::new(greater_contents);
        let greater_bytes = FlyByteStr::from(greater_contents);

        // lesser as method receiver
        assert!(lesser < greater);
        assert!(lesser < greater_bytes);
        assert!(lesser_bytes < greater);
        assert!(lesser_bytes < greater_bytes);
        assert!(lesser <= greater);
        assert!(lesser <= greater_bytes);
        assert!(lesser_bytes <= greater);
        assert!(lesser_bytes <= greater_bytes);

        // greater as method receiver
        assert!(greater > lesser);
        assert!(greater > lesser_bytes);
        assert!(greater_bytes > lesser);
        assert!(greater_bytes > lesser_bytes);
        assert!(greater >= lesser);
        assert!(greater >= lesser_bytes);
        assert!(greater_bytes >= lesser);
        assert!(greater_bytes >= lesser_bytes);
    }

    #[test_case("" ; "empty string")]
    #[test_case(SHORT_STRING ; "short strings")]
    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn no_allocations_for_short_strings(contents: &str) {
        reset_global_cache();
        assert_eq!(num_strings_in_global_cache(), 0);

        let original = FlyStr::new(contents);
        assert_eq!(num_strings_in_global_cache(), 0);
        assert_eq!(original.0.refcount(), None);

        let cloned = original.clone();
        assert_eq!(num_strings_in_global_cache(), 0);
        assert_eq!(cloned.0.refcount(), None);

        let deduped = FlyStr::new(contents);
        assert_eq!(num_strings_in_global_cache(), 0);
        assert_eq!(deduped.0.refcount(), None);
    }

    #[test_case("" ; "empty string")]
    #[test_case(SHORT_STRING ; "short strings")]
    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn no_allocations_for_short_bytestrings(contents: &str) {
        reset_global_cache();
        assert_eq!(num_strings_in_global_cache(), 0);

        let original = FlyByteStr::new(contents);
        assert_eq!(num_strings_in_global_cache(), 0);
        assert_eq!(original.0.refcount(), None);

        let cloned = original.clone();
        assert_eq!(num_strings_in_global_cache(), 0);
        assert_eq!(cloned.0.refcount(), None);

        let deduped = FlyByteStr::new(contents);
        assert_eq!(num_strings_in_global_cache(), 0);
        assert_eq!(deduped.0.refcount(), None);
    }

    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
    #[test_case(LONG_STRING ; "long strings")]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn only_one_copy_allocated_for_long_strings(contents: &str) {
        reset_global_cache();

        assert_eq!(num_strings_in_global_cache(), 0);

        let original = FlyStr::new(contents);
        assert_eq!(num_strings_in_global_cache(), 1, "only one string allocated");
        assert_eq!(original.0.refcount(), Some(1), "one copy on stack");

        let cloned = original.clone();
        assert_eq!(num_strings_in_global_cache(), 1, "cloning just incremented refcount");
        assert_eq!(cloned.0.refcount(), Some(2), "two copies on stack");

        let deduped = FlyStr::new(contents);
        assert_eq!(num_strings_in_global_cache(), 1, "new string was deduped");
        assert_eq!(deduped.0.refcount(), Some(3), "three copies on stack");
    }

    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
    #[test_case(LONG_STRING ; "long strings")]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn only_one_copy_allocated_for_long_bytestrings(contents: &str) {
        reset_global_cache();

        assert_eq!(num_strings_in_global_cache(), 0);

        let original = FlyByteStr::new(contents);
        assert_eq!(num_strings_in_global_cache(), 1, "only one string allocated");
        assert_eq!(original.0.refcount(), Some(1), "one copy on stack");

        let cloned = original.clone();
        assert_eq!(num_strings_in_global_cache(), 1, "cloning just incremented refcount");
        assert_eq!(cloned.0.refcount(), Some(2), "two copies on stack");

        let deduped = FlyByteStr::new(contents);
        assert_eq!(num_strings_in_global_cache(), 1, "new string was deduped");
        assert_eq!(deduped.0.refcount(), Some(3), "three copies on stack");
    }

    #[test]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn utf8_and_bytestrings_share_the_cache() {
        reset_global_cache();

        assert_eq!(num_strings_in_global_cache(), 0, "cache is empty");

        let _utf8 = FlyStr::from(MIN_LEN_LONG_STRING);
        assert_eq!(num_strings_in_global_cache(), 1, "string was allocated");

        let _bytes = FlyByteStr::from(MIN_LEN_LONG_STRING);
        assert_eq!(num_strings_in_global_cache(), 1, "bytestring was pulled from cache");
    }

    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
    #[test_case(LONG_STRING ; "long strings")]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn cached_strings_dropped_when_refs_dropped(contents: &str) {
        reset_global_cache();

        let alloced = FlyStr::new(contents);
        assert_eq!(num_strings_in_global_cache(), 1, "only one string allocated");
        drop(alloced);
        assert_eq!(num_strings_in_global_cache(), 0, "last reference dropped");
    }

    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
    #[test_case(LONG_STRING ; "long strings")]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn cached_bytestrings_dropped_when_refs_dropped(contents: &str) {
        reset_global_cache();

        let alloced = FlyByteStr::new(contents);
        assert_eq!(num_strings_in_global_cache(), 1, "only one string allocated");
        drop(alloced);
        assert_eq!(num_strings_in_global_cache(), 0, "last reference dropped");
    }

    #[test_case("", SHORT_STRING ; "empty and short string")]
    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
    #[test_case(SHORT_STRING, LONG_STRING ; "short and long strings")]
    #[test_case(LONG_STRING, MAX_LEN_SHORT_STRING ; "long and max-len-short strings")]
    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn equality_and_hashing_with_pointer_value_works_correctly(first: &str, second: &str) {
        reset_global_cache();

        let first = FlyStr::new(first);
        let second = FlyStr::new(second);

        let mut set = AHashSet::new();
        set.insert(first.clone());
        assert!(set.contains(&first));
        assert!(!set.contains(&second));

        // re-insert the same string
        set.insert(first);
        assert_eq!(set.len(), 1, "set did not grow because the same string was inserted as before");

        set.insert(second.clone());
        assert_eq!(set.len(), 2, "inserting a different string must mutate the set");
        assert!(set.contains(&second));

        // re-insert the second string
        set.insert(second);
        assert_eq!(set.len(), 2);
    }

    #[test_case("", SHORT_STRING ; "empty and short string")]
    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
    #[test_case(SHORT_STRING, LONG_STRING ; "short and long strings")]
    #[test_case(LONG_STRING, MAX_LEN_SHORT_STRING ; "long and max-len-short strings")]
    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn byte_equality_and_hashing_with_pointer_value_works_correctly(first: &str, second: &str) {
        reset_global_cache();

        let first = FlyByteStr::new(first);
        let second = FlyByteStr::new(second);

        let mut set = AHashSet::new();
        set.insert(first.clone());
        assert!(set.contains(&first));
        assert!(!set.contains(&second));

        // re-insert the same string
        set.insert(first);
        assert_eq!(set.len(), 1, "set did not grow because the same string was inserted as before");

        set.insert(second.clone());
        assert_eq!(set.len(), 2, "inserting a different string must mutate the set");
        assert!(set.contains(&second));

        // re-insert the second string
        set.insert(second);
        assert_eq!(set.len(), 2);
    }

    #[test_case("", SHORT_STRING ; "empty and short string")]
    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
    #[test_case(SHORT_STRING, LONG_STRING ; "short and long strings")]
    #[test_case(LONG_STRING, MAX_LEN_SHORT_STRING ; "long and max-len-short strings")]
    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn comparison_for_btree_storage_works(first: &str, second: &str) {
        reset_global_cache();

        let first = FlyStr::new(first);
        let second = FlyStr::new(second);

        let mut set = BTreeSet::new();
        set.insert(first.clone());
        assert!(set.contains(&first));
        assert!(!set.contains(&second));

        // re-insert the same string
        set.insert(first);
        assert_eq!(set.len(), 1, "set did not grow because the same string was inserted as before");

        set.insert(second.clone());
        assert_eq!(set.len(), 2, "inserting a different string must mutate the set");
        assert!(set.contains(&second));

        // re-insert the second string
        set.insert(second);
        assert_eq!(set.len(), 2);
    }

    #[test_case("", SHORT_STRING ; "empty and short string")]
    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
    #[test_case(SHORT_STRING, LONG_STRING ; "short and long strings")]
    #[test_case(LONG_STRING, MAX_LEN_SHORT_STRING ; "long and max-len-short strings")]
    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn byte_comparison_for_btree_storage_works(first: &str, second: &str) {
        reset_global_cache();

        let first = FlyByteStr::new(first);
        let second = FlyByteStr::new(second);

        let mut set = BTreeSet::new();
        set.insert(first.clone());
        assert!(set.contains(&first));
        assert!(!set.contains(&second));

        // re-insert the same string
        set.insert(first);
        assert_eq!(set.len(), 1, "set did not grow because the same string was inserted as before");

        set.insert(second.clone());
        assert_eq!(set.len(), 2, "inserting a different string must mutate the set");
        assert!(set.contains(&second));

        // re-insert the second string
        set.insert(second);
        assert_eq!(set.len(), 2);
    }

    #[test_case("" ; "empty string")]
    #[test_case(SHORT_STRING ; "short strings")]
    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
    #[test_case(MIN_LEN_LONG_STRING ; "min len long strings")]
    #[test_case(LONG_STRING ; "long strings")]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn serde_works(contents: &str) {
        reset_global_cache();

        let s = FlyStr::new(contents);
        let as_json = serde_json::to_string(&s).unwrap();
        assert_eq!(as_json, format!("\"{contents}\""));
        assert_eq!(s, serde_json::from_str::<FlyStr>(&as_json).unwrap());
    }

    #[test_case("" ; "empty string")]
    #[test_case(SHORT_STRING ; "short strings")]
    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
    #[test_case(MIN_LEN_LONG_STRING ; "min len long strings")]
    #[test_case(LONG_STRING ; "long strings")]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn serde_works_bytestring(contents: &str) {
        reset_global_cache();

        let s = FlyByteStr::new(contents);
        let as_json = serde_json::to_string(&s).unwrap();
        assert_eq!(s, serde_json::from_str::<FlyByteStr>(&as_json).unwrap());
    }

    #[test_case(SHORT_NON_UTF8 ; "short non-utf8 bytestring")]
    #[test_case(LONG_NON_UTF8 ; "long non-utf8 bytestring")]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn non_utf8_works(contents: &[u8]) {
        reset_global_cache();

        let res: Result<FlyStr, _> = FlyByteStr::from(contents).try_into();
        res.unwrap_err();
    }

    #[test_case("" ; "empty string")]
    #[test_case(SHORT_STRING ; "short strings")]
    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
    #[test_case(MIN_LEN_LONG_STRING ; "min len long strings")]
    #[test_case(LONG_STRING ; "long strings")]
    #[cfg_attr(not(target_os = "fuchsia"), serial)]
    fn flystr_to_flybytestr_and_back(contents: &str) {
        reset_global_cache();

        let bytestr = FlyByteStr::from(contents);
        let flystr = FlyStr::try_from(bytestr.clone()).unwrap();
        assert_eq!(bytestr, flystr);
        let bytestr2 = FlyByteStr::from(flystr.clone());
        assert_eq!(bytestr, bytestr2);
    }
}