@@ -6,6 +6,7 @@ extern crate alloc;
66
77use alloc:: {
88 alloc:: { alloc, dealloc, Layout } ,
9+ str:: Utf8Error ,
910 string:: String ,
1011} ;
1112use core:: {
@@ -23,11 +24,82 @@ use crate::vint::VarInt;
2324const HEAP_ALIGN : usize = 2 ;
2425const WIDTH : usize = mem:: size_of :: < usize > ( ) ;
2526
27+ /// Compact representation of immutable UTF-8 strings. Optimized for memory usage and struct packing.
28+ ///
29+ /// # Example
30+ /// ```
31+ /// let s = cold_string::ColdString::new("qwerty");
32+ /// assert_eq!(s.as_str(), "qwerty");
33+ /// ```
34+ /// ```
35+ /// use std::mem;
36+ /// use cold_string::ColdString;
37+ ///
38+ /// assert_eq!(mem::size_of::<ColdString>(), 8);
39+ /// assert_eq!(mem::align_of::<ColdString>(), 1);
40+ /// assert_eq!(mem::size_of::<(ColdString, u8)>(), 9);
41+ /// assert_eq!(mem::align_of::<(ColdString, u8)>(), 1);
42+ /// ```
2643#[ repr( transparent) ]
2744pub struct ColdString ( [ u8 ; WIDTH ] ) ;
2845
2946impl ColdString {
30- pub fn new ( s : & str ) -> Self {
47+ /// Convert a slice of bytes into a [`ColdString`].
48+ ///
49+ /// A [`ColdString`] is a contiguous collection of bytes (`u8`s) that is valid [`UTF-8`](https://en.wikipedia.org/wiki/UTF-8).
50+ /// This method converts from an arbitrary contiguous collection of bytes into a
51+ /// [`ColdString`], failing if the provided bytes are not `UTF-8`.
52+ ///
53+ /// # Examples
54+ /// ### Valid UTF-8
55+ /// ```
56+ /// # use cold_string::ColdString;
57+ /// let bytes = [240, 159, 166, 128, 240, 159, 146, 175];
58+ /// let compact = ColdString::from_utf8(&bytes).expect("valid UTF-8");
59+ ///
60+ /// assert_eq!(compact, "🦀💯");
61+ /// ```
62+ ///
63+ /// ### Invalid UTF-8
64+ /// ```
65+ /// # use cold_string::ColdString;
66+ /// let bytes = [255, 255, 255];
67+ /// let result = ColdString::from_utf8(&bytes);
68+ ///
69+ /// assert!(result.is_err());
70+ /// ```
71+ pub fn from_utf8 ( v : & [ u8 ] ) -> Result < Self , Utf8Error > {
72+ Ok ( Self :: new ( str:: from_utf8 ( v) ?) )
73+ }
74+
75+ /// Converts a vector of bytes to a [`ColdString`] without checking that the string contains
76+ /// valid UTF-8.
77+ ///
78+ /// See the safe version, [`ColdString::from_utf8`], for more details.
79+ ///
80+ /// # Examples
81+ ///
82+ /// Basic usage:
83+ ///
84+ /// ```
85+ /// # use cold_string::ColdString;
86+ /// // some bytes, in a vector
87+ /// let sparkle_heart = [240, 159, 146, 150];
88+ ///
89+ /// let sparkle_heart = unsafe {
90+ /// ColdString::from_utf8_unchecked(&sparkle_heart)
91+ /// };
92+ ///
93+ /// assert_eq!("💖", sparkle_heart);
94+ /// ```
95+ pub unsafe fn from_utf8_unchecked ( v : & [ u8 ] ) -> Self {
96+ Self :: new ( str:: from_utf8_unchecked ( v) )
97+ }
98+
99+ /// Creates a new [`ColdString`] from any type that implements `AsRef<str>`.
100+ /// If the string is short enough, it is stored inline in the `ColdString` itself rather than behind a pointer.
101+ pub fn new < T : AsRef < str > > ( x : T ) -> Self {
102+ let s = x. as_ref ( ) ;
31103 if s. len ( ) < WIDTH {
32104 Self :: new_inline ( s)
33105 } else {
@@ -36,12 +108,12 @@ impl ColdString {
36108 }
37109
38110 #[ inline]
39- fn is_inline ( & self ) -> bool {
111+ const fn is_inline ( & self ) -> bool {
40112 self . 0 [ 0 ] & 1 == 1
41113 }
42114
43115 #[ inline]
44- fn new_inline ( s : & str ) -> Self {
116+ const fn new_inline ( s : & str ) -> Self {
45117 debug_assert ! ( s. len( ) < WIDTH ) ;
46118 let mut buf = [ 0u8 ; WIDTH ] ;
47119 unsafe {
@@ -78,17 +150,34 @@ impl ColdString {
78150
79151 #[ inline]
80152 fn heap_ptr ( & self ) -> * mut u8 {
153+ // Can be const in 1.91
81154 debug_assert ! ( !self . is_inline( ) ) ;
82155 let addr = usize:: from_le_bytes ( self . 0 ) ;
83156 debug_assert ! ( addr % 2 == 0 ) ;
84157 with_exposed_provenance_mut :: < u8 > ( addr)
85158 }
86159
87160 #[ inline]
88- fn inline_len ( & self ) -> usize {
161+ const fn inline_len ( & self ) -> usize {
89162 self . 0 [ 0 ] as usize >> 1
90163 }
91164
165+ /// Returns the length of this `ColdString`, in bytes, not [`char`]s or
166+ /// graphemes. In other words, it might not be what a human considers the
167+ /// length of the string.
168+ ///
169+ /// # Examples
170+ ///
171+ /// ```
172+ /// use cold_string::ColdString;
173+ ///
174+ /// let a = ColdString::from("foo");
175+ /// assert_eq!(a.len(), 3);
176+ ///
177+ /// let fancy_f = ColdString::from("ƒoo");
178+ /// assert_eq!(fancy_f.len(), 4);
179+ /// assert_eq!(fancy_f.as_str().chars().count(), 3);
180+ /// ```
92181 #[ inline]
93182 pub fn len ( & self ) -> usize {
94183 if self . is_inline ( ) {
@@ -119,6 +208,19 @@ impl ColdString {
119208 slice:: from_raw_parts ( data, len as usize )
120209 }
121210
211+ /// Returns a byte slice of this `ColdString`'s contents.
212+ ///
213+ /// The inverse of this method is [`from_utf8`].
214+ ///
215+ /// [`from_utf8`]: ColdString::from_utf8
216+ ///
217+ /// # Examples
218+ ///
219+ /// ```
220+ /// let s = cold_string::ColdString::from("hello");
221+ ///
222+ /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes());
223+ /// ```
122224 #[ inline]
123225 pub fn as_bytes ( & self ) -> & [ u8 ] {
124226 match self . is_inline ( ) {
@@ -127,6 +229,14 @@ impl ColdString {
127229 }
128230 }
129231
232+ /// Returns a string slice containing the entire [`ColdString`].
233+ ///
234+ /// # Examples
235+ /// ```
236+ /// let s = cold_string::ColdString::new("hello");
237+ ///
238+ /// assert_eq!(s.as_str(), "hello");
239+ /// ```
130240 #[ inline]
131241 pub fn as_str ( & self ) -> & str {
132242 unsafe { str:: from_utf8_unchecked ( self . as_bytes ( ) ) }
0 commit comments