Skip to content

Commit 4653e12

Browse files
committed
docs
1 parent cc835d6 commit 4653e12

4 files changed

Lines changed: 126 additions & 16 deletions

File tree

.github/workflows/rust.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ jobs:
4141
toolchain: "1.84"
4242
override: true
4343
- name: Install cargo-hack
44-
run: cargo install cargo-hack --version 0.6.37 --force --locked
44+
run: cargo +1.84 install cargo-hack --version 0.6.37 --force --locked
4545
- name: Build
46-
run: cargo hack build --verbose --release --feature-powerset
46+
run: cargo +1.84 hack build --verbose --release --feature-powerset
4747

README.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,26 +37,26 @@ pub struct ColdString([u8; 8]);
3737
The array is either a pointer to heap data (for strings longer than 7 bytes) or the inlined string data itself.
3838
## Inline Mode
3939
`self.0[1]` to `self.0[7]` store the bytes of the string. In the least significant byte, `self.0[0]`, the least significant bit is the inline/heap flag, and is set to "1" for inline mode. The remaining bits encode the length (always between 0 and 7).
40-
```ignore
40+
```text,ignore
4141
b0 b1 b2 b3 b4 b5 b6 b7
4242
b0 = (<7 bit len> << 1) | 1
4343
```
4444
For example, `"qwerty" = [13, 'q', 'w', 'e', 'r', 't', 'y', 0]`, where 13 is `"qwerty".len() << 1 | 1`.
4545

4646
## Heap Mode
4747
The bytes act as a pointer to the heap. The data on the heap has alignment 2, causing the least significant bit to always be 0 (since alignment 2 implies `addr % 2 == 0`), signifying heap mode. On the heap, the data starts with a variable-length integer encoding of the length, followed by the bytes.
48-
```ignore
48+
```text,ignore
4949
ptr --> <var int length> <data>
5050
```
5151

5252
# Memory Comparisons
5353

54-
<img width="1920" height="967" alt="string_memory" src="https://github.com/user-attachments/assets/25f5acf8-9a3e-4a4c-b2f1-b2fb972cc9c8" />
54+
![string_memory](https://github.com/user-attachments/assets/25f5acf8-9a3e-4a4c-b2f1-b2fb972cc9c8)
5555

5656
## Measured from System Memory
5757

5858
### 0..=4
59-
```ignore
59+
```text,ignore
6060
Crate, len 0..=4 | RSS (B) | Virtual (B)
6161
-------------------|--------------|-------------
6262
std | 36.9 | 38.4
@@ -67,7 +67,7 @@ cold-string | 8.0 | 8.0
6767
```
6868

6969
### 0..=8
70-
```ignore
70+
```text,ignore
7171
Crate, len 0..=8 | RSS (B) | Virtual (B)
7272
-------------------|--------------|-------------
7373
std | 38.4 | 40.0
@@ -78,7 +78,7 @@ cold-string | 11.2 | 11.7
7878
```
7979

8080
### 0..=16
81-
```ignore
81+
```text,ignore
8282
Crate, len 0..=16 | RSS (B) | Virtual (B)
8383
-------------------|--------------|-------------
8484
std | 46.8 | 48.6
@@ -89,7 +89,7 @@ cold-string | 24.9 | 26.7
8989
```
9090

9191
### 0..=32
92-
```ignore
92+
```text,ignore
9393
Crate, len 0..=32 | RSS (B) | Virtual (B)
9494
-------------------|--------------|-------------
9595
std | 55.3 | 57.4
@@ -100,7 +100,7 @@ cold-string | 36.5 | 38.8
100100
```
101101

102102
### 0..=64
103-
```ignore
103+
```text,ignore
104104
Crate, len 0..=64 | RSS (B) | Virtual (B)
105105
-------------------|--------------|-------------
106106
std | 71.4 | 73.7

src/lib.rs

Lines changed: 114 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ extern crate alloc;
66

77
use alloc::{
88
alloc::{alloc, dealloc, Layout},
9+
str::Utf8Error,
910
string::String,
1011
};
1112
use core::{
@@ -23,11 +24,82 @@ use crate::vint::VarInt;
2324
const HEAP_ALIGN: usize = 2;
2425
const WIDTH: usize = mem::size_of::<usize>();
2526

27+
/// Compact representation of immutable UTF-8 strings. Optimized for memory usage and struct packing.
28+
///
29+
/// # Example
30+
/// ```
31+
/// let s = cold_string::ColdString::new("qwerty");
32+
/// assert_eq!(s.as_str(), "qwerty");
33+
/// ```
34+
/// ```
35+
/// use std::mem;
36+
/// use cold_string::ColdString;
37+
///
38+
/// assert_eq!(mem::size_of::<ColdString>(), 8);
39+
/// assert_eq!(mem::align_of::<ColdString>(), 1);
40+
/// assert_eq!(mem::size_of::<(ColdString, u8)>(), 9);
41+
/// assert_eq!(mem::align_of::<(ColdString, u8)>(), 1);
42+
/// ```
2643
#[repr(transparent)]
2744
pub struct ColdString([u8; WIDTH]);
2845

2946
impl ColdString {
30-
pub fn new(s: &str) -> Self {
47+
/// Convert a slice of bytes into a [`ColdString`].
48+
///
49+
/// A [`ColdString`] is a contiguous collection of bytes (`u8`s) that is valid [`UTF-8`](https://en.wikipedia.org/wiki/UTF-8).
50+
/// This method converts from an arbitrary contiguous collection of bytes into a
51+
/// [`ColdString`], failing if the provided bytes are not `UTF-8`.
52+
///
53+
/// # Examples
54+
/// ### Valid UTF-8
55+
/// ```
56+
/// # use cold_string::ColdString;
57+
/// let bytes = [240, 159, 166, 128, 240, 159, 146, 175];
58+
/// let compact = ColdString::from_utf8(&bytes).expect("valid UTF-8");
59+
///
60+
/// assert_eq!(compact, "🦀💯");
61+
/// ```
62+
///
63+
/// ### Invalid UTF-8
64+
/// ```
65+
/// # use cold_string::ColdString;
66+
/// let bytes = [255, 255, 255];
67+
/// let result = ColdString::from_utf8(&bytes);
68+
///
69+
/// assert!(result.is_err());
70+
/// ```
71+
pub fn from_utf8(v: &[u8]) -> Result<Self, Utf8Error> {
72+
Ok(Self::new(str::from_utf8(v)?))
73+
}
74+
75+
/// Converts a slice of bytes to a [`ColdString`] without checking that the string contains
76+
/// valid UTF-8.
77+
///
78+
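/// See the safe version, [`ColdString::from_utf8`], for more details.
///
/// # Safety
///
/// The bytes passed in must be valid UTF-8. This function does not check them, and the
/// resulting [`ColdString`] is later handed out as a `&str` on the assumption that they are.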
79+
///
80+
/// # Examples
81+
///
82+
/// Basic usage:
83+
///
84+
/// ```
85+
/// # use cold_string::ColdString;
86+
/// // some bytes, in an array
87+
/// let sparkle_heart = [240, 159, 146, 150];
88+
///
89+
/// let sparkle_heart = unsafe {
90+
/// ColdString::from_utf8_unchecked(&sparkle_heart)
91+
/// };
92+
///
93+
/// assert_eq!("💖", sparkle_heart);
94+
/// ```
95+
pub unsafe fn from_utf8_unchecked(v: &[u8]) -> Self {
96+
Self::new(str::from_utf8_unchecked(v))
97+
}
98+
99+
/// Creates a new [`ColdString`] from any type that implements `AsRef<str>`.
100+
/// If the string is short enough, its bytes are stored inline within the `ColdString` itself.
101+
pub fn new<T: AsRef<str>>(x: T) -> Self {
102+
let s = x.as_ref();
31103
if s.len() < WIDTH {
32104
Self::new_inline(s)
33105
} else {
@@ -36,12 +108,12 @@ impl ColdString {
36108
}
37109

38110
#[inline]
39-
fn is_inline(&self) -> bool {
111+
const fn is_inline(&self) -> bool {
40112
self.0[0] & 1 == 1
41113
}
42114

43115
#[inline]
44-
fn new_inline(s: &str) -> Self {
116+
const fn new_inline(s: &str) -> Self {
45117
debug_assert!(s.len() < WIDTH);
46118
let mut buf = [0u8; WIDTH];
47119
unsafe {
@@ -78,17 +150,34 @@ impl ColdString {
78150

79151
#[inline]
80152
fn heap_ptr(&self) -> *mut u8 {
153+
// Can be const in 1.91
81154
debug_assert!(!self.is_inline());
82155
let addr = usize::from_le_bytes(self.0);
83156
debug_assert!(addr % 2 == 0);
84157
with_exposed_provenance_mut::<u8>(addr)
85158
}
86159

87160
#[inline]
88-
fn inline_len(&self) -> usize {
161+
const fn inline_len(&self) -> usize {
89162
self.0[0] as usize >> 1
90163
}
91164

165+
/// Returns the length of this `ColdString`, in bytes, not [`char`]s or
166+
/// graphemes. In other words, it might not be what a human considers the
167+
/// length of the string.
168+
///
169+
/// # Examples
170+
///
171+
/// ```
172+
/// use cold_string::ColdString;
173+
///
174+
/// let a = ColdString::from("foo");
175+
/// assert_eq!(a.len(), 3);
176+
///
177+
/// let fancy_f = String::from("ƒoo");
178+
/// assert_eq!(fancy_f.len(), 4);
179+
/// assert_eq!(fancy_f.chars().count(), 3);
180+
/// ```
92181
#[inline]
93182
pub fn len(&self) -> usize {
94183
if self.is_inline() {
@@ -119,6 +208,19 @@ impl ColdString {
119208
slice::from_raw_parts(data, len as usize)
120209
}
121210

211+
/// Returns a byte slice of this `ColdString`'s contents.
212+
///
213+
/// The inverse of this method is [`from_utf8`].
214+
///
215+
/// [`from_utf8`]: ColdString::from_utf8
216+
///
217+
/// # Examples
218+
///
219+
/// ```
220+
/// let s = cold_string::ColdString::from("hello");
221+
///
222+
/// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes());
223+
/// ```
122224
#[inline]
123225
pub fn as_bytes(&self) -> &[u8] {
124226
match self.is_inline() {
@@ -127,6 +229,14 @@ impl ColdString {
127229
}
128230
}
129231

232+
/// Returns a string slice containing the entire [`ColdString`].
233+
///
234+
/// # Examples
235+
/// ```
236+
/// let s = cold_string::ColdString::new("hello");
237+
///
238+
/// assert_eq!(s.as_str(), "hello");
239+
/// ```
130240
#[inline]
131241
pub fn as_str(&self) -> &str {
132242
unsafe { str::from_utf8_unchecked(self.as_bytes()) }

src/vint.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
pub struct VarInt;
22

33
impl VarInt {
4-
pub fn write(mut value: u64, buf: &mut [u8; 10]) -> usize {
4+
pub const fn write(mut value: u64, buf: &mut [u8; 10]) -> usize {
55
let mut i = 0;
66
loop {
77
let mut byte = (value & 0x7F) as u8;
@@ -19,7 +19,7 @@ impl VarInt {
1919
}
2020

2121
#[allow(unsafe_op_in_unsafe_fn)]
22-
pub unsafe fn read(ptr: *const u8) -> (u64, usize) {
22+
pub const unsafe fn read(ptr: *const u8) -> (u64, usize) {
2323
let mut result = 0u64;
2424
let mut shift = 0;
2525
let mut i = 0;

0 commit comments

Comments
 (0)