1+ /*
2+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ * See the License for the specific language governing permissions and
14+ * limitations under the License.
15+ */
16+
17+ /*
18+ * N.B. You most likely do _not_ want to use RWSpinLock or any other
19+ * kind of spinlock. Use SharedMutex instead.
20+ *
21+ * In short, spinlocks in preemptive multi-tasking operating systems
22+ * have serious problems and fast mutexes like SharedMutex are almost
23+ * certainly the better choice, because letting the OS scheduler put a
24+ * thread to sleep is better for system responsiveness and throughput
25+ * than wasting a timeslice repeatedly querying a lock held by a
26+ * thread that's blocked, and you can't prevent userspace
27+ * programs blocking.
28+ *
29+ * Spinlocks in an operating system kernel make much more sense than
30+ * they do in userspace.
31+ *
32+ * -------------------------------------------------------------------
33+ *
34+ * Two Read-Write spin lock implementations.
35+ *
36+ * Ref: http://locklessinc.com/articles/locks
37+ *
38+ * Both locks here are faster than pthread_rwlock and have very low
39+ * overhead (usually 20-30ns). They don't use any system mutexes and
40+ * are very compact (4/8 bytes), so are suitable for per-instance
41+ * based locking, particularly when contention is not expected.
42+ *
 * For a spinlock, RWSpinLock is a reasonable choice. (See the note
 * above for why a spin lock is frequently a bad idea generally.)
45+ * RWSpinLock has minimal overhead, and comparable contention
46+ * performance when the number of competing threads is less than or
47+ * equal to the number of logical CPUs. Even as the number of
48+ * threads gets larger, RWSpinLock can still be very competitive in
49+ * READ, although it is slower on WRITE, and also inherently unfair
50+ * to writers.
51+ *
52+ * RWSpinLock handles 2^30 - 1 concurrent readers.
53+ */
54+
55+ #pragma once
56+
57+ /*
58+ ========================================================================
59+ Benchmark on (Intel(R) Xeon(R) CPU L5630 @ 2.13GHz) 8 cores(16 HTs)
60+ ========================================================================
61+
62+ ------------------------------------------------------------------------------
63+ 1. Single thread benchmark (read/write lock + unlock overhead)
64+ Benchmark Iters Total t t/iter iter/sec
65+ -------------------------------------------------------------------------------
66+ * BM_RWSpinLockRead 100000 1.786 ms 17.86 ns 53.4M
67+ +30.5% BM_RWSpinLockWrite 100000 2.331 ms 23.31 ns 40.91M
68+ + 175% BM_PThreadRWMutexRead 100000 4.917 ms 49.17 ns 19.4M
69+ + 166% BM_PThreadRWMutexWrite 100000 4.757 ms 47.57 ns 20.05M
70+
71+ ------------------------------------------------------------------------------
72+ 2. Contention Benchmark 90% read 10% write
73+ Benchmark hits average min max sigma
74+ ------------------------------------------------------------------------------
75+ ---------- 8 threads ------------
76+ RWSpinLock Write 142666 220ns 78ns 40.8us 269ns
77+ RWSpinLock Read 1282297 222ns 80ns 37.7us 248ns
78+ pthread_rwlock_t Write 84248 2.48us 99ns 269us 8.19us
79+ pthread_rwlock_t Read 761646 933ns 101ns 374us 3.25us
80+
81+ ---------- 16 threads ------------
82+ RWSpinLock Write 124236 237ns 78ns 261us 801ns
83+ RWSpinLock Read 1115807 236ns 78ns 2.27ms 2.17us
84+ pthread_rwlock_t Write 83363 7.12us 99ns 785us 28.1us
85+ pthread_rwlock_t Read 754978 2.18us 101ns 1.02ms 14.3us
86+
87+ ---------- 50 threads ------------
88+ RWSpinLock Write 131142 1.37us 82ns 7.53ms 68.2us
89+ RWSpinLock Read 1181240 262ns 78ns 6.62ms 12.7us
90+ pthread_rwlock_t Write 80849 112us 103ns 4.52ms 263us
91+ pthread_rwlock_t Read 728698 24us 101ns 7.28ms 194us
92+
93+ */
94+
95+
#include <algorithm>
#include <atomic>
#include <cstdint>
#include <thread>
99+
100+
101+ namespace from_folly {
102+
103+ /*
104+ * A simple, small (4-bytes), but unfair rwlock. Use it when you want
105+ * a nice writer and don't expect a lot of write/read contention, or
106+ * when you need small rwlocks since you are creating a large number
107+ * of them.
108+ *
109+ * Note that the unfairness here is extreme: if the lock is
110+ * continually accessed for read, writers will never get a chance. If
111+ * the lock can be that highly contended this class is probably not an
112+ * ideal choice anyway.
113+ *
114+ * It currently implements most of the Lockable, SharedLockable and
115+ * UpgradeLockable concepts except the TimedLockable related locking/unlocking
116+ * interfaces.
117+ */
class RWSpinLock {
  // Bit layout of bits_: bit 0 = WRITER, bit 1 = UPGRADED; each reader
  // adds READER (4), so bits 2..31 hold the reader count, which is how
  // the lock supports 2^30 - 1 concurrent readers.
  enum : int32_t { READER = 4, UPGRADED = 2, WRITER = 1 };

  // Failed acquisition attempts before each subsequent spin yields the
  // timeslice, so a blocked spinner stops burning CPU the holder may need.
  static constexpr uint_fast32_t kMaxActiveSpins = 1000;

 public:
  constexpr RWSpinLock() : bits_(0) {}

  RWSpinLock(RWSpinLock const&) = delete;
  RWSpinLock& operator=(RWSpinLock const&) = delete;

  // Lockable Concept

  // Acquire exclusive (writer) ownership, spinning until available.
  void lock() {
    spin_([this] { return try_lock(); });
  }

  // Release exclusive ownership.
  // Writer is responsible for clearing up both the UPGRADED and WRITER
  // bits: a failed try_lock_upgrade() may have left UPGRADED set while we
  // held WRITER (see the note in try_lock_upgrade()).
  void unlock() {
    static_assert(READER > WRITER + UPGRADED, "wrong bits!");
    bits_.fetch_and(~(WRITER | UPGRADED), std::memory_order_release);
  }

  // SharedLockable Concept

  // Acquire shared (reader) ownership, spinning while a writer or
  // upgrader holds or has claimed the lock.
  void lock_shared() {
    spin_([this] { return try_lock_shared(); });
  }

  // Release one shared (reader) hold.
  void unlock_shared() { bits_.fetch_add(-READER, std::memory_order_release); }

  // Downgrade the lock from writer status to reader status.
  void unlock_and_lock_shared() {
    // Claim a reader slot *before* dropping WRITER|UPGRADED so no other
    // writer can slip in between the two steps.
    bits_.fetch_add(READER, std::memory_order_acquire);
    unlock();
  }

  // UpgradeLockable Concept

  // Acquire upgrade ownership (may coexist with readers, excludes other
  // upgraders and writers), spinning until available.
  void lock_upgrade() {
    spin_([this] { return try_lock_upgrade(); });
  }

  // Release upgrade ownership.
  void unlock_upgrade() {
    bits_.fetch_add(-UPGRADED, std::memory_order_acq_rel);
  }

  // unlock upgrade and try to acquire write lock
  void unlock_upgrade_and_lock() {
    spin_([this] { return try_unlock_upgrade_and_lock(); });
  }

  // unlock upgrade and read lock atomically
  void unlock_upgrade_and_lock_shared() {
    bits_.fetch_add(READER - UPGRADED, std::memory_order_acq_rel);
  }

  // write unlock and upgrade lock atomically
  void unlock_and_lock_upgrade() {
    // need to do it in two steps here -- as the UPGRADED bit might be OR-ed
    // at the same time when other threads are trying do try_lock_upgrade().
    bits_.fetch_or(UPGRADED, std::memory_order_acquire);
    bits_.fetch_add(-WRITER, std::memory_order_release);
  }

  // Attempt to acquire writer permission. Return false if we didn't get it.
  bool try_lock() {
    int32_t expect = 0;
    return bits_.compare_exchange_strong(
        expect, WRITER, std::memory_order_acq_rel);
  }

  // Try to get reader permission on the lock. This can fail if we
  // find out someone is a writer or upgrader.
  // Setting the UPGRADED bit would allow a writer-to-be to indicate
  // its intention to write and block any new readers while waiting
  // for existing readers to finish and release their read locks. This
  // helps avoid starving writers (promoted from upgraders).
  bool try_lock_shared() {
    // fetch_add is considerably (100%) faster than compare_exchange,
    // so here we are optimizing for the common (lock success) case.
    int32_t value = bits_.fetch_add(READER, std::memory_order_acquire);
    if (value & (WRITER | UPGRADED)) {
      // Lost the race to a writer/upgrader: undo the optimistic add.
      bits_.fetch_add(-READER, std::memory_order_release);
      return false;
    }
    return true;
  }

  // try to unlock upgrade and write lock atomically
  bool try_unlock_upgrade_and_lock() {
    int32_t expect = UPGRADED;
    return bits_.compare_exchange_strong(
        expect, WRITER, std::memory_order_acq_rel);
  }

  // try to acquire an upgradable lock.
  bool try_lock_upgrade() {
    int32_t value = bits_.fetch_or(UPGRADED, std::memory_order_acquire);

    // Note: when failed, we cannot flip the UPGRADED bit back,
    // as in this case there is either another upgrade lock or a write lock.
    // If it's a write lock, the bit will get cleared up when that lock's done
    // with unlock().
    return ((value & (UPGRADED | WRITER)) == 0);
  }

  // mainly for debugging purposes.
  int32_t bits() const { return bits_.load(std::memory_order_acquire); }

 private:
  // Spin on tryFn() until it succeeds; once the attempt count exceeds
  // kMaxActiveSpins, yield the timeslice on every further failure.
  // Shared back-off policy for all four blocking acquire paths.
  template <class TryFn>
  void spin_(TryFn tryFn) {
    uint_fast32_t count = 0;
    while (!tryFn()) {
      if (++count > kMaxActiveSpins) {
        std::this_thread::yield();
      }
    }
  }

  std::atomic<int32_t> bits_;
};
246+
247+ }