xenium
lock_free_ref_count.hpp
//
// Copyright (c) 2018-2020 Manuel Pöter.
// Licensed under the MIT License. See LICENSE file in the project root for full license information.
//

#ifndef LOCK_FREE_REF_COUNT_IMPL
  #error "This is an impl file and must not be included directly!"
#endif

#ifdef _MSC_VER
  #pragma warning(push)
  #pragma warning(disable : 4127) // conditional expression is constant
#endif

namespace xenium::reclamation {

template <class Traits>
template <class T, std::size_t N, class Deleter>
class lock_free_ref_count<Traits>::enable_concurrent_ptr<T, N, Deleter>::free_list {
public:
  T* pop() {
    if (max_local_elements > 0) {
      if (auto* result = local_free_list().pop()) {
        return result;
      }
    }

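    // fall back to the global free list; the guard_ptr keeps the candidate
    // head node's ref_count elevated while we read its next pointer and try
    // to unlink it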
    guard_ptr guard;

    while (true) {
      // (1) - this acquire-load synchronizes-with the release-CAS (3)
      guard = acquire_guard(head, std::memory_order_acquire);
      if (guard.get() == nullptr) {
        return nullptr;
      }

      // Note: ref_count can be anything here since multiple threads
      // could have gotten a reference to the node on the freelist.
      marked_ptr expected(guard);
      auto next = guard->next_free().load(std::memory_order_relaxed);
      // since head is only changed via CAS operations it is sufficient to use relaxed order
      // for this operation as it is always part of a release-sequence headed by (3)
      if (head.compare_exchange_weak(expected, next, std::memory_order_relaxed)) {
        assert((guard->ref_count().load(std::memory_order_relaxed) & RefCountClaimBit) != 0 &&
               "ClaimBit must be set for a node on the free list");

        auto* ptr = guard.get();
        ptr->ref_count().fetch_sub(RefCountClaimBit, std::memory_order_relaxed); // clear claim bit
        ptr->next_free().store(nullptr, std::memory_order_relaxed);
        guard.ptr.reset(); // reset guard_ptr to prevent decrement of ref_count
        return ptr;
      }
    }
  }

  void push(T* node) {
    assert(node->ref_count().load(std::memory_order_relaxed) & RefCountClaimBit &&
           "ClaimBit must be set for a node to be put on the free list");
    if (max_local_elements > 0 && local_free_list().push(node)) {
      return;
    }

    add_nodes(node, node);
  }

private:
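  // splice the chain [first, last] onto the global free list with a single CAS on head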
  void add_nodes(T* first, T* last) {
    // (2) - this acquire-load synchronizes-with the release-CAS (3)
    auto old = head.load(std::memory_order_acquire);
    do {
      last->next_free().store(old, std::memory_order_relaxed);
      // (3) - if this release-CAS succeeds, it synchronizes-with the acquire-loads (1, 2)
      //       if it fails, the reload synchronizes-with itself (3)
    } while (!head.compare_exchange_weak(old, first, std::memory_order_release, std::memory_order_acquire));
  }

  // the free list is implemented as a LIFO singly linked list;
  // the LSB of a node's ref_count acts as claim bit, so for all nodes on the free list the bit has to be set
  concurrent_ptr<T, N> head;

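  // per-thread cache of free nodes; on thread exit any remaining nodes are
  // spliced back onto the global free list (see the destructor)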
  class thread_local_free_list {
  public:
    ~thread_local_free_list() noexcept {
      if (head == nullptr) {
        return;
      }
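      // walk to the end of the local list so the whole chain can be handed
      // to the global free list in a single add_nodes call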
      auto* first = head;
      auto* last = head;
      auto next = last->next_free().load(std::memory_order_relaxed);
      while (next) {
        last = next.get();
        next = next->next_free().load(std::memory_order_relaxed);
      }
      global_free_list.add_nodes(first, last);
    }

    bool push(T* node) {
      if (number_of_elements >= max_local_elements) {
        return false;
      }
      node->next_free().store(head, std::memory_order_relaxed);
      head = node;
      ++number_of_elements;
      return true;
    }

    T* pop() {
      auto* result = head;
      if (result) {
        assert(number_of_elements > 0);
        head = result->next_free().load(std::memory_order_relaxed).get();
        --number_of_elements;
        // clear claim bit and increment ref_count
        result->ref_count().fetch_add(RefCountInc - RefCountClaimBit, std::memory_order_relaxed);
        result->next_free().store(nullptr, std::memory_order_relaxed);
      }
      return result;
    }

  private:
    size_t number_of_elements = 0;
    T* head = nullptr;
  };

  static constexpr size_t max_local_elements = Traits::thread_local_free_list_size;

  static thread_local_free_list& local_free_list() {
    // workaround for gcc issue causing redefinition of __tls_guard when
    // defining this as static thread_local member of free_list.
    alignas(64) static thread_local thread_local_free_list local_free_list;
    return local_free_list;
  }
};

template <class Traits>
template <class T, std::size_t N, class Deleter>
void* lock_free_ref_count<Traits>::enable_concurrent_ptr<T, N, Deleter>::operator new(size_t sz) {
  assert(sz == sizeof(T) && "Cannot handle allocations of anything other than T instances");
  T* result = global_free_list.pop();
  if (result == nullptr) {
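    // no reusable node available - allocate a fresh block with the control
    // header placed directly in front of the T instance; the count starts at
    // one (RefCountInc), matching the extra decrement performed in reclaim()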
    auto h = static_cast<header*>(::operator new(sz + sizeof(header)));
    h->ref_count.store(RefCountInc, std::memory_order_release);
    result = static_cast<T*>(static_cast<void*>(h + 1));
  }

  return result;
}

template <class Traits>
template <class T, std::size_t N, class Deleter>
void lock_free_ref_count<Traits>::enable_concurrent_ptr<T, N, Deleter>::operator delete(void* p) {
  auto* node = static_cast<T*>(p);
  assert((node->ref_count().load(std::memory_order_relaxed) & RefCountClaimBit) == 0);

  if (node->decrement_refcnt()) {
    node->push_to_free_list();
  }
}

template <class Traits>
template <class T, std::size_t N, class Deleter>
bool lock_free_ref_count<Traits>::enable_concurrent_ptr<T, N, Deleter>::decrement_refcnt() {
  unsigned old_refcnt, new_refcnt;
  do {
    old_refcnt = ref_count().load(std::memory_order_relaxed);
    new_refcnt = old_refcnt - RefCountInc;
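    // if this was the last reference, also set the claim bit; the thread whose
    // CAS installs the claim bit wins the right to reclaim the node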
    if (new_refcnt == 0) {
      new_refcnt = RefCountClaimBit;
    }
    // (4) - this release/acquire CAS synchronizes with itself
  } while (!ref_count().compare_exchange_weak(old_refcnt,
                                              new_refcnt,
                                              new_refcnt == RefCountClaimBit ? std::memory_order_acquire
                                                                             : std::memory_order_release,
                                              std::memory_order_relaxed));

  // free node iff ref_count is zero AND we're the first thread to "claim" this node for reclamation.
  return ((old_refcnt - new_refcnt) & RefCountClaimBit) != 0;
}

template <class Traits>
template <class T, class MarkedPtr>
lock_free_ref_count<Traits>::guard_ptr<T, MarkedPtr>::guard_ptr(const MarkedPtr& p) noexcept : base(p) {
  if (this->ptr.get() != nullptr) {
    this->ptr->ref_count().fetch_add(RefCountInc, std::memory_order_relaxed);
  }
}

template <class Traits>
template <class T, class MarkedPtr>
lock_free_ref_count<Traits>::guard_ptr<T, MarkedPtr>::guard_ptr(const guard_ptr& p) noexcept : guard_ptr(p.ptr) {}

template <class Traits>
template <class T, class MarkedPtr>
lock_free_ref_count<Traits>::guard_ptr<T, MarkedPtr>::guard_ptr(guard_ptr&& p) noexcept : base(p.ptr) {
  p.ptr.reset();
}

template <class Traits>
template <class T, class MarkedPtr>
auto lock_free_ref_count<Traits>::guard_ptr<T, MarkedPtr>::operator=(const guard_ptr& p) -> guard_ptr& {
  if (&p == this) {
    return *this;
  }

  reset();
  this->ptr = p.ptr;
  if (this->ptr.get() != nullptr) {
    this->ptr->ref_count().fetch_add(RefCountInc, std::memory_order_relaxed);
  }
  return *this;
}

template <class Traits>
template <class T, class MarkedPtr>
auto lock_free_ref_count<Traits>::guard_ptr<T, MarkedPtr>::operator=(guard_ptr&& p) noexcept -> guard_ptr& {
  if (&p == this) {
    return *this;
  }

  reset();
  this->ptr = std::move(p.ptr);
  p.ptr.reset();
  return *this;
}

template <class Traits>
template <class T, class MarkedPtr>
void lock_free_ref_count<Traits>::guard_ptr<T, MarkedPtr>::acquire(const concurrent_ptr<T>& p,
                                                                   std::memory_order order) noexcept {
  for (;;) {
    reset();
    // FIXME: If this load is relaxed, TSan reports a data race between the following
    // fetch-add and the initialization of the ref_count field. I tend to disagree, as
    // the fetch_add should be ordered after the initial store (operator new) in the
    // modification order of ref_count. Therefore the acquire-fetch-add should
    // synchronize-with the release store.
    // I created a GitHub issue:
    // But for now, let's make this an acquire-load to make TSan happy.
    auto q = p.load(std::memory_order_acquire);
    this->ptr = q;
    if (q.get() == nullptr) {
      return;
    }

    // (5) - this acquire-fetch_add synchronizes-with the release-fetch_sub (7)
    //       this ensures that a change to p becomes visible
    q->ref_count().fetch_add(RefCountInc, std::memory_order_acquire);

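    // re-check that p still references the node whose count we just incremented;
    // if it changed in the meantime, reset() at the top of the loop releases the
    // reference and we retry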
    if (q == p.load(order)) {
      return;
    }
  }
}

template <class Traits>
template <class T, class MarkedPtr>
bool lock_free_ref_count<Traits>::guard_ptr<T, MarkedPtr>::acquire_if_equal(const concurrent_ptr<T>& p,
                                                                            const MarkedPtr& expected,
                                                                            std::memory_order order) noexcept {
  reset();
  // FIXME: same issue with TSan as in acquire (see above).
  auto q = p.load(std::memory_order_acquire);
  if (q != expected) {
    return false;
  }

  this->ptr = q;
  if (q.get() == nullptr) {
    return true;
  }

  // (6) - this acquire-fetch_add synchronizes-with the release-fetch_sub (7)
  //       this ensures that a change to p becomes visible
  q->ref_count().fetch_add(RefCountInc, std::memory_order_acquire);

  if (q == p.load(order)) {
    return true;
  }

  reset();
  return false;
}

template <class Traits>
template <class T, class MarkedPtr>
void lock_free_ref_count<Traits>::guard_ptr<T, MarkedPtr>::reset() noexcept {
  auto* p = this->ptr.get();
  this->ptr.reset();
  if (p == nullptr) {
    return;
  }

  if (p->decrement_refcnt()) {
    if (!p->is_destroyed()) {
      p->~T();
    }

    p->push_to_free_list();
  }
}

template <class Traits>
template <class T, class MarkedPtr>
void lock_free_ref_count<Traits>::guard_ptr<T, MarkedPtr>::reclaim(Deleter) noexcept {
  if (this->ptr.get() != nullptr) {
    assert(this->ptr->refs() > 1);
    // ref_count was initialized with "1", so we need an additional
    // decrement to ensure that the node gets reclaimed.
    // ref_count cannot drop to zero here -> no check required.
    // (7) - this release-fetch-sub synchronizes-with the acquire-fetch-add (5, 6)
    this->ptr->ref_count().fetch_sub(RefCountInc, std::memory_order_release);
  }
  reset();
}

template <class Traits>
template <class T, std::size_t N, class Deleter>
typename lock_free_ref_count<Traits>::template enable_concurrent_ptr<T, N, Deleter>::free_list
  lock_free_ref_count<Traits>::enable_concurrent_ptr<T, N, Deleter>::global_free_list;

#ifdef TRACK_ALLOCATIONS
template <class Traits>
inline detail::allocation_counter& lock_free_ref_count<Traits>::allocation_counter() {
  return allocation_counter_;
}

template <class Traits>
inline void lock_free_ref_count<Traits>::count_allocation() {
  allocation_counter().count_allocation();
}

template <class Traits>
inline void lock_free_ref_count<Traits>::count_reclamation() {
  allocation_counter().count_reclamation();
}
#endif
} // namespace xenium::reclamation

#ifdef _MSC_VER
  #pragma warning(pop)
#endif