xenium
harris_michael_hash_map.hpp
1 //
2 // Copyright (c) 2018-2020 Manuel Pöter.
3 // Licensed under the MIT License. See LICENSE file in the project root for full license information.
4 //
5 
6 #ifndef XENIUM_HARRIS_MICHAEL_HASH_MAP_HPP
7 #define XENIUM_HARRIS_MICHAEL_HASH_MAP_HPP
8 
9 #include <xenium/acquire_guard.hpp>
10 #include <xenium/backoff.hpp>
11 #include <xenium/hash.hpp>
12 #include <xenium/parameter.hpp>
13 #include <xenium/policy.hpp>
14 #include <xenium/utils.hpp>
15 
16 #include <atomic>
17 #include <cassert>
18 
19 namespace xenium {
20 
21 namespace policy {
26  template <std::size_t Value>
27  struct buckets;
28 
37  template <class T>
38  struct map_to_bucket;
39 
49  template <bool Value>
50  struct memoize_hash;
51 } // namespace policy
52 
86 template <class Key, class Value, class... Policies>
88 public:
89  using value_type = std::pair<const Key, Value>;
90  using reclaimer = parameter::type_param_t<policy::reclaimer, parameter::nil, Policies...>;
91  using hash = parameter::type_param_t<policy::hash, xenium::hash<Key>, Policies...>;
92  using map_to_bucket = parameter::type_param_t<policy::map_to_bucket, utils::modulo<std::size_t>, Policies...>;
93  using backoff = parameter::type_param_t<policy::backoff, no_backoff, Policies...>;
94  static constexpr std::size_t num_buckets =
95  parameter::value_param_t<std::size_t, policy::buckets, 512, Policies...>::value;
96  static constexpr bool memoize_hash =
97  parameter::value_param_t<bool, policy::memoize_hash, !std::is_scalar<Key>::value, Policies...>::value;
98 
99  template <class... NewPolicies>
100  using with = harris_michael_hash_map<Key, Value, NewPolicies..., Policies...>;
101 
102  static_assert(parameter::is_set<reclaimer>::value, "reclaimer policy must be specified");
103 
104  class iterator;
105  class accessor;
106 
107  harris_michael_hash_map() = default;
109 
124  template <class... Args>
125  bool emplace(Args&&... args);
126 
143  template <class... Args>
144  std::pair<iterator, bool> emplace_or_get(Args&&... args);
145 
163  template <class... Args>
164  std::pair<iterator, bool> get_or_emplace(Key key, Args&&... args);
165 
185  template <class Factory>
186  std::pair<iterator, bool> get_or_emplace_lazy(Key key, Factory factory);
187 
198  bool erase(const Key& key);
199 
210  iterator erase(iterator pos);
211 
221  iterator find(const Key& key);
222 
231  bool contains(const Key& key);
232 
240  accessor operator[](const Key& key);
241 
246  iterator begin();
247 
254  iterator end();
255 
256 private:
257  struct node;
258  using hash_t = std::size_t;
259  using concurrent_ptr = typename reclaimer::template concurrent_ptr<node, 1>;
260  using marked_ptr = typename concurrent_ptr::marked_ptr;
261  using guard_ptr = typename concurrent_ptr::guard_ptr;
262 
263  template <typename Factory>
264  std::pair<iterator, bool> do_get_or_emplace_lazy(Key key, Factory node_factory);
265 
266  struct construct_without_hash {};
267 
268  struct data_without_hash {
269  value_type value;
270  template <class... Args>
271  explicit data_without_hash(hash_t /*hash*/, Args&&... args) : value(std::forward<Args>(args)...) {}
272  template <class... Args>
273  explicit data_without_hash(construct_without_hash, Args&&... args) : value(std::forward<Args>(args)...) {}
274  [[nodiscard]] hash_t get_hash() const { return hash{}(value.first); }
275  [[nodiscard]] bool greater_or_equal(hash_t /*h*/, const Key& key) const { return value.first >= key; }
276  };
277 
278  struct data_with_hash {
279  hash_t hash;
280  value_type value;
281  template <class... Args>
282  explicit data_with_hash(hash_t hash, Args&&... args) : hash(hash), value(std::forward<Args>(args)...) {}
283  template <class... Args>
284  explicit data_with_hash(construct_without_hash, Args&&... args) : value(std::forward<Args>(args)...) {
285  hash = harris_michael_hash_map::hash{}(value.first);
286  }
287  [[nodiscard]] hash_t get_hash() const { return hash; }
288  [[nodiscard]] bool greater_or_equal(hash_t h, const Key& key) const { return hash >= h && value.first >= key; }
289  };
290 
291  using data_t = std::conditional_t<memoize_hash, data_with_hash, data_without_hash>;
292 
293  struct node : reclaimer::template enable_concurrent_ptr<node, 1> {
294  data_t data;
295  concurrent_ptr next;
296  template <class... Args>
297  explicit node(Args&&... args) : data(std::forward<Args>(args)...), next() {}
298  };
299 
300  struct find_info {
301  concurrent_ptr* prev;
302  marked_ptr next{};
303  guard_ptr cur{};
304  guard_ptr save{};
305  };
306 
307  bool find(hash_t hash, const Key& key, std::size_t bucket, find_info& info, backoff& backoff);
308 
309  concurrent_ptr buckets[num_buckets];
310 };
311 
325 template <class Key, class Value, class... Policies>
326 class harris_michael_hash_map<Key, Value, Policies...>::iterator {
327 public:
328  using iterator_category = std::forward_iterator_tag;
329  using value_type = harris_michael_hash_map::value_type;
330  using difference_type = std::ptrdiff_t;
331  using pointer = value_type*;
332  using reference = value_type&;
333 
334  iterator(iterator&&) = default;
335  iterator(const iterator&) = default;
336 
337  iterator& operator=(iterator&&) = default;
338  iterator& operator=(const iterator&) = default;
339 
340  iterator& operator++() {
341  assert(info.cur.get() != nullptr);
342  auto next = info.cur->next.load(std::memory_order_relaxed);
343  guard_ptr tmp_guard;
344  // (1) - this acquire-load synchronizes-with the release-CAS (8, 9, 10, 12, 15)
345  if (next.mark() == 0 && tmp_guard.acquire_if_equal(info.cur->next, next, std::memory_order_acquire)) {
346  info.prev = &info.cur->next;
347  info.save = std::move(info.cur);
348  info.cur = std::move(tmp_guard);
349  } else {
350  // cur is marked for removal
351  // -> use find to remove it and get to the next node with a key >= cur->key
352  // Note: we have to copy key here!
353  Key key = info.cur->data.value.first;
354  hash_t h = info.cur->data.get_hash();
355  backoff backoff;
356  map->find(h, key, bucket, info, backoff);
357  }
358  assert(info.prev == &map->buckets[bucket] || info.cur.get() == nullptr ||
359  (info.save.get() != nullptr && &info.save->next == info.prev));
360 
361  if (!info.cur) {
362  move_to_next_bucket();
363  }
364 
365  return *this;
366  }
367  iterator operator++(int) {
368  iterator retval = *this;
369  ++(*this);
370  return retval;
371  }
372  bool operator==(const iterator& other) const { return info.cur.get() == other.info.cur.get(); }
373  bool operator!=(const iterator& other) const { return !(*this == other); }
374  reference operator*() const noexcept { return info.cur->data.value; }
375  pointer operator->() const noexcept { return &info.cur->data.value; }
376 
377  void reset() {
378  bucket = num_buckets;
379  info.prev = nullptr;
380  info.cur.reset();
381  info.save.reset();
382  }
383 
384 private:
386 
387  explicit iterator(harris_michael_hash_map* map) : map(map), bucket(num_buckets) {}
388 
389  explicit iterator(harris_michael_hash_map* map, std::size_t bucket) : map(map), bucket(bucket) {
390  info.prev = &map->buckets[bucket];
391  // (2) - this acquire-load synchronizes-with the release-CAS (8, 9, 10, 12, 15)
392  info.cur.acquire(*info.prev, std::memory_order_acquire);
393 
394  if (!info.cur) {
395  move_to_next_bucket();
396  }
397  }
398 
399  explicit iterator(harris_michael_hash_map* map, std::size_t bucket, find_info&& info) :
400  map(map),
401  bucket(bucket),
402  info(std::move(info)) {}
403 
404  void move_to_next_bucket() {
405  info.save.reset();
406  while (!info.cur && bucket < num_buckets - 1) {
407  ++bucket;
408  info.prev = &map->buckets[bucket];
409  // (3) - this acquire-load synchronizes-with the release-CAS (8, 9, 10, 12, 15)
410  info.cur.acquire(*info.prev, std::memory_order_acquire);
411  }
412  }
413 
415  std::size_t bucket;
416  find_info info;
417 };
418 
428 template <class Key, class Value, class... Policies>
429 class harris_michael_hash_map<Key, Value, Policies...>::accessor {
430 public:
431  Value* operator->() const noexcept { return &guard->data.value.second; }
432  Value& operator*() const noexcept { return guard->data.value.second; }
433  void reset() { guard.reset(); }
434 
435 private:
436  explicit accessor(guard_ptr&& guard) : guard(std::move(guard)) {}
437  guard_ptr guard;
439 };
440 
441 template <class Key, class Value, class... Policies>
443  for (std::size_t i = 0; i < num_buckets; ++i) {
444  // (4) - this acquire-load synchronizes-with the release-CAS (8, 9, 10, 12, 15)
445  auto p = buckets[i].load(std::memory_order_acquire);
446  while (p) {
447  // (5) - this acquire-load synchronizes-with the release-CAS (8, 9, 10, 12, 15)
448  auto next = p->next.load(std::memory_order_acquire);
449  delete p.get();
450  p = next;
451  }
452  }
453 }
454 
455 template <class Key, class Value, class... Policies>
457  const Key& key,
458  std::size_t bucket,
459  find_info& info,
460  backoff& backoff) {
461  auto& head = buckets[bucket];
462  assert((info.save == nullptr && info.prev == &head) || &info.save->next == info.prev);
463  concurrent_ptr* start = info.prev;
464  guard_ptr start_guard = info.save; // we have to keep a guard_ptr to prevent start's node from getting reclaimed.
465 retry:
466  info.prev = start;
467  info.save = start_guard;
468  info.next = info.prev->load(std::memory_order_relaxed);
469  if (info.next.mark() != 0) {
470  // our start node is marked for removal -> we have to restart from head
471  start = &head;
472  start_guard.reset();
473  goto retry;
474  }
475 
476  for (;;) {
477  // (6) - this acquire-load synchronizes-with the release-CAS (8, 9, 10, 12, 15)
478  if (!info.cur.acquire_if_equal(*info.prev, info.next, std::memory_order_acquire)) {
479  goto retry;
480  }
481 
482  if (!info.cur) {
483  return false;
484  }
485 
486  info.next = info.cur->next.load(std::memory_order_relaxed);
487  if (info.next.mark() != 0) {
488  // Node *cur is marked for deletion -> update the link and retire the element
489 
490  // (7) - this acquire-load synchronizes-with the release-CAS (8, 9, 10, 12, 15)
491  info.next = info.cur->next.load(std::memory_order_acquire).get();
492 
493  // Try to splice out node
494  marked_ptr expected = info.cur.get();
495  // (8) - this release-CAS synchronizes with the acquire-load (1, 2, 3, 4, 5, 6, 7, 13)
496  // and the acquire-CAS (11, 14)
497  // it is the head of a potential release sequence containing (11, 14)
498  if (!info.prev->compare_exchange_weak(
499  expected, info.next, std::memory_order_release, std::memory_order_relaxed)) {
500  backoff();
501  goto retry;
502  }
503  info.cur.reclaim();
504  } else {
505  if (info.prev->load(std::memory_order_relaxed) != info.cur.get()) {
506  goto retry; // cur might be cut from the hash_map.
507  }
508 
509  const auto& data = info.cur->data;
510  if (data.greater_or_equal(hash, key)) {
511  return data.value.first == key;
512  }
513 
514  info.prev = &info.cur->next;
515  std::swap(info.save, info.cur);
516  }
517  }
518 }
519 
520 template <class Key, class Value, class... Policies>
522  auto h = hash{}(key);
523  auto bucket = map_to_bucket{}(h, num_buckets);
524  find_info info{&buckets[bucket]};
525  backoff backoff;
526  return find(h, key, bucket, info, backoff);
527 }
528 
529 template <class Key, class Value, class... Policies>
531  auto h = hash{}(key);
532  auto bucket = map_to_bucket{}(h, num_buckets);
533  find_info info{&buckets[bucket]};
534  backoff backoff;
535  if (find(h, key, bucket, info, backoff)) {
536  return iterator(this, bucket, std::move(info));
537  }
538  return end();
539 }
540 
541 template <class Key, class Value, class... Policies>
542 template <class... Args>
544  auto result = emplace_or_get(std::forward<Args>(args)...);
545  return result.second;
546 }
547 
548 template <class Key, class Value, class... Policies>
549 template <class... Args>
551  -> std::pair<iterator, bool> {
552  return do_get_or_emplace_lazy(std::move(key), [&args...](hash_t hash, Key key) {
553  return new node(hash,
554  std::piecewise_construct,
555  std::forward_as_tuple(std::move(key)),
556  std::forward_as_tuple(std::forward<Args>(args)...));
557  });
558 }
559 
560 template <class Key, class Value, class... Policies>
561 template <typename Factory>
563  -> std::pair<iterator, bool> {
564  return do_get_or_emplace_lazy(
565  std::move(key), [&value_factory](hash_t hash, Key key) { return new node(hash, std::move(key), value_factory()); });
566 }
567 
568 template <class Key, class Value, class... Policies>
569 template <typename Factory>
570 auto harris_michael_hash_map<Key, Value, Policies...>::do_get_or_emplace_lazy(Key key, Factory node_factory)
571  -> std::pair<iterator, bool> {
572  node* n = nullptr;
573  auto h = hash{}(key);
574  auto bucket = map_to_bucket{}(h, num_buckets);
575 
576  const Key* pkey = &key;
577  find_info info{&buckets[bucket]};
578  backoff backoff;
579  for (;;) {
580  if (find(h, *pkey, bucket, info, backoff)) {
581  delete n;
582  return {iterator(this, bucket, std::move(info)), false};
583  }
584  if (n == nullptr) {
585  n = node_factory(h, std::move(key)); // NOLINT (use-after-move)
586  pkey = &n->data.value.first;
587  }
588 
589  // Try to install new node
590  marked_ptr cur = info.cur.get();
591  info.cur.reset();
592  info.cur = guard_ptr(n);
593  n->next.store(cur, std::memory_order_relaxed);
594 
595  // (9) - this release-CAS synchronizes with the acquire-load (1, 2, 3, 4, 5, 6, 7, 13)
596  // and the acquire-CAS (11, 14)
597  // it is the head of a potential release sequence containing (11, 14)
598  if (info.prev->compare_exchange_weak(cur, n, std::memory_order_release, std::memory_order_relaxed)) {
599  return {iterator(this, bucket, std::move(info)), true};
600  }
601 
602  backoff();
603  }
604 }
605 
606 template <class Key, class Value, class... Policies>
607 template <class... Args>
608 auto harris_michael_hash_map<Key, Value, Policies...>::emplace_or_get(Args&&... args) -> std::pair<iterator, bool> {
609  node* n = new node(construct_without_hash{}, std::forward<Args>(args)...);
610 
611  auto h = n->data.get_hash();
612  auto bucket = map_to_bucket{}(h, num_buckets);
613 
614  find_info info{&buckets[bucket]};
615  backoff backoff;
616  for (;;) {
617  if (find(h, n->data.value.first, bucket, info, backoff)) {
618  delete n;
619  return {iterator(this, bucket, std::move(info)), false};
620  }
621  // Try to install new node
622  marked_ptr expected = info.cur.get();
623  n->next.store(expected, std::memory_order_relaxed);
624  guard_ptr new_guard(n);
625 
626  // (10) - this release-CAS synchronizes with the acquire-load (1, 2, 3, 4, 5, 6, 7, 13)
627  // and the acquire-CAS (11, 14)
628  // it is the head of a potential release sequence containing (11, 14)
629  if (info.prev->compare_exchange_weak(expected, n, std::memory_order_release, std::memory_order_relaxed)) {
630  info.cur = std::move(new_guard);
631  return {iterator(this, bucket, std::move(info)), true};
632  }
633 
634  backoff();
635  }
636 }
637 
638 template <class Key, class Value, class... Policies>
640  auto h = hash{}(key);
641  auto bucket = map_to_bucket{}(h, num_buckets);
642  backoff backoff;
643  find_info info{&buckets[bucket]};
644  // Find node in hash_map with matching key and mark it for erasure.
645  do {
646  if (!find(h, key, bucket, info, backoff)) {
647  return false; // No such node in the hash_map
648  }
649  // (11) - this acquire-CAS synchronizes with the release-CAS (8, 9, 10, 12, 15)
650  // and is part of a release sequence headed by those operations
651  } while (!info.cur->next.compare_exchange_weak(
652  info.next, marked_ptr(info.next.get(), 1), std::memory_order_acquire, std::memory_order_relaxed));
653 
654  assert(info.next.mark() == 0);
655  assert(info.cur.mark() == 0);
656 
657  // Try to splice out node
658  marked_ptr expected = info.cur;
659  // (12) - this release-CAS synchronizes with the acquire-load (1, 2, 3, 4, 5, 6, 7, 13)
660  // and the acquire-CAS (11, 14)
661  // it is the head of a potential release sequence containing (11, 14)
662  if (info.prev->compare_exchange_weak(
663  expected, info.next.get(), std::memory_order_release, std::memory_order_relaxed)) {
664  info.cur.reclaim();
665  } else {
666  // Another thread interfered -> rewalk the bucket's list to ensure
667  // reclamation of marked node before returning.
668  find(h, key, bucket, info, backoff);
669  }
670 
671  return true;
672 }
673 
674 template <class Key, class Value, class... Policies>
676  backoff backoff;
677  // (13) - this acquire-load synchronizes-with the release-CAS (8, 9, 10, 12, 15)
678  auto next = pos.info.cur->next.load(std::memory_order_acquire);
679  while (next.mark() == 0) {
680  // (14) - this acquire-CAS synchronizes with the release-CAS (8, 9, 10, 12, 15)
681  // and is part of a release sequence headed by those operations
682  if (pos.info.cur->next.compare_exchange_weak(next, marked_ptr(next.get(), 1), std::memory_order_acquire)) {
683  break;
684  }
685 
686  backoff();
687  }
688 
689  guard_ptr next_guard(next.get());
690  assert(pos.info.cur.mark() == 0);
691 
692  // Try to splice out node
693  marked_ptr expected = pos.info.cur;
694  // (15) - this release-CAS synchronizes with the acquire-load (1, 2, 3, 4, 5, 6, 7, 13)
695  // and the acquire-CAS (11, 14)
696  // it is the head of a potential release sequence containing (11, 14)
697  if (pos.info.prev->compare_exchange_weak(
698  expected, next_guard, std::memory_order_release, std::memory_order_relaxed)) {
699  pos.info.cur.reclaim();
700  pos.info.cur = std::move(next_guard);
701  } else {
702  next_guard.reset();
703  Key key = pos.info.cur->data.value.first;
704  hash_t h = pos.info.cur->data.get_hash();
705 
706  // Another thread interfered -> rewalk the list to ensure reclamation of marked node before returning.
707  find(h, key, pos.bucket, pos.info, backoff);
708  }
709 
710  if (!pos.info.cur) {
711  pos.move_to_next_bucket();
712  }
713 
714  return pos;
715 }
716 
717 template <class Key, class Value, class... Policies>
719  auto result = get_or_emplace_lazy(key, []() { return Value{}; });
720  return accessor(std::move(result.first.info.cur));
721 }
722 
723 template <class Key, class Value, class... Policies>
725  return iterator(this, 0);
726 }
727 
728 template <class Key, class Value, class... Policies>
730  return iterator(this);
731 }
732 } // namespace xenium
733 
734 #endif
xenium::harris_michael_hash_map::get_or_emplace_lazy
std::pair< iterator, bool > get_or_emplace_lazy(Key key, Factory factory)
Inserts a new element into the container if the container doesn't already contain an element with an ...
xenium::policy::backoff
Policy to configure the backoff strategy.
Definition: policy.hpp:39
xenium::harris_michael_hash_map::end
iterator end()
Returns an iterator to the element following the last element of the container.
Definition: harris_michael_hash_map.hpp:729
xenium::harris_michael_hash_map::emplace_or_get
std::pair< iterator, bool > emplace_or_get(Args &&... args)
Inserts a new element into the container if the container doesn't already contain an element with an ...
xenium::harris_michael_hash_map::operator[]
accessor operator[](const Key &key)
The accessor
Definition: harris_michael_hash_map.hpp:718
xenium::harris_michael_hash_map::find
iterator find(const Key &key)
Finds an element with key equivalent to key.
Definition: harris_michael_hash_map.hpp:530
xenium::harris_michael_hash_map::begin
iterator begin()
Returns an iterator to the first element of the container.
Definition: harris_michael_hash_map.hpp:724
xenium::harris_michael_hash_map::accessor
An accessor to safely access the value of an element.
Definition: harris_michael_hash_map.hpp:429
xenium::hash
Slim wrapper around std::hash with specialization for pointer types.
Definition: hash.hpp:25
xenium::policy::buckets
Policy to configure the number of buckets in harris_michael_hash_map.
Definition: harris_michael_hash_map.hpp:27
xenium::harris_michael_hash_map::emplace
bool emplace(Args &&... args)
Inserts a new element into the container if the container doesn't already contain an element with an ...
Definition: harris_michael_hash_map.hpp:543
xenium::policy::reclaimer
Policy to configure the reclamation scheme to be used.
Definition: policy.hpp:25
xenium::policy::memoize_hash
Policy to configure whether the hash value should be stored and used during lookup operations in harr...
Definition: harris_michael_hash_map.hpp:50
xenium::no_backoff
Dummy backoff strategy that does nothing.
Definition: backoff.hpp:16
xenium::harris_michael_hash_map::erase
bool erase(const Key &key)
Removes the element with the key equivalent to key (if one exists).
Definition: harris_michael_hash_map.hpp:639
xenium::harris_michael_hash_map::get_or_emplace
std::pair< iterator, bool > get_or_emplace(Key key, Args &&... args)
Inserts a new element into the container if the container doesn't already contain an element with an ...
xenium::harris_michael_hash_map::iterator
A ForwardIterator to safely iterate the hash-map.
Definition: harris_michael_hash_map.hpp:326
xenium::policy::map_to_bucket
Policy to configure the function that maps the hash value to a bucket in harris_michael_hash_map.
Definition: harris_michael_hash_map.hpp:38
xenium::harris_michael_hash_map
A generic lock-free hash-map.
Definition: harris_michael_hash_map.hpp:87
xenium::harris_michael_hash_map::contains
bool contains(const Key &key)
Checks if there is an element with key equivalent to key in the container.
Definition: harris_michael_hash_map.hpp:521