/*
 * Copyright (c) 2015 Andrew Kelley
 *
 * This file is part of zig, which is MIT licensed.
 * See http://opensource.org/licenses/MIT
 */

#ifndef ZIG_HASH_MAP_HPP
#define ZIG_HASH_MAP_HPP

#include "util.hpp"

#include <stdint.h>

template<typename K, typename V,
        uint32_t (*HashFunction)(K key),
        bool (*EqualFn)(K a, K b)>
class HashMap {
public:
    void init(int capacity) {
        init_capacity(capacity);
    }
    void deinit(void) {
        _entries.deinit();
        heap::c_allocator.deallocate(_index_bytes,
                _indexes_len * capacity_index_size(_indexes_len));
    }

    struct Entry {
        uint32_t hash;
        uint32_t distance_from_start_index;
        K key;
        V value;
    };

    void clear() {
        _entries.clear();
        memset(_index_bytes, 0, _indexes_len * capacity_index_size(_indexes_len));
        _max_distance_from_start_index = 0;
        _modification_count += 1;
    }

    size_t size() const {
        return _entries.length;
    }

    void put(const K &key, const V &value) {
        _modification_count += 1;

        // This allows us to take a pointer to an entry in `internal_put` which
        // will not become a dead pointer when the array list is appended.
        _entries.ensure_capacity(_entries.length + 1);

        if (_index_bytes == nullptr) {
            if (_entries.length < 16) {
                // Small maps are a flat array with no indexes; scan it so that
                // re-putting an existing key overwrites its value instead of
                // appending a duplicate entry.
                uint32_t hash = HashFunction(key);
                for (size_t i = 0; i < _entries.length; i += 1) {
                    if (_entries.items[i].hash == hash && EqualFn(_entries.items[i].key, key)) {
                        _entries.items[i].value = value;
                        return;
                    }
                }
                _entries.append({hash, 0, key, value});
                return;
            } else {
                _indexes_len = 32;
                _index_bytes = heap::c_allocator.allocate<uint8_t>(_indexes_len);
                _max_distance_from_start_index = 0;
                for (size_t i = 0; i < _entries.length; i += 1) {
                    Entry *entry = &_entries.items[i];
                    put_index(entry, i, _index_bytes);
                }
                return internal_put(key, value, _index_bytes);
            }
        }

        // if we would get too full (60%), double the indexes size
        if ((_entries.length + 1) * 5 >= _indexes_len * 3) {
            heap::c_allocator.deallocate(_index_bytes,
                    _indexes_len * capacity_index_size(_indexes_len));
            _indexes_len *= 2;
            size_t sz = capacity_index_size(_indexes_len);
            // This zero initializes the bytes, setting them all empty.
            _index_bytes = heap::c_allocator.allocate<uint8_t>(_indexes_len * sz);
            _max_distance_from_start_index = 0;
            for (size_t i = 0; i < _entries.length; i += 1) {
                Entry *entry = &_entries.items[i];
                switch (sz) {
                    case 1:
                        put_index(entry, i, (uint8_t*)_index_bytes);
                        continue;
                    case 2:
                        put_index(entry, i, (uint16_t*)_index_bytes);
                        continue;
                    case 4:
                        put_index(entry, i, (uint32_t*)_index_bytes);
                        continue;
                    default:
                        put_index(entry, i, (size_t*)_index_bytes);
                        continue;
                }
            }
        }

        switch (capacity_index_size(_indexes_len)) {
            case 1: return internal_put(key, value, (uint8_t*)_index_bytes);
            case 2: return internal_put(key, value, (uint16_t*)_index_bytes);
            case 4: return internal_put(key, value, (uint32_t*)_index_bytes);
            default: return internal_put(key, value, (size_t*)_index_bytes);
        }
    }
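
    // A worked example of the growth check in put() above (illustrative numbers):
    // (_entries.length + 1) * 5 >= _indexes_len * 3 is the integer form of
    // (length + 1) / _indexes_len >= 3/5. With _indexes_len == 32, the indexes are
    // doubled by the put that would raise the entry count to 20,
    // since 20 * 5 == 100 >= 32 * 3 == 96.
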
    Entry *put_unique(const K &key, const V &value) {
        // TODO make this more efficient
        Entry *entry = internal_get(key);
        if (entry)
            return entry;
        put(key, value);
        return nullptr;
    }

    const V &get(const K &key) const {
        Entry *entry = internal_get(key);
        if (!entry)
            zig_panic("key not found");
        return entry->value;
    }

    Entry *maybe_get(const K &key) const {
        return internal_get(key);
    }

    bool remove(const K &key) {
        bool deleted_something = maybe_remove(key);
        if (!deleted_something)
            zig_panic("key not found");
        return deleted_something;
    }

    bool maybe_remove(const K &key) {
        _modification_count += 1;
        if (_index_bytes == nullptr) {
            uint32_t hash = HashFunction(key);
            for (size_t i = 0; i < _entries.length; i += 1) {
                if (_entries.items[i].hash == hash && EqualFn(_entries.items[i].key, key)) {
                    _entries.swap_remove(i);
                    return true;
                }
            }
            return false;
        }
        switch (capacity_index_size(_indexes_len)) {
            case 1: return internal_remove(key, (uint8_t*)_index_bytes);
            case 2: return internal_remove(key, (uint16_t*)_index_bytes);
            case 4: return internal_remove(key, (uint32_t*)_index_bytes);
            default: return internal_remove(key, (size_t*)_index_bytes);
        }
    }

    class Iterator {
    public:
        Entry *next() {
            if (_initial_modification_count != _table->_modification_count)
                zig_panic("concurrent modification");
            if (_index >= _table->_entries.length)
                return nullptr;
            Entry *entry = &_table->_entries.items[_index];
            _index += 1;
            return entry;
        }
    private:
        const HashMap *_table;
        // iterator through the entry array
        size_t _index = 0;
        // used to detect concurrent modification
        uint32_t _initial_modification_count;
        Iterator(const HashMap *table) :
            _table(table), _initial_modification_count(table->_modification_count)
        {
        }
        friend HashMap;
    };

    // you must not modify the underlying HashMap while this iterator is still in use
    Iterator entry_iterator() const {
        return Iterator(this);
    }

private:
    // Maintains insertion order.
    ZigList<Entry> _entries;

    // If _indexes_len is less than 2**8, this is an array of uint8_t.
    // If _indexes_len is less than 2**16, it is an array of uint16_t.
    // If _indexes_len is less than 2**32, it is an array of uint32_t.
    // Otherwise it is size_t.
    // It's off by 1. 0 means empty slot, 1 means index 0, etc.
    uint8_t *_index_bytes;

    // This is the number of indexes. When indexes are bytes, it equals number of bytes.
    // When indexes are uint16_t, _indexes_len is half the number of bytes.
    size_t _indexes_len;

    size_t _max_distance_from_start_index;

    // This is used to detect bugs where a hashtable is edited while an iterator is running.
    uint32_t _modification_count;

    void init_capacity(size_t capacity) {
        _entries = {};
        _entries.ensure_capacity(capacity);
        _indexes_len = 0;
        if (capacity >= 16) {
            // So that at capacity it will only be 60% full.
            _indexes_len = capacity * 5 / 3;
            size_t sz = capacity_index_size(_indexes_len);
            // This zero initializes _index_bytes which sets them all to empty.
            _index_bytes = heap::c_allocator.allocate<uint8_t>(_indexes_len * sz);
        } else {
            _index_bytes = nullptr;
        }
        _max_distance_from_start_index = 0;
        _modification_count = 0;
    }

    static size_t capacity_index_size(size_t len) {
        if (len < UINT8_MAX)
            return 1;
        if (len < UINT16_MAX)
            return 2;
        if (len < UINT32_MAX)
            return 4;
        return sizeof(size_t);
    }
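
    // A worked example of the index sizing above (illustrative numbers): init(100)
    // sets _indexes_len to 100 * 5 / 3 == 166, so capacity_index_size() selects
    // 1-byte indexes; init(1000) gives _indexes_len == 1666 and 2-byte indexes.
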
    template<typename I>
    void internal_put(const K &key, const V &value, I *indexes) {
        uint32_t hash = HashFunction(key);
        uint32_t distance_from_start_index = 0;
        size_t start_index = hash_to_index(hash);
        for (size_t roll_over = 0; roll_over < _indexes_len;
            roll_over += 1, distance_from_start_index += 1)
        {
            size_t index_index = (start_index + roll_over) % _indexes_len;
            I index_data = indexes[index_index];
            if (index_data == 0) {
                _entries.append_assuming_capacity({ hash, distance_from_start_index, key, value });
                indexes[index_index] = _entries.length;
                if (distance_from_start_index > _max_distance_from_start_index)
                    _max_distance_from_start_index = distance_from_start_index;
                return;
            }
            // This pointer survives the following append because we call
            // _entries.ensure_capacity before internal_put.
            Entry *entry = &_entries.items[index_data - 1];
            if (entry->hash == hash && EqualFn(entry->key, key)) {
                *entry = {hash, distance_from_start_index, key, value};
                if (distance_from_start_index > _max_distance_from_start_index)
                    _max_distance_from_start_index = distance_from_start_index;
                return;
            }
            if (entry->distance_from_start_index < distance_from_start_index) {
                // In this case, we did not find the item. We will put a new entry.
                // However, we will use this index for the new entry, and move
                // the previous index down the line, to keep the _max_distance_from_start_index
                // as small as possible.
                _entries.append_assuming_capacity({ hash, distance_from_start_index, key, value });
                indexes[index_index] = _entries.length;
                if (distance_from_start_index > _max_distance_from_start_index)
                    _max_distance_from_start_index = distance_from_start_index;

                distance_from_start_index = entry->distance_from_start_index;

                // Find somewhere to put the index we replaced by shifting
                // following indexes backwards.
                roll_over += 1;
                distance_from_start_index += 1;
                for (; roll_over < _indexes_len; roll_over += 1, distance_from_start_index += 1) {
                    size_t index_index = (start_index + roll_over) % _indexes_len;
                    I next_index_data = indexes[index_index];
                    if (next_index_data == 0) {
                        if (distance_from_start_index > _max_distance_from_start_index)
                            _max_distance_from_start_index = distance_from_start_index;
                        entry->distance_from_start_index = distance_from_start_index;
                        indexes[index_index] = index_data;
                        return;
                    }
                    Entry *next_entry = &_entries.items[next_index_data - 1];
                    if (next_entry->distance_from_start_index < distance_from_start_index) {
                        if (distance_from_start_index > _max_distance_from_start_index)
                            _max_distance_from_start_index = distance_from_start_index;
                        entry->distance_from_start_index = distance_from_start_index;
                        indexes[index_index] = index_data;
                        distance_from_start_index = next_entry->distance_from_start_index;
                        entry = next_entry;
                        index_data = next_index_data;
                    }
                }
                zig_unreachable();
            }
        }
        zig_unreachable();
    }

    template<typename I>
    void put_index(Entry *entry, size_t entry_index, I *indexes) {
        size_t start_index = hash_to_index(entry->hash);
        size_t index_data = entry_index + 1;
        for (size_t roll_over = 0, distance_from_start_index = 0;
            roll_over < _indexes_len; roll_over += 1, distance_from_start_index += 1)
        {
            size_t index_index = (start_index + roll_over) % _indexes_len;
            size_t next_index_data = indexes[index_index];
            if (next_index_data == 0) {
                if (distance_from_start_index > _max_distance_from_start_index)
                    _max_distance_from_start_index = distance_from_start_index;
                entry->distance_from_start_index = distance_from_start_index;
                indexes[index_index] = index_data;
                return;
            }
            Entry *next_entry = &_entries.items[next_index_data - 1];
            if (next_entry->distance_from_start_index < distance_from_start_index) {
                if (distance_from_start_index > _max_distance_from_start_index)
                    _max_distance_from_start_index = distance_from_start_index;
                entry->distance_from_start_index = distance_from_start_index;
                indexes[index_index] = index_data;
                distance_from_start_index = next_entry->distance_from_start_index;
                entry = next_entry;
                index_data = next_index_data;
            }
        }
        zig_unreachable();
    }

    Entry *internal_get(const K &key) const {
        if (_index_bytes == nullptr) {
            uint32_t hash = HashFunction(key);
            for (size_t i = 0; i < _entries.length; i += 1) {
                if (_entries.items[i].hash == hash && EqualFn(_entries.items[i].key, key)) {
                    return &_entries.items[i];
                }
            }
            return nullptr;
        }
        switch (capacity_index_size(_indexes_len)) {
            case 1: return internal_get2(key, (uint8_t*)_index_bytes);
            case 2: return internal_get2(key, (uint16_t*)_index_bytes);
            case 4: return internal_get2(key, (uint32_t*)_index_bytes);
            default: return internal_get2(key, (size_t*)_index_bytes);
        }
    }

    template<typename I>
    Entry *internal_get2(const K &key, I *indexes) const {
        uint32_t hash = HashFunction(key);
        size_t start_index = hash_to_index(hash);
        for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
            size_t index_index = (start_index + roll_over) % _indexes_len;
            size_t index_data = indexes[index_index];
            if (index_data == 0)
                return nullptr;

            Entry *entry = &_entries.items[index_data - 1];
            if (entry->hash == hash && EqualFn(entry->key, key))
                return entry;
        }
        return nullptr;
    }
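
    // Probe-order illustration for internal_get2() (illustrative numbers): with
    // _indexes_len == 32 and hash == 70, hash_to_index() yields start slot 6, so the
    // scan visits slots 6, 7, 8, ... (wrapping modulo 32) and stops at the first
    // empty slot or after _max_distance_from_start_index + 1 probes.
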
    size_t hash_to_index(uint32_t hash) const {
        return ((size_t)hash) % _indexes_len;
    }

    template<typename I>
    bool internal_remove(const K &key, I *indexes) {
        uint32_t hash = HashFunction(key);
        size_t start_index = hash_to_index(hash);
        for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
            size_t index_index = (start_index + roll_over) % _indexes_len;
            size_t index_data = indexes[index_index];
            if (index_data == 0)
                return false;

            size_t index = index_data - 1;
            Entry *entry = &_entries.items[index];
            if (entry->hash != hash || !EqualFn(entry->key, key))
                continue;

            size_t prev_index = index_index;
            _entries.swap_remove(index);
            if (_entries.length > 0 && _entries.length != index) {
                // Because of the swap remove, now we need to update the index that was
                // pointing to the last entry and is now pointing to this removed item slot.
                update_entry_index(_entries.length, index, indexes);
            }

            // Now we have to shift over the following indexes.
            roll_over += 1;
            for (; roll_over < _indexes_len; roll_over += 1) {
                size_t next_index = (start_index + roll_over) % _indexes_len;
                if (indexes[next_index] == 0) {
                    indexes[prev_index] = 0;
                    return true;
                }
                Entry *next_entry = &_entries.items[indexes[next_index] - 1];
                if (next_entry->distance_from_start_index == 0) {
                    indexes[prev_index] = 0;
                    return true;
                }
                indexes[prev_index] = indexes[next_index];
                prev_index = next_index;
                next_entry->distance_from_start_index -= 1;
            }
            zig_unreachable();
        }
        return false;
    }

    template<typename I>
    void update_entry_index(size_t old_entry_index, size_t new_entry_index, I *indexes) {
        size_t start_index = hash_to_index(_entries.items[new_entry_index].hash);
        for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
            size_t index_index = (start_index + roll_over) % _indexes_len;
            if (indexes[index_index] == old_entry_index + 1) {
                indexes[index_index] = new_entry_index + 1;
                return;
            }
        }
        zig_unreachable();
    }
};

#endif
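
// Usage sketch (illustrative only; `hash_i32`, `eq_i32`, and `IntMap` are
// hypothetical names, not part of this header). The hash and equality functions
// are supplied as template arguments:
//
//     uint32_t hash_i32(int x) {
//         return (uint32_t)x * 2654435761u; // any reasonable 32-bit hash works
//     }
//     bool eq_i32(int a, int b) {
//         return a == b;
//     }
//     using IntMap = HashMap<int, int, hash_i32, eq_i32>;
//
//     IntMap map;
//     map.init(16);
//     map.put(42, 1);
//     if (IntMap::Entry *entry = map.maybe_get(42)) {
//         // entry->value == 1
//     }
//     auto it = map.entry_iterator();
//     for (IntMap::Entry *e = it.next(); e != nullptr; e = it.next()) {
//         // visit e->key / e->value; do not put or remove while iterating
//     }
//     map.remove(42);
//     map.deinit();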