2010-05-27 23:18:08 +00:00

564 lines
18 KiB

// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#pragma once
// HashMap based on the Thatcher Ulrich <tu@tulrich.com> container, donated to the Public Domain.
#include "nvcore.h"
#include "Memory.h"
#include "Debug.h"
#include "Stream.h"
#include "Utils.h" // hash
#include "Array.h" // foreach/pseudoindex
#include <new> // for placement new
namespace nv
/** Thatcher Ulrich's hash table.
 *
 * Hash table, linear probing, internal chaining. One
 * interesting/nice thing about this implementation is that the table
 * itself is a flat chunk of memory containing no pointers, only
 * relative indices. If the key and value types of the hash contain
 * no pointers, then the hash can be serialized using raw IO. Could
 * come in handy.
 *
 * Never shrinks, unless you explicitly clear() it. Expands on
 * demand, though. For best results, if you know roughly how big your
 * table will be, pass that size as a hint when you create it.
 */
template<typename T, typename U, typename H = Hash<T>, typename E = Equal<T> >
class NVCORE_CLASS HashMap
/// Default ctor: starts empty with no table allocated (table is NULL, size_mask is -1).
HashMap()
    : entry_count(0)
    , size_mask(-1)
    , table(NULL)
{
}
/// Ctor with size hint: starts empty, then forwards size_hint to setCapacity().
explicit HashMap(int size_hint)
    : entry_count(0)
    , size_mask(-1)
    , table(NULL)
{
    setCapacity(size_hint);
}
/// Dtor: delegates all cleanup to clear().
~HashMap()
{
    clear();
}
/// Set a new or existing value under the key, to the value.
void set(const T& key, const U& value)
int index = findIndex(key);
if (index >= 0)
entry(index).value = value;
// Entry under key doesn't exist.
add(key, value);
/// Add a new value to the hash table, under the specified key.
void add(const T& key, const U& value)
nvCheck(findIndex(key) == -1);
nvCheck(table != NULL);
const uint hash_value = compute_hash(key);
const int index = hash_value & size_mask;
Entry * natural_entry = &(entry(index));
if (natural_entry->isEmpty())
// Put the new entry in.
new (natural_entry) Entry(key, value, -1, hash_value);
else if (natural_entry->isTombstone()) {
// Put the new entry in, without disturbing the rest of the chain.
int next_in_chain = natural_entry->next_in_chain;
new (natural_entry) Entry(key, value, next_in_chain, hash_value);
// Find a blank spot.
int blank_index = index;
for (int search_count = 0; ; search_count++)
blank_index = (blank_index + 1) & size_mask;
if (entry(blank_index).isEmpty()) break; // found it
if (entry(blank_index).isTombstone()) {
blank_index = removeTombstone(blank_index);
nvCheck(search_count < this->size_mask);
Entry * blank_entry = &entry(blank_index);
if (int(natural_entry->hash_value & size_mask) == index)
// Collision. Link into this chain.
// Move existing list head.
new (blank_entry) Entry(*natural_entry); // placement new, copy ctor
// Put the new info in the natural entry.
natural_entry->key = key;
natural_entry->value = value;
natural_entry->next_in_chain = blank_index;
natural_entry->hash_value = hash_value;
// Existing entry does not naturally
// belong in this slot. Existing
// entry must be moved.
// Find natural location of collided element (i.e. root of chain)
int collided_index = natural_entry->hash_value & size_mask;
for (int search_count = 0; ; search_count++)
Entry * e = &entry(collided_index);
if (e->next_in_chain == index)
// Here's where we need to splice.
new (blank_entry) Entry(*natural_entry);
e->next_in_chain = blank_index;
collided_index = e->next_in_chain;
nvCheck(collided_index >= 0 && collided_index <= size_mask);
nvCheck(search_count <= size_mask);
// Put the new data in the natural entry.
natural_entry->key = key;
natural_entry->value = value;
natural_entry->hash_value = hash_value;
natural_entry->next_in_chain = -1;
/// Remove the first value under the specified key.
bool remove(const T& key)
if (table == NULL)
return false;
int index = findIndex(key);
if (index < 0)
return false;
Entry * pos = &entry(index);
int natural_index = (int) (pos->hash_value & size_mask);
if (index != natural_index) {
// We're not the head of our chain, so we can
// be spliced out of it.
// Iterate up the chain, and splice out when
// we get to m_index.
Entry* e = &entry(natural_index);
while (e->next_in_chain != index) {
assert(e->isEndOfChain() == false);
e = &entry(e->next_in_chain);
if (e->isTombstone() && pos->isEndOfChain()) {
// Tombstone has nothing else to point
// to, so mark it empty.
e->next_in_chain = -2;
} else {
e->next_in_chain = pos->next_in_chain;
else if (pos->isEndOfChain() == false) {
// We're the head of our chain, and there are
// additional elements.
// We need to put a tombstone here.
// We can't clear the element, because the
// rest of the elements in the chain must be
// linked to this position.
// We can't move any of the succeeding
// elements in the chain (i.e. to fill this
// entry), because we don't want to invalidate
// any other existing iterators.
} else {
// We're the head of the chain, but we're the
// only member of the chain.
return true;
/// Remove all entries from the hash table.
void clear()
if (table != NULL)
// Delete the entries.
for (int i = 0, n = size_mask; i <= n; i++)
Entry * e = &entry(i);
if (e->isEmpty() == false && e->isTombstone() == false)
table = NULL;
entry_count = 0;
size_mask = -1;
/// Returns true if the hash is empty.
bool isEmpty() const
return table == NULL || entry_count == 0;
/** Retrieve the value under the given key.
* If there's no value under the key, then return false and leave
* *value alone.
* If there is a value, return true, and set *value to the entry's
* value.
* If value == NULL, return true or false according to the
* presence of the key, but don't touch *value.
bool get(const T& key, U* value = NULL) const
int index = findIndex(key);
if (index >= 0)
if (value) {
*value = entry(index).value; // take care with side-effects!
return true;
return false;
/// Determine if the given key is contained in the hash.
bool contains(const T & key) const
return get(key);
/// Number of entries in the hash.
int size() const
return entry_count;
/// Number of entries in the hash.
int count() const
return size();
* Resize the hash table to fit one more entry. Often this
* doesn't involve any action.
void checkExpand()
if (table == NULL) {
// Initial creation of table. Make a minimum-sized table.
else if (entry_count * 3 > (size_mask + 1) * 2) {
// Table is more than 2/3rds full. Expand.
setRawCapacity(entry_count * 2);
/// Hint the bucket count to >= n.
void resize(int n)
// Not really sure what this means in relation to
// STLport's hash_map... they say they "increase the
// bucket count to at least n" -- but does that mean
// their real capacity after resize(n) is more like
// n*2 (since they do linked-list chaining within
// buckets?).
* Size the hash so that it can comfortably contain the given
* number of elements. If the hash already contains more
* elements than new_size, then this may be a no-op.
void setCapacity(int new_size)
int new_raw_size = (new_size * 3) / 2;
if (new_raw_size < size()) { return; }
/// Behaves much like std::pair.
struct Entry
int next_in_chain; // internal chaining for collisions
uint hash_value; // avoids recomputing. Worthwhile?
T key;
U value;
Entry() : next_in_chain(-2) {}
Entry(const Entry& e)
: next_in_chain(e.next_in_chain), hash_value(e.hash_value), key(e.key), value(e.value)
Entry(const T& k, const U& v, int next, int hash)
: next_in_chain(next), hash_value(hash), key(k), value(v)
bool isEmpty() const { return next_in_chain == -2; }
bool isEndOfChain() const { return next_in_chain == -1; }
bool isTombstone() const { return hash_value == TOMBSTONE_HASH; }
void clear()
key.~T(); // placement delete
value.~U(); // placement delete
next_in_chain = -2;
hash_value = ~TOMBSTONE_HASH;
void makeTombstone()
hash_value = TOMBSTONE_HASH;
// HashMap enumerator.
typedef int PseudoIndex;
/// Enumerator: returns the position findNext() settles on when starting from slot 0.
PseudoIndex start() const
{
    PseudoIndex first = 0;
    findNext(first);
    return first;
}
/// Enumerator: true once i has reached size_mask+1, i.e. one past the last slot.
bool isDone(const PseudoIndex & i) const
{
    nvDebugCheck(i <= size_mask+1);
    const bool pastLastSlot = (i == size_mask+1);
    return pastLastSlot;
}
/// Enumerator: step past the current slot, then let findNext() skip to the next valid one.
void advance(PseudoIndex & i) const
{
    nvDebugCheck(i <= size_mask+1);
    ++i;
    findNext(i);
}
// Entry access by enumerator position. The slot must not be a tombstone.
// NOTE(review): the opening `#if` was missing; NV_CC_GNUC is reconstructed to
// pair with the `#elif NV_CC_MSVC` branch below -- confirm against the build config.
#if NV_CC_GNUC
Entry & operator[]( const PseudoIndex & i )
{
    Entry & e = entry(i);
    nvDebugCheck(e.isTombstone() == false);
    return e;
}
const Entry & operator[]( const PseudoIndex & i ) const
{
    const Entry & e = entry(i);
    nvDebugCheck(e.isTombstone() == false);
    return e;
}
#elif NV_CC_MSVC
// Here the position arrives wrapped; i(this) yields the slot index.
Entry & operator[]( const PseudoIndexWrapper & i )
{
    Entry & e = entry(i(this));
    nvDebugCheck(e.isTombstone() == false);
    return e;
}
const Entry & operator[]( const PseudoIndexWrapper & i ) const
{
    const Entry & e = entry(i(this));
    nvDebugCheck(e.isTombstone() == false);
    return e;
}
#endif
// Reserved hash value that marks a slot as a tombstone (see Entry::isTombstone).
// compute_hash() remaps this value so it is never returned for a key.
static const uint TOMBSTONE_HASH = (uint) -1;
// Hash the key with H. The reserved value TOMBSTONE_HASH is remapped
// (by flipping bit 0x8000) so a key's hash is never mistaken for a tombstone.
uint compute_hash(const T& key) const
{
    H hash;
    uint hash_value = hash(key);
    if (hash_value == TOMBSTONE_HASH)
    {
        hash_value ^= 0x8000;
    }
    return hash_value;
}
// Find the index of the matching entry. If no match, then return -1.
int findIndex(const T& key) const
if (table == NULL) return -1;
E equal;
uint hash_value = compute_hash(key);
int index = hash_value & size_mask;
const Entry * e = &entry(index);
if (e->isEmpty()) return -1;
if (e->isTombstone() == false && int(e->hash_value & size_mask) != index) {
// occupied by a collider
return -1;
for (;;)
nvCheck(e->isTombstone() || (e->hash_value & size_mask) == (hash_value & size_mask));
if (e->hash_value == hash_value && equal(e->key, key))
// Found it.
return index;
nvDebugCheck(e->isTombstone() || !equal(e->key, key)); // keys are equal, but hash differs!
// Keep looking through the chain.
index = e->next_in_chain;
if (index == -1) break; // end of chain
nvCheck(index >= 0 && index <= size_mask);
e = &entry(index);
nvCheck(e->isEmpty() == false || e->isTombstone());
return -1;
// Return the index of the newly cleared element.
int removeTombstone(int index) {
Entry* e = &entry(index);
// Move the next element of the chain into the
// tombstone slot, and return the vacated element.
int new_blank_index = e->next_in_chain;
Entry* new_blank = &entry(new_blank_index);
new (e) Entry(*new_blank);
return new_blank_index;
// Helpers.

// Slot access with debug-only checks that a table exists and index is in [0, size_mask].
Entry & entry(int index)
{
    nvDebugCheck(table != NULL);
    nvDebugCheck(index >= 0 && index <= size_mask);
    return table[index];
}
// Const slot access with the same debug-only checks as the mutable overload.
const Entry & entry(int index) const
{
    nvDebugCheck(table != NULL);
    nvDebugCheck(index >= 0 && index <= size_mask);
    return table[index];
}
* Resize the hash table to the given size (Rehash the
* contents of the current table). The arg is the number of
* hash table entries, not the number of elements we should
* actually contain (which will be less than this).
void setRawCapacity(int new_size)
if (new_size <= 0) {
// Special case.
// Force new_size to be a power of two.
new_size = nextPowerOfTwo(new_size);
HashMap<T, U, H, E> new_hash;
new_hash.table = (Entry *) mem::malloc(sizeof(Entry) * new_size);
nvDebugCheck(new_hash.table != NULL);
new_hash.entry_count = 0;
new_hash.size_mask = new_size - 1;
for (int i = 0; i < new_size; i++)
new_hash.entry(i).next_in_chain = -2; // mark empty
// Copy stuff to new_hash
if (table != NULL)
for (int i = 0, n = size_mask; i <= n; i++)
Entry * e = &entry(i);
if (e->isEmpty() == false && e->isTombstone() == false)
// Insert old entry into new hash.
new_hash.add(e->key, e->value);
e->clear(); // placement delete of old element
// Delete our old data buffer.
// Steal new_hash's data.
entry_count = new_hash.entry_count;
size_mask = new_hash.size_mask;
table = new_hash.table;
new_hash.entry_count = 0;
new_hash.size_mask = -1;
new_hash.table = NULL;
// Move the enumerator to the next valid element.
// Leaves i unchanged if it already refers to a live entry; otherwise advances
// it past empty and tombstone slots, stopping at size_mask+1 if none is left.
void findNext(PseudoIndex & i) const
{
    while (i <= size_mask)
    {
        const Entry & e = entry(i);
        if (e.isEmpty() == false && e.isTombstone() == false)
        {
            break;
        }
        i++;
    }
}
// Number of entries in the hash (the value size() returns).
int entry_count;
// Table size minus one (sizes are forced to a power of two, so this doubles
// as the index mask); -1 when no table is allocated.
int size_mask;
// Flat array of size_mask + 1 slots, or NULL when no table is allocated.
Entry * table;
} // nv namespace