public Hashtable(int capacity, float loadFactor) {
    if (capacity < 0)
        throw new ArgumentOutOfRangeException("capacity", Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    if (!(loadFactor >= 0.1f && loadFactor <= 1.0f))
        throw new ArgumentOutOfRangeException("loadFactor", Environment.GetResourceString("ArgumentOutOfRange_HashtableLoadFactor", .1, 1.0));
    Contract.EndContractBlock();

    // Based on perf work, .72 is the optimal load factor for this table.
    this.loadFactor = 0.72f * loadFactor;

    double rawsize = capacity / this.loadFactor;
    if (rawsize > Int32.MaxValue)
        throw new ArgumentException(Environment.GetResourceString("Arg_HTCapacityOverflow"));

    // Avoid awfully small sizes
    int hashsize = (rawsize > InitialSize) ? HashHelpers.GetPrime((int)rawsize) : InitialSize;
    buckets = new bucket[hashsize];

    loadsize = (int)(this.loadFactor * hashsize);
    isWriterInProgress = false;
    // Based on the current algorithm, loadsize must be less than hashsize.
    Contract.Assert(loadsize < hashsize, "Invalid hashtable loadsize!");
}
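// Worked example (illustrative, not from the original source): new Hashtable(100, 1.0f)
// sets this.loadFactor = 0.72f, so rawsize = 100 / 0.72, about 138.9. That exceeds
// InitialSize, so hashsize = HashHelpers.GetPrime(138), the next prime the helper
// yields at or above 138, and loadsize = (int)(0.72f * hashsize). Because the
// effective load factor is capped at 0.72, loadsize always stays strictly below
// hashsize, which is what the assert above checks.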
uint seed;
uint incr;
// Take a snapshot of buckets, in case another thread resizes table
bucket[] lbuckets = buckets;
uint hashcode = InitHash(key, lbuckets.Length, out seed, out incr);
int ntry = 0;

bucket b;
int bucketNumber = (int) (seed % (uint)lbuckets.Length);
do {
    b = lbuckets[bucketNumber];
    if (b.key == null) {
        return false;
    }
    if (((b.hash_coll & 0x7FFFFFFF) == hashcode) && KeyEquals(b.key, key))
        return true;
    bucketNumber = (int) (((long)bucketNumber + incr) % (uint)lbuckets.Length);
} while (b.hash_coll < 0 && ++ntry < lbuckets.Length);
return false;
// 'InitHash' is basically an implementation of classic DoubleHashing (see http://en.wikipedia.org/wiki/Double_hashing)
//
// 1) The only 'correctness' requirement is that the 'increment' used to probe
//    a. Be non-zero
//    b. Be relatively prime to the table size 'hashSize'. (This is needed to ensure you probe all entries in the
//       table before you 'wrap' and visit entries already probed.)
// 2) Because we choose table sizes to be primes, we just need to ensure that the increment is 0 < incr < hashSize.
//
// Thus this function would work: Incr = 1 + (seed % (hashSize - 1))
//
// While this works well for 'uniformly distributed' keys, in practice non-uniformity is common.
// In particular, 'mostly sequential' keys are common, where you get long clusters of keys that 'pack'.
// To avoid bad behavior you want the increment to be 'large' even for 'small' values (because small
// values tend to happen more in practice). Thus we multiply 'seed' by a number that makes these small
// values bigger (and does not hurt large values). We picked HashPrime (101) because it is prime, and if
// 'hashSize - 1' is not a multiple of HashPrime (enforced in GetPrime), then incr can potentially be
// every value from 1 to hashSize - 1. The choice was largely arbitrary.
//
// Computes the hash function: H(key, i) = h1(key) + i*h2(key, hashSize).
// The out parameter seed is h1(key), while the out parameter
// incr is h2(key, hashSize). Callers of this function should
// add incr each time through a loop.
private uint InitHash(Object key, int hashsize, out uint seed, out uint incr) {
    // Hashcode must be positive. Also, we must not use the sign bit, since
    // that is used for the collision bit.
    uint hashcode = (uint) GetHash(key) & 0x7FFFFFFF;
    seed = (uint) hashcode;
    // Restriction: incr MUST be between 1 and hashsize - 1, inclusive, for
    // the modular arithmetic to work correctly. This guarantees you'll
    // visit every bucket in the table exactly once within hashsize
    // iterations. Violate this and it'll cause obscure bugs forever.
    // If you change this calculation for h2(key), update putEntry too!
    incr = (uint)(1 + ((seed * HashPrime) % ((uint)hashsize - 1)));
    return hashcode;
}
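// Worked example (illustrative, not from the original source): take a hypothetical
// hashsize of 11 and a key whose masked hash code is 7. Then seed = 7 and
// incr = 1 + ((7 * 101) % 10) = 1 + 7 = 8. Since 0 < 8 < 11 and 11 is prime,
// gcd(8, 11) == 1, so the probe sequence
//   7, (7+8)%11 = 4, 1, 9, 6, 3, 0, 8, 5, 2, 10
// visits all 11 buckets exactly once before wrapping back to 7.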
if (key == null) {
    throw new ArgumentNullException("key", Environment.GetResourceString("ArgumentNull_Key"));
}
Contract.EndContractBlock();
if (count >= loadsize) {
    expand();
}
else if (occupancy > loadsize && count > 100) {
    rehash();
}
uint seed;
uint incr;
// Assume we only have one thread writing concurrently. Modify
// buckets to contain new data, as long as we insert in the right order.
uint hashcode = InitHash(key, buckets.Length, out seed, out incr);
int ntry = 0;
// We use emptySlotNumber to cache the first empty slot. We chose to reuse slots
// created by Remove that have the collision bit set over using up new slots.
int emptySlotNumber = -1;
int bucketNumber = (int) (seed % (uint)buckets.Length);
do {
    // Set emptySlotNumber to the current bucket if it is the first available bucket we have seen
    // that once contained an entry and also has had a collision.
    // We need to search this entire collision chain because we have to ensure that there are no
    // duplicate entries in the table.
    if (emptySlotNumber == -1 && (buckets[bucketNumber].key == buckets) && (buckets[bucketNumber].hash_coll < 0)) //(((buckets[bucketNumber].hash_coll & unchecked(0x80000000))!=0)))
        emptySlotNumber = bucketNumber;
    // Insert the key/value pair into this bucket if this bucket is empty and has never contained an entry,
    // OR
    // this bucket once contained an entry but there has never been a collision.
    if ((buckets[bucketNumber].key == null) ||
        (buckets[bucketNumber].key == buckets && ((buckets[bucketNumber].hash_coll & unchecked(0x80000000)) == 0))) {

        // If we have found an available bucket that has never had a collision, but we've seen an available
        // bucket in the past that has the collision bit set, use the previous bucket instead.
        if (emptySlotNumber != -1) // Reuse slot
            bucketNumber = emptySlotNumber;

        // We pretty much have to insert in this order. Don't set hash
        // code until the value & key are set appropriately.
#if !FEATURE_CORECLR
        Thread.BeginCriticalRegion();
#endif
        isWriterInProgress = true;
        buckets[bucketNumber].val = nvalue;
        buckets[bucketNumber].key = key;
        buckets[bucketNumber].hash_coll |= (int) hashcode;
        count++;
        UpdateVersion();
        isWriterInProgress = false;
#if !FEATURE_CORECLR
        Thread.EndCriticalRegion();
#endif

#if FEATURE_RANDOMIZED_STRING_HASHING
#if !FEATURE_CORECLR
        // coreclr has randomized string hashing on by default, so we don't need to resize at this point.
        if (ntry > HashHelpers.HashCollisionThreshold && HashHelpers.IsWellKnownEqualityComparer(_keycomparer)) {
            // PERF: We don't want to rehash if _keycomparer is already a RandomizedObjectEqualityComparer since in some
            // cases there may not be any strings in the hashtable and we wouldn't get any mixing.
            if (_keycomparer == null || !(_keycomparer is System.Collections.Generic.RandomizedObjectEqualityComparer)) {
                _keycomparer = HashHelpers.GetRandomizedEqualityComparer(_keycomparer);
                rehash(buckets.Length, true);
            }
        }
#endif // !FEATURE_CORECLR
#endif // FEATURE_RANDOMIZED_STRING_HASHING
        return;
    }
    // The current bucket is in use,
    // OR
    // it is available, but has had the collision bit set and we have already found an available bucket.
    if (((buckets[bucketNumber].hash_coll & 0x7FFFFFFF) == hashcode) && KeyEquals(buckets[bucketNumber].key, key)) {
        if (add) {
            throw new ArgumentException(Environment.GetResourceString("Argument_AddingDuplicate__", buckets[bucketNumber].key, key));
        }
#if !FEATURE_CORECLR
        Thread.BeginCriticalRegion();
#endif
        isWriterInProgress = true;
        buckets[bucketNumber].val = nvalue;
        UpdateVersion();
        isWriterInProgress = false;
#if !FEATURE_CORECLR
        Thread.EndCriticalRegion();
#endif

#if FEATURE_RANDOMIZED_STRING_HASHING
#if !FEATURE_CORECLR
        if (ntry > HashHelpers.HashCollisionThreshold && HashHelpers.IsWellKnownEqualityComparer(_keycomparer)) {
            // PERF: We don't want to rehash if _keycomparer is already a RandomizedObjectEqualityComparer since in some
            // cases there may not be any strings in the hashtable and we wouldn't get any mixing.
            if (_keycomparer == null || !(_keycomparer is System.Collections.Generic.RandomizedObjectEqualityComparer)) {
                _keycomparer = HashHelpers.GetRandomizedEqualityComparer(_keycomparer);
                rehash(buckets.Length, true);
            }
        }
#endif // !FEATURE_CORECLR
#endif // FEATURE_RANDOMIZED_STRING_HASHING
        return;
    }
    // The current bucket is full, and we have therefore collided. We need to set the collision bit
    // UNLESS
    // we have remembered an available slot previously.
    if (emptySlotNumber == -1) { // We don't need to set the collision bit here if we already have an empty slot.
        if (buckets[bucketNumber].hash_coll >= 0) {
            buckets[bucketNumber].hash_coll |= unchecked((int)0x80000000);
            occupancy++;
        }
    }

    bucketNumber = (int) (((long)bucketNumber + incr) % (uint)buckets.Length);
} while (++ntry < buckets.Length);
// This code is here if and only if there were no buckets without a collision bit set in the entire table.
if (emptySlotNumber != -1) {
    // We pretty much have to insert in this order. Don't set hash
    // code until the value & key are set appropriately.
#if !FEATURE_CORECLR
    Thread.BeginCriticalRegion();
#endif
    isWriterInProgress = true;
    buckets[emptySlotNumber].val = nvalue;
    buckets[emptySlotNumber].key = key;
    buckets[emptySlotNumber].hash_coll |= (int) hashcode;
    count++;
    UpdateVersion();
    isWriterInProgress = false;
#if !FEATURE_CORECLR
    Thread.EndCriticalRegion();
#endif
#if FEATURE_RANDOMIZED_STRING_HASHING
#if !FEATURE_CORECLR
    if (ntry > HashHelpers.HashCollisionThreshold && HashHelpers.IsWellKnownEqualityComparer(_keycomparer)) {
        // PERF: We don't want to rehash if _keycomparer is already a RandomizedObjectEqualityComparer since in some
        // cases there may not be any strings in the hashtable and we wouldn't get any mixing.
        if (_keycomparer == null || !(_keycomparer is System.Collections.Generic.RandomizedObjectEqualityComparer)) {
            _keycomparer = HashHelpers.GetRandomizedEqualityComparer(_keycomparer);
            rehash(buckets.Length, true);
        }
    }
#endif // !FEATURE_CORECLR
#endif // FEATURE_RANDOMIZED_STRING_HASHING
    return;
}
// If you see this assert, make sure load factor & count are reasonable.
// Then verify that our double hash function (h2, described at the top of the file)
// meets the requirements described above. You should never see this assert.
Contract.Assert(false, "hash table insert failed! Load factor too high, or our double hashing function is incorrect.");
throw new InvalidOperationException(Environment.GetResourceString("InvalidOperation_HashInsertFailed"));
}
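// Usage sketch (illustrative, not from the original source; it assumes the public
// Add method and the indexer setter both forward here, differing only in 'add'):
//
//   Hashtable h = new Hashtable();
//   h.Add("k", 1);   // add == true: a duplicate key throws ArgumentException
//   h["k"] = 2;      // add == false: an existing key is overwritten in place
//
// The duplicate check is the KeyEquals branch above; only 'add' decides whether it
// throws or overwrites.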
private void rehash( int newsize, bool forceNewHashCode ) {
// reset occupancy
occupancy = 0;

// Don't replace any internal state until we've finished adding to the
// new bucket[]. This serves two purposes:
//   1) Allow concurrent readers to see valid hashtable contents
//      at all times.
//   2) Protect against an OutOfMemoryException while allocating this
//      new bucket[].
bucket[] newBuckets = new bucket[newsize];
// rehash table into new buckets
int nb;
for (nb = 0; nb < buckets.Length; nb++) {
    bucket oldb = buckets[nb];
    if ((oldb.key != null) && (oldb.key != buckets)) {
        int hashcode = ((forceNewHashCode ? GetHash(oldb.key) : oldb.hash_coll) & 0x7FFFFFFF);
        putEntry(newBuckets, oldb.key, oldb.val, hashcode);
    }
}

// New bucket[] is good to go - replace buckets and other internal state.
#if !FEATURE_CORECLR
Thread.BeginCriticalRegion();
#endif
isWriterInProgress = true;
buckets = newBuckets;
loadsize = (int)(loadFactor * newsize);
UpdateVersion();
isWriterInProgress = false;
#if !FEATURE_CORECLR
Thread.EndCriticalRegion();
#endif
// The minimum size of the hashtable is now 3 and the maximum loadFactor is 0.72,
// so loadsize is always strictly less than newsize.
Contract.Assert(loadsize < newsize, "Our current implementation means this is not possible.");
return;
}
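// Worked check (illustrative, not from the original source): at the minimum table
// size of 3 with the maximum effective loadFactor of 0.72,
// loadsize = (int)(0.72 * 3) = 2 < 3, and in general
// loadsize = (int)(loadFactor * newsize) <= 0.72 * newsize < newsize,
// so the assert above cannot fire.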
public virtual void Remove(Object key) {
    if (key == null) {
        throw new ArgumentNullException("key", Environment.GetResourceString("ArgumentNull_Key"));
    }
    Contract.EndContractBlock();
    Contract.Assert(!isWriterInProgress, "Race condition detected in usages of Hashtable - multiple threads appear to be writing to a Hashtable instance simultaneously! Don't do that - use Hashtable.Synchronized.");

    uint seed;
    uint incr;
    // Assuming only one concurrent writer, write directly into buckets.
    uint hashcode = InitHash(key, buckets.Length, out seed, out incr);
    int ntry = 0;

    bucket b;
    int bn = (int) (seed % (uint)buckets.Length); // bucketNumber
    do {
        b = buckets[bn];
        if (((b.hash_coll & 0x7FFFFFFF) == hashcode) && KeyEquals(b.key, key)) {
#if !FEATURE_CORECLR
            Thread.BeginCriticalRegion();
#endif
            isWriterInProgress = true;
            // Clear hash_coll field, then key, then value.
            buckets[bn].hash_coll &= unchecked((int)0x80000000);
            if (buckets[bn].hash_coll != 0) {
                buckets[bn].key = buckets;
            }
            else {
                buckets[bn].key = null;
            }
            buckets[bn].val = null; // Free object references sooner & simplify ContainsValue.
            count--;
            UpdateVersion();
            isWriterInProgress = false;
#if !FEATURE_CORECLR
            Thread.EndCriticalRegion();
#endif
            return;
        }
        bn = (int) (((long)bn + incr) % (uint)buckets.Length);
    } while (b.hash_coll < 0 && ++ntry < buckets.Length);
    // Removing a key that is not present is intentionally a no-op:
    //throw new ArgumentException(Environment.GetResourceString("Arg_RemoveArgNotFound"));
}
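// Illustrative trace (not from the original source) of the deletion scheme above:
// if the removed bucket had its collision bit set, hash_coll stays 0x80000000
// (non-zero) and key is set to the 'buckets' sentinel, so later probes keep walking
// through this slot instead of stopping. If the bit was clear, hash_coll becomes 0
// and key becomes null, so a probe landing here can stop immediately - the same
// 'b.key == null' early-out used by the lookup paths.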
get {
    if (key == null) {
        throw new ArgumentNullException("key", Environment.GetResourceString("ArgumentNull_Key"));
    }
    Contract.EndContractBlock();

    uint seed;
    uint incr;

    // Take a snapshot of buckets, in case another thread does a resize.
    bucket[] lbuckets = buckets;
    uint hashcode = InitHash(key, lbuckets.Length, out seed, out incr);
    int ntry = 0;

    bucket b;
    int bucketNumber = (int) (seed % (uint)lbuckets.Length);
    do {
        int currentversion;
        // A read operation on the hashtable has four steps:
        //   (1) calculate the hash and find the slot number.
        //   (2) compare the hashcode; if equal, go to step 3. Otherwise end.
        //   (3) compare the key; if equal, go to step 4. Otherwise end.
        //   (4) return the value contained in the bucket.
        // After step 3 and before step 4, a writer can kick in, remove the old item, and add a new one
        // in the same bucket. So the reader needs to check whether the hashtable was modified during
        // the above steps.
        //
        // Writers (Insert, Remove, Clear) set the 'isWriterInProgress' flag before they start modifying
        // the hashtable and clear the flag when they are done. When the flag is cleared, the 'version'
        // is increased. We repeat the read if a writer was in progress, or finished a modification,
        // during the read.
        //
        // Our memory model guarantees that if we pick up a change in a bucket made on another processor,
        // the reader will see 'isWriterInProgress' as true or will see that 'version' has changed.
        int spinCount = 0;
        do {
            // This is a volatile read; the memory accesses below cannot be moved ahead of it.
            currentversion = version;
            b = lbuckets[bucketNumber];

            // Contention between reader and writer shouldn't happen frequently.
            // But just in case this burns CPU, yield control of the CPU if we have spun a few times.
            // 8 is just an arbitrarily picked number.
            if ((++spinCount) % 8 == 0) {
                Thread.Sleep(1); // 1 means we are yielding control to all threads, including low-priority ones.
            }
        } while (isWriterInProgress || (currentversion != version));
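        // Sketch of the lock-free read protocol above (illustrative, not from the
        // original source), with writes ordered as in Insert/Remove:
        //
        //   writer: isWriterInProgress = true;  mutate bucket;  UpdateVersion();  isWriterInProgress = false;
        //   reader: currentversion = version;  b = lbuckets[bucketNumber];
        //           retry while (isWriterInProgress || currentversion != version);
        //
        // Any write that overlaps the read trips at least one of the two checks, so the
        // reader only acts on a bucket value captured entirely between writes.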
// Returns a thread-safe wrapper for a Hashtable.
//
[HostProtection(Synchronization=true)]
public static Hashtable Synchronized(Hashtable table) {
    if (table == null)
        throw new ArgumentNullException("table");
    Contract.EndContractBlock();
    return new SyncHashtable(table);
}
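// Usage sketch (illustrative, not from the original source): a Hashtable instance
// supports many concurrent readers with at most one writer; once multiple threads
// may write, route every access through the synchronized wrapper.
//
//   Hashtable shared = Hashtable.Synchronized(new Hashtable());
//   shared["config"] = "v1";           // writes are serialized by SyncHashtable
//   object current = shared["config"]; // reads go through the same wrapper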