[Scummvm-cvs-logs] SF.net SVN: scummvm: [21431] scummvm/trunk/common

fingolfin at users.sourceforge.net fingolfin at users.sourceforge.net
Fri Mar 24 08:54:09 CET 2006


Revision: 21431
Author:   fingolfin
Date:     2006-03-24 08:53:32 -0800 (Fri, 24 Mar 2006)
ViewCVS:  http://svn.sourceforge.net/scummvm/?rev=21431&view=rev

Log Message:
-----------
- replaced the hash table size heuristic with a table of hard coded table sizes
  (taken from the GNU ISO C++ Library), which are all prime
- replaced the string hash function by one that works slightly better & faster
- changed various types to unsigned
- added code to help debug the number of hash collisions (off by default)

Modified Paths:
--------------
    scummvm/trunk/common/assocarray.cpp
    scummvm/trunk/common/assocarray.h
Modified: scummvm/trunk/common/assocarray.cpp
===================================================================
--- scummvm/trunk/common/assocarray.cpp	2006-03-24 15:39:07 UTC (rev 21430)
+++ scummvm/trunk/common/assocarray.cpp	2006-03-24 16:53:32 UTC (rev 21431)
@@ -60,57 +60,68 @@
 namespace Common {
 
 // int:
-int hashit(int x, int hashsize) {
+uint hashit(int x, uint hashsize) {
 	return x % hashsize;
 }
 
-int data_eq(int x, int y) {
+bool data_eq(int x, int y) {
 	return x == y;
 }
 
 #if 0
 // double:
-int hashit(double d, int hashsize) {
-	int hash, dex;
-	byte *p = (byte *)&d;
-
-	hash = 0;
-
-	for (dex = 0; dex < sizeof(double); dex++)
-		hash = ((hash << 8) + p[dex]) % hashsize;
-
-	return hash;
+uint hashit(double d, uint hashsize) {
+	TODO
 }
 #endif
 
-int data_eq(double d1, double d2) {
+bool data_eq(double d1, double d2) {
 	return (d1 == d2);
 }
 
 // const char *:
-int hashit(const char *str, int hashsize) {
+uint hashit(const char *str, uint hashsize) {
 	const byte *p = (const byte *)str;
-	int hash, dex;
+	uint hash;
+	char c;
 
+	// my31 algo
 	hash = 0;
+	while ((c = *p++))
+		hash = (hash * 31 + c);
 
-	for (dex = 0; p[dex] != 0; dex++)
-		hash = ((hash << 8) + p[dex]) % hashsize;
-
-	return hash;
+	return hash % hashsize;
 }
 
-int data_eq(const char *str1, const char *str2) {
+bool data_eq(const char *str1, const char *str2) {
 	return !strcmp(str1, str2);
 }
 
 // String:
-int hashit(const Common::String &str, int hashsize) {
+uint hashit(const Common::String &str, uint hashsize) {
 	return hashit(str.c_str(), hashsize);
 }
 
-int data_eq(const Common::String &str1, const String &str2) {
+bool data_eq(const Common::String &str1, const String &str2) {
 	return (str1 == str2);
 }
 
+// The following table is taken from the GNU ISO C++ Library's hashtable.h file.
+static const uint primes[] = {
+	53ul,         97ul,         193ul,       389ul,       769ul,
+	1543ul,       3079ul,       6151ul,      12289ul,     24593ul,
+	49157ul,      98317ul,      196613ul,    393241ul,    786433ul,
+	1572869ul,    3145739ul,    6291469ul,   12582917ul,  25165843ul,
+	50331653ul,   100663319ul,  201326611ul, 402653189ul, 805306457ul,
+	1610612741ul, 3221225473ul, 4294967291ul
+};
+
+uint nextTableSize(uint x) {
+	int i = 0;
+	while (x >= primes[i])
+		i++;
+	return primes[i];
+}
+
+
 }	// End of namespace Common

Modified: scummvm/trunk/common/assocarray.h
===================================================================
--- scummvm/trunk/common/assocarray.h	2006-03-24 15:39:07 UTC (rev 21430)
+++ scummvm/trunk/common/assocarray.h	2006-03-24 16:53:32 UTC (rev 21431)
@@ -64,8 +64,6 @@
 
 namespace Common {
 
-#define INIT_SIZE 11
-
 typedef Common::String String;
 
 // If aa is an AssocArray<Key,Val>, then space is allocated each
@@ -80,15 +78,26 @@
 // be considered equal. Also, we assume that "=" works
 // on Val's for assignment.
 
-int hashit(int x, int hashsize);
-int data_eq(int x, int y);
-int hashit(double x, int hashsize);
-int data_eq(double x, double y);
-int hashit(const char *str, int hashsize);
-int data_eq(const char *str1, const char *str2);
-int hashit(const String &str, int hashsize);
-int data_eq(const String &str1, const String &str2);
+uint hashit(int x, uint hashsize);
+bool data_eq(int x, int y);
+uint hashit(double x, uint hashsize);
+bool data_eq(double x, double y);
+uint hashit(const char *str, uint hashsize);
+bool data_eq(const char *str1, const char *str2);
+uint hashit(const String &str, uint hashsize);
+bool data_eq(const String &str1, const String &str2);
 
+
+// The table sizes ideally are primes. We use a helper function to find
+// suitable table sizes.
+uint nextTableSize(uint x);
+
+
+// Enable the following #define if you want to check how many collisions the
+// code produces (many collisions indicate either a bad hash function, or a
+// hash table that is too small).
+//#define DEBUG_HASH_COLLISIONS
+
 template <class Key, class Val>
 class AssocArray {
 private:
@@ -101,7 +110,11 @@
 	};
 	
 	aa_ref_t **_arr;	// hashtable of size arrsize.
-	int _arrsize, _nele;
+	uint _arrsize, _nele;
+	
+#ifdef DEBUG_HASH_COLLISIONS
+	mutable int _collisions;
+#endif
 
 	int lookup(const Key &key) const;
 	void expand_array(void);
@@ -137,30 +150,32 @@
 
 template <class Key, class Val>
 int AssocArray<Key, Val>::lookup(const Key &key) const {
-	int ctr;
+	uint ctr = hashit(key, _arrsize);
 
-	ctr = hashit(key, _arrsize);
-
 	while (_arr[ctr] != NULL && !data_eq(_arr[ctr]->key, key)) {
 		ctr++;
 
 		if (ctr == _arrsize)
 			ctr = 0;
 	}
+	
+#ifdef DEBUG_HASH_COLLISIONS
+	fprintf(stderr, "collisions = %d in AssocArray %p\n", _collisions, (const void *)this);
+#endif
 
 	return ctr;
 }
 
 template <class Key, class Val>
 bool AssocArray<Key, Val>::contains(const Key &key) const {
-	int ctr = lookup(key);
+	uint ctr = lookup(key);
 	return (_arr[ctr] != NULL);
 }
 
 template <class Key, class Val>
 Key *AssocArray<Key, Val>::new_all_keys(void) const {
 	Key *all_keys;
-	int ctr, dex;
+	uint ctr, dex;
 
 	if (_nele == 0)
 		return NULL;
@@ -186,7 +201,7 @@
 template <class Key, class Val>
 Val *AssocArray<Key, Val>::new_all_values(void) const {
 	Val *all_values;
-	int ctr, dex;
+	uint ctr, dex;
 
 	if (_nele == 0)
 		return NULL;
@@ -212,21 +227,24 @@
 
 template <class Key, class Val>
 AssocArray<Key, Val>::AssocArray() {
-	int ctr;
+	uint ctr;
 
-	_arr = new aa_ref_t *[INIT_SIZE];
+	_arrsize = nextTableSize(0);
+	_arr = new aa_ref_t *[_arrsize];
 	assert(_arr != NULL);
-
-	for (ctr = 0; ctr < INIT_SIZE; ctr++)
+	for (ctr = 0; ctr < _arrsize; ctr++)
 		_arr[ctr] = NULL;
 
-	_arrsize = INIT_SIZE;
 	_nele = 0;
+	
+#ifdef DEBUG_HASH_COLLISIONS
+	_collisions = 0;
+#endif
 }
 
 template <class Key, class Val>
 AssocArray<Key, Val>::~AssocArray() {
-	int ctr;
+	uint ctr;
 
 	for (ctr = 0; ctr < _arrsize; ctr++)
 		if (_arr[ctr] != NULL)
@@ -237,20 +255,20 @@
 
 template <class Key, class Val>
 void AssocArray<Key, Val>::clear(bool shrinkArray) {
-	for (int ctr = 0; ctr < _arrsize; ctr++) {
+	for (uint ctr = 0; ctr < _arrsize; ctr++) {
 		if (_arr[ctr] != NULL) {
 			delete _arr[ctr];
 			_arr[ctr] = NULL;
 		}
 	}
 
-	if (shrinkArray && _arrsize > INIT_SIZE) {
-		delete _arr;
+	if (shrinkArray && _arrsize > nextTableSize(0)) {
+		delete[] _arr;
 
-		_arr = new aa_ref_t *[INIT_SIZE];
-		_arrsize = INIT_SIZE;
-
-		for (int ctr = 0; ctr < _arrsize; ctr++)
+		_arrsize = nextTableSize(0);
+		_arr = new aa_ref_t *[_arrsize];
+		assert(_arr != NULL);
+		for (uint ctr = 0; ctr < _arrsize; ctr++)
 			_arr[ctr] = NULL;
 	}
 
@@ -260,19 +278,14 @@
 template <class Key, class Val>
 void AssocArray<Key, Val>::expand_array(void) {
 	aa_ref_t **old_arr;
-	int old_arrsize, old_nele, ctr, dex;
+	uint old_arrsize, old_nele, ctr, dex;
 
 	old_nele = _nele;
 	old_arr = _arr;
 	old_arrsize = _arrsize;
 
-    // GROWTH_FACTOR 1.531415936535
 	// allocate a new array 
-	_arrsize = 153 * old_arrsize / 100;
-
-	// Ensure that _arrsize is odd.
-	_arrsize |= 1;
-
+	_arrsize = nextTableSize(old_arrsize);
 	_arr = new aa_ref_t *[_arrsize];
 
 	assert(_arr != NULL);
@@ -307,12 +320,19 @@
 
 template <class Key, class Val>
 Val &AssocArray<Key, Val>::operator [](const Key &key) {
-	int ctr = lookup(key);
+	uint ctr = lookup(key);
 
 	if (_arr[ctr] == NULL) {
 		_arr[ctr] = new aa_ref_t(key);
 		_nele++;
 
+#ifdef DEBUG_HASH_COLLISIONS
+		if (ctr != hashit(key, _arrsize)) {
+			_collisions++;
+//			fprintf(stderr, "collisions = %d\n", _collisions);
+		}
+#endif
+		// Only fill array to fifty percent
 		if (_nele > _arrsize / 2) {
 			expand_array();
 			ctr = lookup(key);
@@ -329,7 +349,7 @@
 
 template <class Key, class Val>
 const Val &AssocArray<Key, Val>::queryVal(const Key &key) const {
-	int ctr = lookup(key);
+	uint ctr = lookup(key);
 	assert(_arr[ctr] != NULL);
 	return _arr[ctr]->dat;
 }


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.





More information about the Scummvm-git-logs mailing list