openafs/src/WINNT/afsapplib/hashlist.h
2000-11-04 10:01:08 +00:00

522 lines
18 KiB
C++

/*
* Copyright 2000, International Business Machines Corporation and others.
* All Rights Reserved.
*
* This software has been released under the terms of the IBM Public
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
*/
/*
* HashList - super-quick list manager
*
*/
#ifndef HASHLIST_H
#define HASHLIST_H
// EXPORTED decorates every class and function declared in this header.
// If the including code has not already defined it, it expands to nothing.
// NOTE(review): presumably a DLL build defines EXPORTED as an export/import
// specifier before including this file -- confirm against the build setup.
#ifndef EXPORTED
#define EXPORTED
#endif
/*
* A HASHLIST effectively implements an in-memory database: it maintains
* the objects you specify in a managed list, placing hashtables across
* whatever key fields you select. The practical upshot is that it's a
* thread-safe, convenient resizable-array manager that provides fast
* searching features.
*
* There are many tidbits about using HashLists mentioned in the examples
* below--some are pretty important, so read through them carefully.
*
* EXAMPLES ___________________________________________________________________
*
* // Some boring example dataset--three OBJECT structures
*
* typedef struct OBJECT {
* DWORD dwKey;
* TCHAR szText[ 256 ];
* } OBJECT;
*
* static OBJECT aObjects[] = {
* { 275, "First Object" },
* { 473, "Second Object" },
* { 32, "Third Object" },
* };
*
* // When adding data to the list, note that it's not necessary to create
* // keys at any particular time (or at all). Objects already in the list
* // when you add a new key will automatically get indexed, just as will any
* // new objects that you add later. Remember that you can't track NULL
* // objects in the list--hl.Add(0) is illegal. An object can be added to
* // the list any number of times.
*
* HASHLIST hl;
* hl.Add (&aObjects[0]);
* hl.CreateKey ("MyKey", MyKey_Compare, MyKey_HashObject, MyKey_HashData);
* hl.Add (&aObjects[1]);
* hl.Add (&aObjects[2]);
*
* // Walk the hashlist. There are a few things worthy of note here
* // besides the enumeration technique itself: calling FindFirst() returns
* // a pointer to an *allocated* ENUMERATION structure. The structure
* // is automatically freed whenever a FindNext() or FindPrevious() is
* // about to return NULL--so loops like the one below don't have to
* // do anything to free the ENUMERATION object. Since a hashlist holds
* // a critical section while any ENUMERATION object exists for it,
* // it's important to remember that if you do not walk the list to its end
* // you must explicitly free the enumeration object yourself.
*
* for (LPENUM pEnum = hl.FindFirst(); pEnum; pEnum = pEnum->FindNext())
* {
* OBJECT *pObj = (OBJECT*)(pEnum->GetObject());
* printf ("%s", pObj->szText);
* }
*
* // Find the first object with 473 as its dwKey element. Note that since
* // we're not walking the whole list, we have to free the ENUMERATION object
* // when we're done.
*
* LPHASHLISTKEY pKey = hl.FindKey ("MyKey");
* DWORD dwKeyFind = 473;
*
* LPENUM pEnum;
* if ((pEnum = pKey->FindFirst (&dwKeyFind)) != NULL)
* {
* OBJECT *pObj = (OBJECT*)(pEnum->GetObject());
* printf ("%s", pObj->szText);
* delete pEnum;
* }
*
* // A shorter way to get just the first object with 473 as a dwKey element.
* // Since it doesn't return an ENUMERATION object, you don't have to free
* // anything.
*
* OBJECT *pObj = (OBJECT*)pKey->GetFirstObject (&dwKeyFind);
*
* // Find all objects with 473 as a dwKey element. Since we're walking the
* // whole list, the ENUMERATION object will be deleted automatically.
*
* for (LPENUM pEnum = pKey->FindFirst (&dwKeyFind); pEnum; pEnum = pEnum->FindNext())
* {
* OBJECT *pObj = (OBJECT*)(pEnum->GetObject());
* printf ("%s", pObj->szText);
* }
*
* // HashLists also provide fast (i.e., effectively constant-time) testing
* // to see if an object is already in the list; that allows use of the
* // AddUnique() method, which ensures duplicates won't occur.
*
* HASHLIST hl;
* hl.AddUnique (&aObjects[0]);
* hl.AddUnique (&aObjects[0]); // succeeds but doesn't add item
* hl.Remove (&aObjects[0]); // list is now empty
*
* HASHLIST hl;
* LPHASHLISTKEY pKey = hl.CreateKey ("IntKey", IntKey_Compare, IntKey_HashObject, IntKey_HashData);
* int a = 153;
* int b = 287;
* int c = 153;
* hl.AddUnique (&a, pKey);
* hl.AddUnique (&b, pKey);
* hl.AddUnique (&c, pKey);
* // The list now contains two items: {&a} and {&b}.
* // Because {&c} was not unique over key {pKey}, it was not added.
*
* // Remove the first object with a dwKey of 473 from the list. Since we're
* // starting an enumeration but not walking it to its end, we'll have
* // to free the ENUMERATION object explicitly.
*
* LPENUM pEnum;
* if ((pEnum = pKey->FindFirst (&dwKeyFind)) != NULL)
* {
* hl.Remove (pEnum->GetObject());
* delete pEnum;
* }
*
* // Remove all objects in the list--technique one. This technique illustrates
* // a useful fact: if you remove the object that your ENUMERATION
* // object represents, you can still use that enumeration object to continue
* // the enumeration! For the fun of it, we'll show erasing the list both
* // forwards and backwards.
*
* for (LPENUM pEnum = hl.FindFirst(); pEnum; pEnum = pEnum->FindNext()) {
* hl.Remove (pEnum->GetObject());
* }
*
* for (LPENUM pEnum = hl.FindLast(); pEnum; pEnum = pEnum->FindPrevious()) {
* hl.Remove (pEnum->GetObject());
* }
*
* // Remove all objects--technique two. This technique is a little messier:
* // it finds the first object in the list, removes it, and re-starts the
* // enumeration. Since an enumeration is started but not walked to its end,
* // the ENUMERATION object must be freed explicitly in each iteration.
*
* LPENUM pEnum;
* while ((pEnum = hl.FindFirst()) != NULL) {
* hl.Remove (pEnum->GetObject());
* delete pEnum;
* }
*
* // Remove all objects--technique three. This is essentially the same
* // as technique two (find the first object, remove it, repeat), except
* // that it avoids using an ENUMERATION object so there's nothing to free.
*
* OBJECT *pObject;
* while ((pObject = hl.GetFirstObject()) != NULL)
* hl.Remove (pObject);
*
* // If you change something in an object that you think would affect one or
* // more of the list's keys, you should tell the HashList (by calling
* // Update()) so it can update itself. An important note: if you're
* // enumerating all items, you can call Update() for any item without
* // affecting the enumeration; however, if you're enumerating along a key
* // when you call Update(), you'll need to stop the enumeration.
*
* aObjects[2].dwKey = 182;
* hl.Update (&aObjects[2]); // (same as hl.Remove(...) + hl.Add(...))
*
* // It's important to remember that the HashList only knows about your
* // objects as generic pointers--it doesn't free them if you remove
* // them from the list.
*
* OBJECT *pObject = new OBJECT;
* hl.Add (pObject);
* hl.Remove (pObject);
* delete pObject;
*
* // Another point about freeing objects that you have in the list:
* // it's safe to delete the object even before you remove it from the
* // list, but if you do so, make sure you remove the object immediately.
* // Also, to be thread-safe, you should surround the section with
* // Enter()/Leave(); otherwise, another thread may wake up and try to
* // work with the list between when you free the object and when you
* // remove it from the list.
*
* OBJECT *pObject = new OBJECT;
* hl.Add (pObject);
* hl.Enter();
* delete pObject;
* hl.Remove (pObject);
* hl.Leave();
*
* // Each key requires that you supply three callback functions:
* // one to extract the key from an Object and hash it, one to
* // just hash a key directly, and one to check an object to see
* // if it matches the specified key exactly. A HASHVALUE is really
* // just a DWORD--don't worry about the range (since the hashlist
* // will automatically modulo the return by the size of its table).
*
* HASHVALUE CALLBACK MyKey_HashObject (LPHASHLISTKEY pKey, PVOID pObject) {
* return MyKey_HashData (pKey, &((OBJECT*)pObject)->dwKey);
* }
*
* HASHVALUE CALLBACK MyKey_HashData (LPHASHLISTKEY pKey, PVOID pData) {
* return (HASHVALUE)*(DWORD*)pData;
* }
*
* BOOL CALLBACK MyKey_Compare (LPHASHLISTKEY pKey, PVOID pObject, PVOID pData) {
* return (((OBJECT*)pObject)->dwKey == *(DWORD*)pData);
* }
*
*/
// Default value of the cInitialTableSize argument to HASHLIST::CreateKey().
#define cTABLESIZE_DEFAULT 1000
// Forward declarations of the classes defined in this header, together with
// their pointer aliases. ENUM and LPENUM are short names for ENUMERATION
// and LPENUMERATION respectively.
typedef class EXPORTED EXPANDARRAY EXPANDARRAY, *LPEXPANDARRAY;
typedef class EXPORTED HASHLIST HASHLIST, *LPHASHLIST;
typedef class EXPORTED HASHLISTKEY HASHLISTKEY, *LPHASHLISTKEY;
typedef class EXPORTED ENUMERATION ENUMERATION, ENUM, *LPENUMERATION, *LPENUM;
// A hash value is a plain DWORD; the usage notes at the top of this file
// state that callbacks need not restrict its range.
typedef DWORD HASHVALUE;
// The three callbacks supplied to HASHLIST::CreateKey() for each key:
//  - COMPAREOBJECTDATA: reports whether pObject matches the key value that
//    pData points to;
//  - HASHOBJECT: extracts the key from pObject and hashes it;
//  - HASHDATA: hashes a key value (pData) directly.
// Each receives the HASHLISTKEY it was registered with as pKey.
typedef BOOL (CALLBACK * LPHASHFUNC_COMPAREOBJECTDATA)(LPHASHLISTKEY pKey, PVOID pObject, PVOID pData);
typedef HASHVALUE (CALLBACK * LPHASHFUNC_HASHOBJECT)(LPHASHLISTKEY pKey, PVOID pObject);
typedef HASHVALUE (CALLBACK * LPHASHFUNC_HASHDATA)(LPHASHLISTKEY pKey, PVOID pData);
// Record describing one tracked object: the object pointer, a hash value,
// and the indices of two neighbouring records.
// NOTE(review): presumably these records live in the EXPANDARRAYs owned by
// HASHLIST and HASHLISTKEY, with iPrevious/iNext forming a doubly-linked
// chain by array index -- confirm against the implementation file.
struct HASHLISTENTRY
{
    // Caller-supplied object pointer.
    PVOID pObject;

    // Hash value stored alongside the object.
    HASHVALUE hv;

    // Index of the preceding record.
    size_t iPrevious;

    // Index of the following record.
    size_t iNext;
};
typedef HASHLISTENTRY *LPHASHLISTENTRY;
// Statistics about the distribution of items within one key's hashtable.
// Returned by HASHLISTKEY::GetDebugInfo() and released with
// HASHLISTKEY::FreeDebugInfo().
struct HASHLISTKEYDEBUGINFO
{
    // Number of buckets currently in the hashtable.
    size_t cBuckets;

    // Number of items in each bucket of the hashtable.
    // NOTE(review): presumably an array of cBuckets counters -- confirm.
    size_t *aBuckets;

    // Percent effectiveness of the hashtable (100% = even distribution).
    WORD perEffective;
};
typedef HASHLISTKEYDEBUGINFO *LPHASHLISTKEYDEBUGINFO;
/*
* EXPANDARRAY CLASS __________________________________________________________
*
*/
// Header placed at the start of each storage heap that an EXPANDARRAY
// tracks through its m_aHeaps member. The element storage is allocated
// together with, and directly after, this header.
// NOTE(review): the "+ 4" in the comment below presumably reflects a 4-byte
// pointer; confirm how the implementation computes the offset on 64-bit.
typedef struct
{
PVOID aElements; // = EXPANDARRAYHEAP.aElements + 4;
// Followed by allocated space for aElements
} EXPANDARRAYHEAP, *LPEXPANDARRAYHEAP;
// EXPANDARRAY: an array of fixed-size elements addressed by index.
// NOTE(review): judging by m_cHeaps / m_aHeaps / m_cElementsPerHeap, storage
// is presumably split across separately allocated heaps of
// m_cElementsPerHeap elements each, added as higher indices are touched --
// confirm against the implementation.
class EXPORTED EXPANDARRAY
{
public:
// cbElement is the per-element size (presumably in bytes, per the "cb"
// prefix -- TODO confirm). cElementsPerHeap defaults to 0.
// NOTE(review): 0 presumably selects an implementation-chosen heap size.
EXPANDARRAY (size_t cbElement, size_t cElementsPerHeap = 0);
~EXPANDARRAY (void);
// GetAt() yields a pointer for element iElement; SetAt() additionally
// takes source data for that element (pData, NULL by default).
// NOTE(review): whether GetAt() returns NULL for a never-set index, and
// whether SetAt(i, NULL) zero-fills the element, is not visible here.
PVOID GetAt (size_t iElement);
PVOID SetAt (size_t iElement, PVOID pData = NULL);
private:
size_t m_cbElement; // element size given to the constructor (presumably)
size_t m_cElementsPerHeap; // elements per heap (presumably from constructor)
size_t m_cHeaps; // presumably the number of entries in m_aHeaps
LPEXPANDARRAYHEAP *m_aHeaps; // array of pointers to heap headers
};
/*
* HASHLIST CLASS _____________________________________________________________
*
*/
// HASHLIST: the managed list itself. It tracks caller-owned objects as
// generic PVOIDs (it never frees them) and owns zero or more HASHLISTKEYs
// that index those objects. See the EXAMPLES section at the top of this
// file for usage.
class EXPORTED HASHLIST
{
friend class HASHLISTKEY;
friend class ENUMERATION;
public:
HASHLIST (void);
~HASHLIST (void);
// A HashList is basically just a resizable array of objects.
// As such, the class exposes methods for adding objects to,
// and removing objects from, the list. NULL objects cannot be
// tracked (Add(0) is illegal); the same object may be added more
// than once. Remove() does not free the object.
// NOTE(review): the BOOL results presumably report success -- confirm.
//
BOOL Add (PVOID pObjectToAdd);
void Remove (PVOID pObjectToRemove);
BOOL Update (PVOID pObjectToUpdate); // like Remove() followed by Add()
// Calling AddUnique() instead of Add() to add an item ensures
// that the specified item will only be added if it does not
// already exist in the list. If a key is also specified during the
// AddUnique() call, then the test will be performed over a specific
// key--detecting duplicate items by checking the key's indexed fields,
// rather than just comparing item's addresses. Per the examples above,
// AddUnique() on a duplicate succeeds without adding the item.
//
BOOL AddUnique (PVOID pObjectToAdd, LPHASHLISTKEY pKeyUnique = NULL);
BOOL fIsInList (PVOID pObject);
// Each HashList can use one or more "keys" for looking up items;
// each key is represented by an internally-maintained hash table
// and corresponding hashing functions (which you supply when you
// create a key). Keys can be added or removed at any time; objects
// already in the list when a key is created get indexed automatically.
//
LPHASHLISTKEY CreateKey (LPCTSTR pszKeyName,
LPHASHFUNC_COMPAREOBJECTDATA fnCompareObjectData,
LPHASHFUNC_HASHOBJECT fnHashObject,
LPHASHFUNC_HASHDATA fnHashData,
size_t cInitialTableSize = cTABLESIZE_DEFAULT);
LPHASHLISTKEY FindKey (LPCTSTR pszKeyName);
void RemoveKey (LPHASHLISTKEY pKey);
// A list isn't much good without methods for enumeration.
// A HashList provides enumeration in the form of a list
// rather than an array (to prevent problems with holes when
// items are removed). You can walk the list in either direction.
// An important note: a HashList does not guarantee the order
// of enumeration will match the order of insertion (and, in fact,
// it probably won't).
//
// Alternately, you can initiate the walk from a HASHLISTKEY;
// doing so allows you to quickly enumerate only items which match a
// particular value on that key, improving search performance.
//
// FindFirst()/FindLast() return an allocated ENUMERATION that must be
// deleted by the caller unless it is walked until FindNext() or
// FindPrevious() returns NULL. GetFirstObject()/GetLastObject() return
// the object directly, so there is nothing to free.
//
LPENUM FindFirst (void);
LPENUM FindLast (void);
PVOID GetFirstObject (void);
PVOID GetLastObject (void);
// It's also possible to quickly determine the number of objects
// in the list, without enumerating them all.
//
size_t GetCount (void);
// All HASHLIST routines are thread-safe, but you can wrap compound
// operations using the same critical section as the HASHLIST uses
// internally. You can also specify an alternate critical section
// for the HASHLIST to use instead.
//
void Enter (void);
void Leave (void);
void SetCriticalSection (CRITICAL_SECTION *pcs); // NULL = use internal
private:
// NOTE(review): the roles below are inferred from the member names and
// types only; confirm against the implementation before relying on them.
size_t m_iFirst; // presumably index of the first entry in enumeration order
size_t m_iLast; // presumably index of the last entry in enumeration order
size_t m_iNextFree; // presumably index of the next unused slot
LPEXPANDARRAY m_aObjects; // backing storage for the list's entries
size_t m_cObjects; // presumably the value reported by GetCount()
size_t m_cObjectsMax; // presumably the high-water mark of used slots
LPHASHLISTKEY *m_apKeys; // array of key pointers
size_t m_cpKeys; // presumably the number of slots in m_apKeys
// Built-in key plus its three callbacks (same signatures as the
// LPHASHFUNC_* typedefs).
// NOTE(review): presumably indexes objects by address to make
// fIsInList()/AddUnique() fast -- confirm.
LPHASHLISTKEY m_pKeyIndex;
static BOOL CALLBACK KeyIndex_CompareObjectData (LPHASHLISTKEY pKey, PVOID pObject, PVOID pData);
static HASHVALUE CALLBACK KeyIndex_HashObject (LPHASHLISTKEY pKey, PVOID pObject);
static HASHVALUE CALLBACK KeyIndex_HashData (LPHASHLISTKEY pKey, PVOID pData);
// m_cs is the internal critical section; m_pcs is presumably the one
// actually in use (either &m_cs or whatever SetCriticalSection() supplied).
CRITICAL_SECTION m_cs;
CRITICAL_SECTION *m_pcs;
};
/*
* HASHLISTKEY CLASS _____________________________________________________________
*
*/
// HASHLISTKEY: one index over a HASHLIST, defined by the three callbacks
// supplied when the key was created. The constructor and destructor are
// private, so keys are created and destroyed only by the friend classes;
// HASHLIST::CreateKey() and HASHLIST::RemoveKey() are the public entry
// points that take or return key pointers.
class EXPORTED HASHLISTKEY
{
friend class HASHLIST;
friend class ENUMERATION;
public:
// Since every HASHLISTKEY is a member of exactly one HASHLIST,
// it contains a pointer back to that HASHLIST. You can get that
// pointer, to determine which list the key is used in.
//
LPHASHLIST GetHashList (void);
// Each HASHLISTKEY contains an internal (hidden) hashtable,
// and pointers to the functions which you supplied to hash objects
// for inclusion in that table. For convenience, it exposes methods
// for calling those functions.
//
BOOL CompareObjectData (PVOID pObject, PVOID pData);
HASHVALUE HashObject (PVOID pObject);
HASHVALUE HashData (PVOID pData);
// Instead of initiating an enumeration through the HASHLIST, you may
// initiate an enumeration through a HASHLISTKEY. Doing so indicates
// that you only want to enumerate items whose keys match a particular
// value; the HASHLISTKEY's internal hashtable is employed to speed the
// search up. pData points to the key value to match (it is what the
// key's HashData/CompareObjectData callbacks receive in the examples).
// As with HASHLIST::FindFirst(), a returned ENUMERATION must be deleted
// unless it is walked to its end.
//
LPENUM FindFirst (PVOID pData);
LPENUM FindLast (PVOID pData);
PVOID GetFirstObject (PVOID pData);
PVOID GetLastObject (PVOID pData);
BOOL fIsInList (PVOID pObject); // Equivalent to (!!GetFirstObject())
// For debugging purposes, when developing your hashing algorithms
// you may want to use the following routines to examine the distribution
// of data within the key. They enable you to see how many items are
// on each bucket within the hash table; other statistics are available.
// Release the structure returned by GetDebugInfo() with FreeDebugInfo().
//
LPHASHLISTKEYDEBUGINFO GetDebugInfo (void);
void FreeDebugInfo (LPHASHLISTKEYDEBUGINFO pInfo);
private:
HASHLISTKEY (LPHASHLIST pList, LPCTSTR pszKeyName, LPHASHFUNC_COMPAREOBJECTDATA fnCompareObjectData, LPHASHFUNC_HASHOBJECT fnHashObject, LPHASHFUNC_HASHDATA fnHashData, size_t cTableSize);
~HASHLISTKEY (void);
// Index maintenance, reachable only from the friend classes.
// NOTE(review): iObject is presumably the object's slot index in the
// owning HASHLIST, and Resize() presumably regrows the bucket table --
// confirm against the implementation.
void Add (size_t iObject, PVOID pObject);
void Remove (size_t iObject);
void Resize (void);
private:
LPEXPANDARRAY m_aObjects; // per-key entry storage
// One hashtable bucket; iFirst/iLast are presumably the indices of the
// first and last entries chained on the bucket -- confirm.
struct HASHBUCKET {
size_t iFirst;
size_t iLast;
} *m_aBuckets;
size_t m_cBuckets; // presumably the number of elements in m_aBuckets
LPHASHLIST m_pList; // owning list (what GetHashList() reports, presumably)
// NOTE(review): fixed 256-TCHAR buffer; confirm the constructor bounds
// its copy of pszKeyName.
TCHAR m_szKeyName[ 256 ];
// The callbacks supplied at creation time.
LPHASHFUNC_COMPAREOBJECTDATA m_fnCompareObjectData;
LPHASHFUNC_HASHOBJECT m_fnHashObject;
LPHASHFUNC_HASHDATA m_fnHashData;
};
/*
* ENUMERATION CLASS _____________________________________________________________
*
*/
// ENUMERATION: a cursor over the objects of a HASHLIST, obtained from
// HASHLIST::FindFirst()/FindLast() or HASHLISTKEY::FindFirst()/FindLast().
// The constructor is private, so only those friend classes create cursors.
class EXPORTED ENUMERATION
{
    friend class HASHLIST;
    friend class HASHLISTKEY;

public:
    // Each ENUMERATION object represents one object in the HASHLIST.
    // GetObject() returns the pointer to that object.
    //
    PVOID GetObject (void);

    // Additionally, an ENUMERATION object allows you to traverse
    // (forwards or backwards) the list of objects which meet your search
    // criteria. If you started the enumeration through a HASHLIST
    // object then all objects in the list will be enumerated; if you
    // started the enumeration through a HASHLISTKEY object then only
    // those objects which have the specified value for that key will be
    // enumerated. If either of the routines below is about to return NULL,
    // it will automatically 'delete this' before doing so--ending the
    // enumeration. Do not touch the ENUMERATION after a NULL return.
    //
    LPENUMERATION FindNext (void);
    LPENUMERATION FindPrevious (void);

    // You obtain an ENUMERATION object by calling a FindFirst() method,
    // either through the HASHLIST object or one of its keys. As mentioned
    // in the examples above, the ENUMERATION object is automatically
    // freed whenever FindNext() or FindPrevious() returns NULL--however,
    // if you stop the enumeration before then, you'll have to explicitly
    // free the object with 'delete'. Failure to do so means (1) a memory
    // leak, and more seriously, (2) the active thread will hold a critical
    // section for the hashlist forever.
    //
    ~ENUMERATION (void);

private:
    // pList is the list being walked and iObject the starting slot index.
    // pKey/pData are NULL by default and are given when the walk is
    // restricted to objects matching *pData on pKey.
    ENUMERATION (LPHASHLIST pList, size_t iObject, LPHASHLISTKEY pKey = NULL, PVOID pData = NULL);

    // Declared without a redundant "ENUMERATION::" qualifier: a qualified
    // name on an in-class member declaration is ill-formed in standard C++
    // and is rejected by conforming compilers.
    // NOTE(review): presumably caches m_iNext/m_iPrevious up front so the
    // walk survives removal of the current object -- confirm.
    void PrepareWalk (void);

    size_t m_iObject;      // slot index of the object this cursor represents
    size_t m_iNext;        // slot index FindNext() moves to (presumably)
    size_t m_iPrevious;    // slot index FindPrevious() moves to (presumably)
    LPHASHLIST m_pList;    // list being enumerated
    LPHASHLISTKEY m_pKey;  // key restricting the walk, or NULL
    PVOID m_pData;         // key value being matched, or NULL
};
/*
* GENERAL-PURPOSE ____________________________________________________________
*
*/
// Helper routines that compute a HASHVALUE from a string: HashString takes
// a TCHAR string, HashAnsiString a char string, and HashUnicodeString a
// wide-character string.
// NOTE(review): presumably intended for use inside a key's hash callbacks,
// with HashString forwarding to the ANSI or Unicode variant depending on
// the build -- confirm against the implementation.
// NOTE(review): HashUnicodeString takes a non-const LPWSTR, unlike the other
// two; LPCWSTR would be consistent if the implementation never writes to it.
EXPORTED HASHVALUE HashString (LPCTSTR pszString);
EXPORTED HASHVALUE HashAnsiString (LPCSTR pszStringA);
EXPORTED HASHVALUE HashUnicodeString (LPWSTR pszStringW);
#endif // HASHLIST_H