I am looking for a data structure that lets me quickly (preferably in O(1)) determine whether a given GUID is a member of a collection of GUIDs.
My cu
Very few data structures offer O(1) access. One is the array, another is the hash map (David's answer), and I know of only one other: the trie. Here follows a simple implementation of a bit-wise trie. It has some interesting properties:
The code:
program Project23;
{$APPTYPE CONSOLE}
uses
SysUtils, Generics.Collections;
type
// Pointer to a trie node. Nodes are raw heap blocks linked through these pointers.
PGuidTrieNode=^TGuidTrieNode;
// One level of the bit-wise trie: a child slot for each possible value of a single bit.
TGuidTrieNode = record
// Sub[False] is followed when the bit is clear, Sub[True] when it is set.
// At the last bit TGuidTrie.Add stores the sentinel Pointer(1) here instead of a real node.
Sub:array[Boolean] of PGuidTrieNode;
end;
// Byte-wise view of a GUID; the trie methods overlay it on a TGUID with "absolute"
// and assert that both types have the same size.
TGuidByteArray = array[0..15] of Byte;
// Set of GUIDs stored as a bit-wise trie: each GUID is a path of 16 * 8 bits from Root,
// so Add and Exists both walk a fixed number of levels.
TGuidTrie = class
protected
// Top node of the trie; allocated in Create and released (with its subtree) in Destroy.
Root: PGuidTrieNode;
public
// Allocates the root node.
constructor Create;
// Releases the nodes reachable from Root.
destructor Destroy;override;
// Inserts G by creating any missing nodes along its bit path.
procedure Add(G: TGUID);
// Returns True when every bit of G leads to an assigned child, False otherwise.
function Exists(G: TGUID): Boolean;
end;
{ TGuidTrie }
// Inserts G into the trie.
//
// The GUID is read as 16 bytes, least significant bit of each byte first, and
// every bit selects the Sub[False] or Sub[True] child of the current node.
// Missing nodes are created on the way down. The slot selected by the very
// last bit receives the sentinel Pointer(1) rather than a real node.
// Adding a GUID that is already present changes nothing.
procedure TGuidTrie.Add(G: TGUID);
var
  GBA: TGuidByteArray absolute G; // byte view of the by-value copy of G
  Node: PGuidTrieNode;
  Child: PGuidTrieNode;
  i: Integer;
  Bit: Integer;
  IsBitSet: Boolean;
const
  BitMask: array[0..7] of Byte = (1, 2, 4, 8, 16, 32, 64, 128);
begin
  Assert(SizeOf(G) = SizeOf(TGuidByteArray));
  Node := Root;
  for i := 0 to High(GBA) do
  begin
    for Bit := 0 to 7 do
    begin
      IsBitSet := (GBA[i] and BitMask[Bit]) <> 0;
      if (i = High(GBA)) and (Bit = 7) then
      begin
        // Payload: last bit reached, mark the GUID as present.
        Node.Sub[IsBitSet] := Pointer(1);
      end
      else
      begin
        if not Assigned(Node.Sub[IsBitSet]) then
        begin
          // GetMemory returns uninitialised memory. Both child slots must be
          // nil, otherwise the Assigned() checks in Add/Exists and the walk
          // in Destroy would follow garbage pointers.
          Child := GetMemory(SizeOf(TGuidTrieNode));
          FillChar(Child^, SizeOf(TGuidTrieNode), 0);
          Node.Sub[IsBitSet] := Child;
        end;
        Node := Node.Sub[IsBitSet];
      end;
    end;
  end;
end;
// Creates an empty trie consisting of a single root node with no children.
constructor TGuidTrie.Create;
begin
  inherited Create;
  Root := GetMemory(SizeOf(TGuidTrieNode));
  // GetMemory does not clear the block; zero it so both child slots start
  // out as nil and an empty trie reports that nothing exists.
  FillChar(Root^, SizeOf(TGuidTrieNode), 0);
end;
// Releases every node of the trie, then the object itself.
destructor TGuidTrie.Destroy;

  // Frees Node after recursively freeing both of its subtrees.
  procedure KillNode(Node: PGuidTrieNode);
  var
    Branch: Boolean;
  begin
    // Both branches must be visited; walking only Sub[True] would leak every
    // node reachable through a clear bit.
    for Branch := False to True do
      // Pointer(1) is the payload sentinel Add stores at the last level; it is
      // not heap memory and must not be followed or freed.
      if Assigned(Node.Sub[Branch]) and (Node.Sub[Branch] <> Pointer(1)) then
        KillNode(Node.Sub[Branch]);
    FreeMemory(Node);
  end;

begin
  // Root is still nil if the constructor failed before allocating it.
  if Assigned(Root) then
    KillNode(Root);
  inherited;
end;
// Reports whether G has been added to the trie.
//
// Follows the child selected by each of the 128 bits of G (bytes in order,
// lowest bit of each byte first) and gives up at the first missing child.
function TGuidTrie.Exists(G: TGUID): Boolean;
var
  Bytes: TGuidByteArray absolute G; // byte view of the by-value copy of G
  Cursor: PGuidTrieNode;
  ByteIdx, BitIdx: Integer;
begin
  Assert(SizeOf(G) = SizeOf(TGuidByteArray));
  Result := False;
  Cursor := Root;
  for ByteIdx := 0 to 15 do
    for BitIdx := 0 to 7 do
    begin
      // Step into the child chosen by the current bit.
      Cursor := Cursor.Sub[(Bytes[ByteIdx] and (1 shl BitIdx)) <> 0];
      if not Assigned(Cursor) then
        Exit; // path breaks off here: G was never added
    end;
  // All 128 steps succeeded; Cursor now holds the payload marker.
  Result := True;
end;
const
  G1: TGUID = '{68D09F12-3E0D-4963-B32C-4EE3BD90F69C}';
  G2: TGUID = '{BEED37F6-9757-41DC-8463-AF094392652B}';

var
  T: TGuidTrie;

// Prints whether G is currently stored in the trie T.
procedure Report(const G: TGUID);
begin
  if T.Exists(G) then
    WriteLn('Exists')
  else
    WriteLn('NOT Exists');
end;

// Demo: query each GUID before and after adding it.
begin
  try
    T := TGuidTrie.Create;
    try
      Report(G1);
      T.Add(G1);
      Report(G1);
      Report(G2);
      T.Add(G2);
      Report(G2);
    finally
      T.Free;
    end;
  except
    on E: Exception do
      Writeln(E.ClassName, ': ', E.Message);
  end;
end.
type
// Pointer to one entry of a bucket chain.
PGuidDictionaryItem = ^TGuidDictionaryItem;
// Entry of a singly linked bucket chain in TGuidDictionary.
TGuidDictionaryItem = record
// GUID this entry is stored under.
Key: TGuid;
// Associated value; TGuidDictionary.Add stores a TObject here and TryFind casts it back.
Value: Pointer;
// Next entry in the same bucket, or nil at the end of the chain.
Next: PGuidDictionaryItem;
end;
// GUID-keyed hash table with a fixed number of buckets and separate chaining.
// Destroy disposes the chain entries only; the stored objects are not freed by this class.
TGuidDictionary = class
private
const
// Number of buckets in FTable.
HashSize = 2048;
var
// NOTE(review): not read or written by any method visible here - confirm whether it is still needed.
Size: integer;
// Bucket heads; each slot is nil or the first entry of a linked chain.
FTable: array [0..HashSize-1] of PGuidDictionaryItem;
// Maps a GUID to a bucket index in FTable.
function GetHashCode(Guid: TGUID): integer;
public
constructor Create;
destructor Destroy; override;
// Stores Value under Key, replacing the value of an existing entry with the same key.
procedure Add(Key: TGUID; Value: TObject);
// Returns True and the stored object when Key is present; otherwise False and nil.
function TryFind(Key: TGUID; out Value: TObject): boolean;
// Returns True when Key is present.
function Contains(Key: TGUID): Boolean;
// Removes the entry for Key; does nothing when Key is absent.
procedure Remove(Key: TGuid);
end;
{ TGuidDictionary }
// Associates Value with Key.
//
// If Key is already present its value is overwritten; otherwise a new entry
// is allocated and linked in at the head of the key's bucket chain.
procedure TGuidDictionary.Add(Key: TGUID; Value: TObject);
var
  Bucket: integer;
  Item: PGuidDictionaryItem;
begin
  Bucket := GetHashCode(Key);

  // Search the chain for an entry that already carries this key.
  Item := FTable[Bucket];
  while (Item <> nil) and not TGuidEx.EqualGuids(Item.Key, Key) do
    Item := Item.Next;

  if Item = nil then
  begin
    // Not found: create a fresh entry and make it the new chain head.
    New(Item);
    Item.Key := Key;
    Item.Next := FTable[Bucket];
    FTable[Bucket] := Item;
  end;

  Item.Value := Value;
end;
// Returns True when an entry for Key is stored in the dictionary.
function TGuidDictionary.Contains(Key: TGUID): Boolean;
var
  Item: PGuidDictionaryItem;
begin
  // Walk the key's bucket chain until a matching entry or the end is reached.
  Item := FTable[GetHashCode(Key)];
  while (Item <> nil) and not TGuidEx.EqualGuids(Item.Key, Key) do
    Item := Item.Next;
  Result := Item <> nil;
end;
// Creates an empty dictionary: every bucket starts out as nil.
constructor TGuidDictionary.Create;
begin
  inherited;
  // A nil pointer is all zero bits, so clearing the table sets every bucket to nil.
  FillChar(FTable, SizeOf(FTable), 0);
end;
// Disposes every chain entry in every bucket. The objects referenced by the
// entries are left untouched.
destructor TGuidDictionary.Destroy;
var
  Slot: integer;
  Doomed: PGuidDictionaryItem;
begin
  for Slot := Low(FTable) to High(FTable) do
    // Pop entries off the head of the chain until the bucket is empty.
    while FTable[Slot] <> nil do
    begin
      Doomed := FTable[Slot];
      FTable[Slot] := Doomed.Next;
      Dispose(Doomed);
    end;
  inherited;
end;
// Maps Guid to a bucket index in the range 0 .. HashSize-1.
//
// The four 32-bit words of the GUID are XOR-ed together and reduced modulo
// HashSize. The words are read as unsigned values: taking Abs of a signed
// XOR is not representable for Low(Integer), and the index would then stay
// in range only when HashSize happens to divide 2^31.
function TGuidDictionary.GetHashCode(Guid: TGUID): integer;
var
  Words: array [0..3] of Cardinal absolute Guid; // word view of the by-value copy
begin
  Result := Integer((Words[0] xor Words[1] xor Words[2] xor Words[3]) mod HashSize);
end;
// Removes the entry stored under Key, if there is one, and disposes it.
// The object the entry referred to is not freed.
procedure TGuidDictionary.Remove(Key: TGuid);
var
  Bucket: Integer;
  Cur, Prev: PGuidDictionaryItem;
begin
  Bucket := GetHashCode(Key);
  Prev := nil;
  Cur := FTable[Bucket];
  while Cur <> nil do
  begin
    if TGuidEx.EqualGuids(Cur.Key, Key) then
    begin
      // Unlink: either the bucket head or the predecessor skips over Cur.
      if Prev = nil then
        FTable[Bucket] := Cur.Next
      else
        Prev.Next := Cur.Next;
      Dispose(Cur);
      Exit;
    end;
    Prev := Cur;
    Cur := Cur.Next;
  end;
  // Falling out of the loop means Key was not present; nothing to do.
end;
// Looks up Key.
//
// Returns True and sets Value to the stored object when Key is present;
// returns False and sets Value to nil otherwise.
function TGuidDictionary.TryFind(Key: TGUID; out Value: TObject): boolean;
var
  Item: PGuidDictionaryItem;
begin
  Value := nil;
  Result := False;
  Item := FTable[GetHashCode(Key)];
  while Item <> nil do
  begin
    if TGuidEx.EqualGuids(Item.Key, Key) then
    begin
      Value := TObject(Item.Value);
      Result := True;
      Exit;
    end;
    Item := Item.Next;
  end;
end;
// Exercises TGuidDictionary: add, overwrite, lookup, remove and re-add.
procedure TestDictMisc.TestGuidDictionary;
const
  GuidA: TGUID = '{68D09F12-3E0D-4963-B32C-4EE3BD90F69C}';
  GuidB: TGUID = '{BEED37F6-9757-41DC-8463-AF094392652B}';
var
  Dict: TGuidDictionary;
  ObjA, ObjB, Found: TObject;
begin
  Dict := TGuidDictionary.Create;
  ObjA := TObject.Create();
  ObjB := TObject.Create();
  try
    // Empty dictionary, then first insertion.
    CheckFalse(Dict.Contains(GuidA));
    Dict.Add(GuidA, ObjA);
    CheckTrue(Dict.Contains(GuidA));

    // Second key, added twice: the repeat must not break the lookup.
    Dict.Add(GuidB, ObjB);
    CheckTrue(Dict.Contains(GuidB));
    Dict.Add(GuidB, ObjB);
    CheckTrue(Dict.Contains(GuidB));

    // Each key yields the object stored under it.
    CheckTrue(Dict.TryFind(GuidA, {out} Found));
    CheckSame(ObjA, Found);
    CheckTrue(Dict.TryFind(GuidB, {out} Found));
    CheckSame(ObjB, Found);

    // Removal makes the key disappear.
    Dict.Remove(GuidA);
    CheckFalse(Dict.Contains(GuidA));
    CheckFalse(Dict.TryFind(GuidA, {out} Found));

    // Re-adding after removal works again.
    Dict.Add(GuidA, ObjA);
    CheckTrue(Dict.TryFind(GuidA, {out} Found));
    CheckSame(ObjA, Found);
  finally
    ObjA.Free();
    ObjB.Free();
    Dict.Free;
  end;
end;
I think you are 99% of the way there.
Hashing sounds like the right solution. The obvious way to take advantage of the special nature of the GUID is to supply your own hash function, which combines the four 32-bit integers that make up a GUID into a single 32-bit integer. I'd just XOR the four integers.
I presume you are using Generics.Collections.TDictionary. You can supply your own hash function by passing a custom comparer to the constructor. I wouldn't worry about storing spare values, I don't think it will affect performance in a discernible way.
I trust that you are storing your GUIDs as 128 bit integers and not as strings.
Finally, it has occurred to me that the default comparer for a GUID might indeed already do the hash code generation this way. It's worth checking that out before making any changes.
EDIT
The default hash code uses the Bob Jenkins hash applied to the binary data. An XOR would be faster, but the default hash code doesn't seem like it would be a performance bottleneck.
In other words, I think that TDictionary<TGUID,Integer>
will serve your needs perfectly adequately.