Efficient data structure for GUIDs

后端 未结 3 1244
南笙
南笙 2020-12-30 07:01

I am in search of a data structure which enables me to quickly (preferably O(1)-quickly) determine if a given GUID is a member of a Collection of GUIDs or not.

My cu

相关标签:
3条回答
  • 2020-12-30 07:30

    Very few data structures offer O(1) access. One is the array, another is the hash map (David's answer), and I know of only one other: the trie. Here follows a simple implementation of a bit-wise trie. It has some interesting properties:

    • Immune to memory fragmentation since no re-allocations take place.
    • O(1) add and existence test. Of course, the constant involved in O(1) is fairly large.

    The code:

    program Project23;
    
    {$APPTYPE CONSOLE}
    
    uses
      SysUtils, Generics.Collections;
    
    type
    
      // One trie node: a two-way branch indexed by the value of the GUID bit
      // currently being examined.
      PGuidTrieNode=^TGuidTrieNode;
      TGuidTrieNode = record
        Sub:array[Boolean] of PGuidTrieNode; // Sub[False] = child for a clear bit, Sub[True] = child for a set bit
      end;
      // Byte-wise view of a GUID; Add and Exists overlay it on their TGUID
      // parameter with "absolute" and assert that both types have the same size.
      TGuidByteArray = array[0..15] of Byte;
    
      // Set of GUIDs stored as a bit-wise trie: each GUID is a path of
      // 16 * 8 branch decisions starting at Root.
      TGuidTrie = class
      protected
        Root: PGuidTrieNode; // entry node of the trie, allocated in Create and released in Destroy
      public
        constructor Create;
        destructor Destroy;override;
    
        // Inserts G; the slot reached by the last bit is marked with Pointer(1)
        // instead of a real child node.
        procedure Add(G: TGUID);
        // Returns True when every bit of G leads to an assigned slot.
        function Exists(G: TGUID): Boolean;
      end;
    
    { TGuidTrie }
    
    // Inserts G into the trie.
    //
    // The GUID is consumed byte by byte, least significant bit first. Every bit
    // except the last selects (and, when missing, creates) a child node. The
    // slot selected by the very last bit receives the marker Pointer(1) instead
    // of a node, which is what Exists later finds as "assigned".
    procedure TGuidTrie.Add(G: TGUID);
    var GBA: TGuidByteArray absolute G;
        Node: PGuidTrieNode;
        i: Integer;
        Bit: Integer;
        IsBitSet: Boolean;
    const BitMask: array[0..7] of Byte = (1, 2, 4, 8, 16, 32, 64, 128);
    begin
      Assert(SizeOf(G) = SizeOf(TGuidByteArray));
      Node := Root;
      for i := 0 to High(GBA) do
      begin
        for Bit := 0 to 7 do
        begin
          IsBitSet := (GBA[i] and BitMask[Bit]) <> 0;
          if (i = High(GBA)) and (Bit = 7) then
            begin
              // Payload: mark the final slot as occupied.
              Node.Sub[IsBitSet] := Pointer(1);
            end
          else
            begin
              if not Assigned(Node.Sub[IsBitSet]) then
                // AllocMem returns a zero-filled block, so both Sub slots of the
                // new node start out as nil. An uncleared block would leave
                // garbage there that Assigned() would mistake for child nodes.
                Node.Sub[IsBitSet] := AllocMem(SizeOf(TGuidTrieNode));
              Node := Node.Sub[IsBitSet];
            end;
        end;
      end;
    end;
    
    // Creates an empty trie consisting of a single root node.
    constructor TGuidTrie.Create;
    begin
      inherited Create;
      // The root must start with both Sub slots set to nil; AllocMem hands back
      // zero-filled memory, so Assigned() on a fresh root reports "no child".
      Root := AllocMem(SizeOf(TGuidTrieNode));
    end;
    
    // Releases every node of the trie and then the object itself.
    destructor TGuidTrie.Destroy;

      // Frees Node and, recursively, both of its subtrees. A slot holding the
      // marker Pointer(1) is the payload flag written by Add, not a real node,
      // and is skipped.
      procedure KillNode(Node: PGuidTrieNode);
      var Branch: Boolean;
      begin
        // Visit BOTH branches; following only Sub[True] would leak every
        // subtree hanging off a clear bit.
        for Branch := False to True do
          if Assigned(Node.Sub[Branch]) and (Node.Sub[Branch] <> Pointer(1)) then
            KillNode(Node.Sub[Branch]);
        FreeMemory(Node);
      end;

    begin
      // Root is still nil if the constructor failed before allocating it.
      if Assigned(Root) then
        KillNode(Root);
      inherited;
    end;
    
    // Reports whether G has been added to the trie.
    //
    // Walks the 128 bits of G (byte 0 first, least significant bit first) and
    // follows the matching child slot at every step. The walk fails as soon as
    // an unassigned slot is hit; surviving all steps means the payload slot
    // written by Add was reached.
    function TGuidTrie.Exists(G: TGUID): Boolean;
    var
      Bytes: TGuidByteArray absolute G;
      Cursor: PGuidTrieNode;
      BitIndex: Integer;
      Branch: Boolean;
    begin
      Assert(SizeOf(G) = SizeOf(TGuidByteArray));
      Result := False;
      Cursor := Root;
      for BitIndex := 0 to (SizeOf(TGuidByteArray) * 8) - 1 do
      begin
        // BitIndex shr 3 selects the byte, BitIndex and 7 the bit inside it.
        Branch := ((Bytes[BitIndex shr 3] shr (BitIndex and 7)) and 1) <> 0;
        Cursor := Cursor.Sub[Branch];
        if not Assigned(Cursor) then
          Exit;
      end;
      Result := True; // Cursor now holds the payload marker
    end;
    
    const
      G1: TGUID = '{68D09F12-3E0D-4963-B32C-4EE3BD90F69C}';
      G2: TGUID = '{BEED37F6-9757-41DC-8463-AF094392652B}';

    var
      T: TGuidTrie;

    // Prints one line telling whether G is currently stored in T.
    procedure ReportMembership(const G: TGUID);
    begin
      if T.Exists(G) then
        WriteLn('Exists')
      else
        WriteLn('NOT Exists');
    end;

    // Demo: query each GUID before and after adding it to the trie.
    begin
      try
        T := TGuidTrie.Create;
        try
          ReportMembership(G1);
          T.Add(G1);
          ReportMembership(G1);

          ReportMembership(G2);
          T.Add(G2);
          ReportMembership(G2);
        finally
          T.Free;
        end;
      except
        on E: Exception do
          Writeln(E.ClassName, ': ', E.Message);
      end;
    end.
    
    0 讨论(0)
  • 2020-12-30 07:36
    type
        PGuidDictionaryItem = ^TGuidDictionaryItem;
    
        // One entry of a bucket chain (singly linked list).
        TGuidDictionaryItem = record
            Key: TGuid;                // GUID this entry is stored under
            Value: Pointer;            // associated object; Add stores a TObject here, TryFind casts it back
            Next: PGuidDictionaryItem; // next entry in the same bucket, nil at the end of the chain
        end;
    
        // Hash table mapping TGUID keys to object references, with collisions
        // resolved by chaining.
        TGuidDictionary = class
        private
        const
            HashSize = 2048; // number of buckets in FTable
        var
            Size: integer; // not referenced by any of the methods shown with this class
            FTable: array [0..HashSize-1] of PGuidDictionaryItem; // bucket heads; nil means empty bucket
    
            // Maps Guid to a bucket index into FTable.
            // NOTE(review): the name probably hides TObject.GetHashCode on compilers
            // whose TObject declares it - confirm and expect a hiding warning.
            function GetHashCode(Guid: TGUID): integer;
        public
            constructor Create;
            destructor Destroy; override;
    
            // Stores Value under Key, overwriting the value of an existing entry.
            procedure Add(Key: TGUID; Value: TObject);
            // Returns True and the stored object when Key is present; otherwise False and nil.
            function TryFind(Key: TGUID; out Value: TObject): boolean;
            // Returns True when Key is present.
            function Contains(Key: TGUID): Boolean;
            // Removes the entry for Key, if any. The stored object is not freed.
            procedure Remove(Key: TGuid);
        end;
    
    { TGuidDictionary }
    
    // Associates Value with Key. If Key is already present its value is
    // replaced; otherwise a new entry is inserted at the head of the bucket.
    procedure TGuidDictionary.Add(Key: TGUID; Value: TObject);
    var
        Bucket: integer;
        Item: PGuidDictionaryItem;
    begin
        Bucket := GetHashCode(Key);

        // Scan the bucket's chain for an entry that already holds Key.
        Item := FTable[Bucket];
        while (Item <> nil) and not TGuidEx.EqualGuids(Item.Key, Key) do
            Item := Item.Next;

        if Item = nil then
        begin
            // Not found: link a fresh entry in front of the current chain.
            New(Item);
            Item.Key := Key;
            Item.Next := FTable[Bucket];
            FTable[Bucket] := Item;
        end;

        // Either the existing or the freshly created entry takes the value.
        Item.Value := Value;
    end;
    
    // Returns True when an entry for Key exists in the dictionary.
    function TGuidDictionary.Contains(Key: TGUID): Boolean;
    var
        Item: PGuidDictionaryItem;
    begin
        // Same chain walk TryFind performs, without handing back the value.
        Item := FTable[GetHashCode(Key)];
        while (Item <> nil) and not TGuidEx.EqualGuids(Item.Key, Key) do
            Item := Item.Next;

        Result := Item <> nil;
    end;
    
    // Creates an empty dictionary: every bucket head starts out as nil.
    constructor TGuidDictionary.Create;
    begin
        inherited;

        // Clear the whole bucket array in one go; an all-zero pointer is nil.
        FillChar(FTable, SizeOf(FTable), 0);
    end;
    
    // Disposes every chain entry of every bucket. The objects referenced by
    // the entries are not freed here.
    destructor TGuidDictionary.Destroy;
    var
        Slot: integer;
        Head: PGuidDictionaryItem;
    begin
        for Slot := Low(FTable) to High(FTable) do
            // Pop entries off the front of the chain until the bucket is empty.
            while FTable[Slot] <> nil do
            begin
                Head := FTable[Slot];
                FTable[Slot] := Head.Next;
                Dispose(Head);
            end;

        inherited;
    end;
    
    // Maps Guid to a bucket index in the range 0 .. HashSize - 1.
    //
    // The GUID is viewed as four 32-bit words that are XOR-ed together and
    // reduced modulo HashSize. The words are treated as unsigned so the result
    // can never be negative: a signed XOR run through Abs() cannot represent
    // Abs(Low(Integer)), which leaves the index correct only while HashSize
    // happens to divide 2^31.
    function TGuidDictionary.GetHashCode(Guid: TGUID): integer;
    var
        N: array [0..3] of Cardinal absolute Guid;
    begin
        Result := (N[0] xor N[1] xor N[2] xor N[3]) mod HashSize;
    end;
    
    // Removes the entry stored under Key. Does nothing when Key is absent.
    // Only the chain entry is disposed; the object it referenced is untouched.
    procedure TGuidDictionary.Remove(Key: TGuid);
    var
        Link: ^PGuidDictionaryItem;
        Victim: PGuidDictionaryItem;
    begin
        // Link always points at the pointer that leads to the current entry:
        // first the bucket head, afterwards the Next field of the predecessor.
        Link := @FTable[GetHashCode(Key)];
        while (Link^ <> nil) and not TGuidEx.EqualGuids(Link^^.Key, Key) do
            Link := @Link^^.Next;

        Victim := Link^;
        if Victim = nil then
            Exit;

        // Bypass the entry in the chain, then release it.
        Link^ := Victim^.Next;
        Dispose(Victim);
    end;
    
    // Looks Key up. On success returns True and hands the stored object back
    // in Value; on failure returns False and sets Value to nil.
    function TGuidDictionary.TryFind(Key: TGUID; out Value: TObject): boolean;
    var
        Item: PGuidDictionaryItem;
    begin
        // Pessimistic defaults, overwritten as soon as a match is found.
        Value := nil;
        Result := False;

        Item := FTable[GetHashCode(Key)];
        while Item <> nil do
        begin
            if TGuidEx.EqualGuids(Item.Key, Key) then
            begin
                Value := TObject(Item.Value);
                Result := True;
                Exit;
            end;
            Item := Item.Next;
        end;
    end;
    
    // Exercises TGuidDictionary: insertion, duplicate insertion, lookup,
    // removal and re-insertion of two fixed GUIDs.
    procedure TestDictMisc.TestGuidDictionary;
    const
        GuidA: TGUID = '{68D09F12-3E0D-4963-B32C-4EE3BD90F69C}';
        GuidB: TGUID = '{BEED37F6-9757-41DC-8463-AF094392652B}';
    var
        Dict: TGuidDictionary;
        FirstObj, SecondObj, Found: TObject;
    begin
        Dict := TGuidDictionary.Create;
        FirstObj := TObject.Create;
        SecondObj := TObject.Create;
        try
            // An empty dictionary contains nothing.
            CheckFalse(Dict.Contains(GuidA));

            Dict.Add(GuidA, FirstObj);
            CheckTrue(Dict.Contains(GuidA));

            Dict.Add(GuidB, SecondObj);
            CheckTrue(Dict.Contains(GuidB));

            // Adding the same key a second time must keep it present.
            Dict.Add(GuidB, SecondObj);
            CheckTrue(Dict.Contains(GuidB));

            // Lookups return exactly the objects that were stored.
            CheckTrue(Dict.TryFind(GuidA, Found));
            CheckSame(FirstObj, Found);

            CheckTrue(Dict.TryFind(GuidB, Found));
            CheckSame(SecondObj, Found);

            // A removed key is gone for both Contains and TryFind.
            Dict.Remove(GuidA);
            CheckFalse(Dict.Contains(GuidA));
            CheckFalse(Dict.TryFind(GuidA, Found));

            // It can be added again afterwards.
            Dict.Add(GuidA, FirstObj);
            CheckTrue(Dict.TryFind(GuidA, Found));
            CheckSame(FirstObj, Found);
        finally
            FirstObj.Free;
            SecondObj.Free;

            Dict.Free;
        end;
    end;
    
    0 讨论(0)
  • 2020-12-30 07:39

    I think you are 99% of the way there.

    Hashing sounds like the right solution. The obvious way to take advantage of the special nature of the GUID is to supply your own hash function which combines the four 32-bit integers that make up a GUID into a single 32-bit integer. I'd just XOR the four integers.

    I presume you are using Generics.Collections.TDictionary. You can supply your own hash function by passing a custom comparer to the constructor. I wouldn't worry about storing spare values, I don't think it will affect performance in a discernible way.

    I trust that you are storing your GUIDs as 128 bit integers and not as strings.

    Finally, it has occurred to me that the default comparer for a GUID might indeed already do the hash code generation this way. It's worth checking that out before making any changes.

    EDIT

    Default hash code uses Bob Jenkins hash applied to the binary data. An XOR would be faster, but the default hash code doesn't seem like it would be a performance bottleneck.

    In other words, I think that TDictionary<TGUID,Integer> will serve your needs perfectly adequately.

    0 讨论(0)
提交回复
热议问题