Skip to content

Commit 8e4aa20

Browse files
committed
[esan] Add generic resizing hashtable
Summary: Adds a new, generic, resizing hashtable data structure for use by esan tools. No existing sanitizer hashtable is suitable for the use case for most esan tools: we need non-fixed-size tables, parameterized keys and payloads, and write access to payloads. The new hashtable uses either simple internal or external mutex locking and supports custom hash and comparison operators. The focus is on functionality, not performance, to catalyze creation of a variety of tools. We can optimize the more successful tools later. Adds tests of the data structure. Reviewers: aizatsky Subscribers: vitalybuka, zhaoqin, kcc, eugenis, llvm-commits, kubabrecka Differential Revision: https://reviews.llvm.org/D22681 git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@278024 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent d82731d commit 8e4aa20

File tree

2 files changed

+384
-0
lines changed

2 files changed

+384
-0
lines changed

lib/esan/esan_hashtable.h

+250
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
//===-- esan_hashtable.h ----------------------------------------*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// This file is a part of EfficiencySanitizer, a family of performance tuners.
11+
//
12+
// Generic resizing hashtable.
13+
//===----------------------------------------------------------------------===//
14+
15+
#include "sanitizer_common/sanitizer_allocator_internal.h"
16+
#include "sanitizer_common/sanitizer_internal_defs.h"
17+
#include "sanitizer_common/sanitizer_mutex.h"
18+
#include <stddef.h>
19+
20+
namespace __esan {
21+
22+
//===----------------------------------------------------------------------===//
23+
// Default hash and comparison functions
24+
//===----------------------------------------------------------------------===//
25+
26+
// Default hash functor: the hash value is the key itself converted to size_t.
// The conversion is a C-style cast, so T needs to be castable to size_t
// (for a class type, via a conversion operator).
template <typename T> struct DefaultHash {
  size_t operator()(const T &Key) const {
    const size_t HashValue = (size_t)Key;
    return HashValue;
  }
};
31+
32+
// Default comparison functor: two keys are equal when operator== says so.
template <typename T> struct DefaultEqual {
  bool operator()(const T &Key1, const T &Key2) const {
    const bool Same = (Key1 == Key2);
    return Same;
  }
};
37+
38+
//===----------------------------------------------------------------------===//
39+
// HashTable declaration
40+
//===----------------------------------------------------------------------===//
41+
42+
// A simple resizing and mutex-locked hashtable.
43+
//
44+
// If the default hash functor is used, KeyTy must have an operator size_t().
45+
// If the default comparison functor is used, KeyTy must have an operator ==.
46+
//
47+
// By default all operations are internally-synchronized with a mutex, with no
48+
// synchronization for payloads once hashtable functions return. If
49+
// ExternalLock is set to true, the caller should call the lock() and unlock()
50+
// routines around all hashtable operations and subsequent manipulation of
51+
// payloads.
52+
template <typename KeyTy, typename DataTy, bool ExternalLock = false,
53+
typename HashFuncTy = DefaultHash<KeyTy>,
54+
typename EqualFuncTy = DefaultEqual<KeyTy> >
55+
class HashTable {
56+
public:
57+
// InitialCapacity must be a power of 2.
58+
// ResizeFactor must be between 1 and 99 and indicates the
59+
// maximum percentage full that the table should ever be.
60+
HashTable(u32 InitialCapacity = 2048, u32 ResizeFactor = 70);
61+
~HashTable();
62+
bool lookup(const KeyTy &Key, DataTy &Payload); // Const except for Mutex.
63+
bool add(const KeyTy &Key, const DataTy &Payload);
64+
bool remove(const KeyTy &Key);
65+
u32 size(); // Const except for Mutex.
66+
// If the table is internally-synchronized, this lock must not be held
67+
// while a hashtable function is called as it will deadlock: the lock
68+
// is not recursive. This is meant for use with externally-synchronized
69+
// tables.
70+
void lock();
71+
void unlock();
72+
73+
private:
74+
void resize();
75+
76+
struct HashEntry {
77+
KeyTy Key;
78+
DataTy Payload;
79+
HashEntry *Next;
80+
};
81+
82+
HashEntry **Table;
83+
u32 Capacity;
84+
u32 Entries;
85+
const u32 ResizeFactor;
86+
BlockingMutex Mutex;
87+
const HashFuncTy HashFunc;
88+
const EqualFuncTy EqualFunc;
89+
};
90+
91+
//===----------------------------------------------------------------------===//
92+
// Hashtable implementation
93+
//===----------------------------------------------------------------------===//
94+
95+
template <typename KeyTy, typename DataTy, bool ExternalLock,
96+
typename HashFuncTy, typename EqualFuncTy>
97+
HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::HashTable(
98+
u32 InitialCapacity, u32 ResizeFactor)
99+
: Capacity(InitialCapacity), Entries(0), ResizeFactor(ResizeFactor),
100+
HashFunc(HashFuncTy()), EqualFunc(EqualFuncTy()) {
101+
CHECK(IsPowerOfTwo(Capacity));
102+
CHECK(ResizeFactor >= 1 && ResizeFactor <= 99);
103+
Table = (HashEntry **)InternalAlloc(Capacity * sizeof(HashEntry *));
104+
internal_memset(Table, 0, Capacity * sizeof(HashEntry *));
105+
}
106+
107+
template <typename KeyTy, typename DataTy, bool ExternalLock,
108+
typename HashFuncTy, typename EqualFuncTy>
109+
HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::~HashTable() {
110+
for (u32 i = 0; i < Capacity; ++i) {
111+
HashEntry *Entry = Table[i];
112+
while (Entry != nullptr) {
113+
HashEntry *Next = Entry->Next;
114+
Entry->Payload.~DataTy();
115+
InternalFree(Entry);
116+
Entry = Next;
117+
}
118+
}
119+
InternalFree(Table);
120+
}
121+
122+
template <typename KeyTy, typename DataTy, bool ExternalLock,
123+
typename HashFuncTy, typename EqualFuncTy>
124+
u32 HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::size() {
125+
u32 Res;
126+
if (!ExternalLock)
127+
Mutex.Lock();
128+
Res = Entries;
129+
if (!ExternalLock)
130+
Mutex.Unlock();
131+
return Res;
132+
}
133+
134+
template <typename KeyTy, typename DataTy, bool ExternalLock,
135+
typename HashFuncTy, typename EqualFuncTy>
136+
bool HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::lookup(
137+
const KeyTy &Key, DataTy &Payload) {
138+
if (!ExternalLock)
139+
Mutex.Lock();
140+
bool Found = false;
141+
size_t Hash = HashFunc(Key) % Capacity;
142+
HashEntry *Entry = Table[Hash];
143+
for (; Entry != nullptr; Entry = Entry->Next) {
144+
if (EqualFunc(Entry->Key, Key)) {
145+
Payload = Entry->Payload;
146+
Found = true;
147+
break;
148+
}
149+
}
150+
if (!ExternalLock)
151+
Mutex.Unlock();
152+
return Found;
153+
}
154+
155+
template <typename KeyTy, typename DataTy, bool ExternalLock,
156+
typename HashFuncTy, typename EqualFuncTy>
157+
void HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::resize() {
158+
if (!ExternalLock)
159+
Mutex.CheckLocked();
160+
size_t OldCapacity = Capacity;
161+
HashEntry **OldTable = Table;
162+
Capacity *= 2;
163+
Table = (HashEntry **)InternalAlloc(Capacity * sizeof(HashEntry *));
164+
internal_memset(Table, 0, Capacity * sizeof(HashEntry *));
165+
// Re-hash
166+
for (u32 i = 0; i < OldCapacity; ++i) {
167+
HashEntry *OldEntry = OldTable[i];
168+
while (OldEntry != nullptr) {
169+
HashEntry *Next = OldEntry->Next;
170+
size_t Hash = HashFunc(OldEntry->Key) % Capacity;
171+
OldEntry->Next = Table[Hash];
172+
Table[Hash] = OldEntry;
173+
OldEntry = Next;
174+
}
175+
}
176+
}
177+
178+
template <typename KeyTy, typename DataTy, bool ExternalLock,
179+
typename HashFuncTy, typename EqualFuncTy>
180+
bool HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::add(
181+
const KeyTy &Key, const DataTy &Payload) {
182+
if (!ExternalLock)
183+
Mutex.Lock();
184+
bool Exists = false;
185+
size_t Hash = HashFunc(Key) % Capacity;
186+
HashEntry *Entry = Table[Hash];
187+
for (; Entry != nullptr; Entry = Entry->Next) {
188+
if (EqualFunc(Entry->Key, Key)) {
189+
Exists = true;
190+
break;
191+
}
192+
}
193+
if (!Exists) {
194+
Entries++;
195+
if (Entries * 100 >= Capacity * ResizeFactor) {
196+
resize();
197+
Hash = HashFunc(Key) % Capacity;
198+
}
199+
HashEntry *Add = (HashEntry *)InternalAlloc(sizeof(*Add));
200+
Add->Key = Key;
201+
Add->Payload = Payload;
202+
Add->Next = Table[Hash];
203+
Table[Hash] = Add;
204+
}
205+
if (!ExternalLock)
206+
Mutex.Unlock();
207+
return !Exists;
208+
}
209+
210+
template <typename KeyTy, typename DataTy, bool ExternalLock,
211+
typename HashFuncTy, typename EqualFuncTy>
212+
bool HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::remove(
213+
const KeyTy &Key) {
214+
if (!ExternalLock)
215+
Mutex.Lock();
216+
bool Found = false;
217+
size_t Hash = HashFunc(Key) % Capacity;
218+
HashEntry *Entry = Table[Hash];
219+
HashEntry *Prev = nullptr;
220+
for (; Entry != nullptr; Prev = Entry, Entry = Entry->Next) {
221+
if (EqualFunc(Entry->Key, Key)) {
222+
Found = true;
223+
Entries--;
224+
if (Prev == nullptr)
225+
Table[Hash] = Entry->Next;
226+
else
227+
Prev->Next = Entry->Next;
228+
Entry->Payload.~DataTy();
229+
InternalFree(Entry);
230+
break;
231+
}
232+
}
233+
if (!ExternalLock)
234+
Mutex.Unlock();
235+
return Found;
236+
}
237+
238+
template <typename KeyTy, typename DataTy, bool ExternalLock,
239+
typename HashFuncTy, typename EqualFuncTy>
240+
void HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::lock() {
241+
Mutex.Lock();
242+
}
243+
244+
template <typename KeyTy, typename DataTy, bool ExternalLock,
245+
typename HashFuncTy, typename EqualFuncTy>
246+
void HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::unlock() {
247+
Mutex.Unlock();
248+
}
249+
250+
} // namespace __esan

test/esan/Unit/hashtable.cpp

+134
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
// RUN: %clangxx_unit -esan-instrument-loads-and-stores=0 -O0 %s -o %t 2>&1
2+
// RUN: %env_esan_opts="record_snapshots=0" %run %t 2>&1 | FileCheck %s
3+
4+
#include "esan/esan_hashtable.h"
5+
#include <assert.h>
6+
#include <stdio.h>
7+
#include <stdlib.h>
8+
#include <string.h>
9+
10+
// Test object that owns a heap copy of a string and carries a reference
// count that is maintained by its users (the class itself never changes it
// after construction).
class MyData {
public:
  // Duplicates Str; the copy is released in the destructor.
  MyData(const char *Str) : RefCount(0) { Buf = strdup(Str); }
  ~MyData() {
    fprintf(stderr, " Destructor: %s.\n", Buf);
    free(Buf);
  }
  // Buf is an owning raw pointer, so a member-wise copy would free the same
  // buffer twice.
  MyData(const MyData &) = delete;
  MyData &operator=(const MyData &) = delete;
  // Compares the string contents.  Const-qualified on both sides so that it
  // is also selected for const operands; otherwise such comparisons silently
  // fall back to comparing the XOR hashes through operator size_t().
  bool operator==(const MyData &Cmp) const {
    return strcmp(Buf, Cmp.Buf) == 0;
  }
  // Hash: XOR of all characters of the string.
  operator size_t() const {
    size_t Res = 0;
    // Measure the string once rather than on every loop iteration.
    const size_t Len = strlen(Buf);
    for (size_t i = 0; i < Len; ++i)
      Res ^= Buf[i];
    return Res;
  }
  char *Buf;
  int RefCount;
};
27+
28+
// We use a smart pointer wrapper to free the payload on hashtable removal.
29+
struct MyDataPayload {
30+
MyDataPayload() : Data(nullptr) {}
31+
explicit MyDataPayload(MyData *Data) : Data(Data) { ++Data->RefCount; }
32+
~MyDataPayload() {
33+
if (Data && --Data->RefCount == 0) {
34+
fprintf(stderr, "Deleting %s.\n", Data->Buf);
35+
delete Data;
36+
}
37+
}
38+
MyDataPayload(const MyDataPayload &Copy) {
39+
Data = Copy.Data;
40+
++Data->RefCount;
41+
}
42+
MyDataPayload & operator=(const MyDataPayload &Copy) {
43+
if (this != &Copy) {
44+
this->~MyDataPayload();
45+
Data = Copy.Data;
46+
++Data->RefCount;
47+
}
48+
return *this;
49+
}
50+
bool operator==(MyDataPayload &Cmp) { return *Data == *Cmp.Data; }
51+
operator size_t() const { return (size_t)*Data; }
52+
MyData *Data;
53+
};
54+
55+
int main()
56+
{
57+
__esan::HashTable<int, int> IntTable;
58+
assert(IntTable.size() == 0);
59+
bool Added = IntTable.add(4, 42);
60+
assert(Added);
61+
assert(!IntTable.add(4, 42));
62+
assert(IntTable.size() == 1);
63+
int Value;
64+
bool Found = IntTable.lookup(4, Value);
65+
assert(Found && Value == 42);
66+
assert(!IntTable.remove(5));
67+
assert(IntTable.remove(4));
68+
69+
__esan::HashTable<int, MyDataPayload> DataTable(4);
70+
MyDataPayload NewData(new MyData("mystring"));
71+
Added = DataTable.add(4, NewData);
72+
assert(Added);
73+
MyDataPayload FoundData;
74+
Found = DataTable.lookup(4, FoundData);
75+
assert(Found && strcmp(FoundData.Data->Buf, "mystring") == 0);
76+
assert(!DataTable.remove(5));
77+
assert(DataTable.remove(4));
78+
// Test resize.
79+
for (int i = 0; i < 4; ++i) {
80+
MyDataPayload MoreData(new MyData("delete-at-end"));
81+
Added = DataTable.add(i+1, MoreData);
82+
assert(Added);
83+
assert(!DataTable.add(i+1, MoreData));
84+
}
85+
for (int i = 0; i < 4; ++i) {
86+
Found = DataTable.lookup(i+1, FoundData);
87+
assert(Found && strcmp(FoundData.Data->Buf, "delete-at-end") == 0);
88+
}
89+
90+
// Test payload freeing via smart pointer wrapper.
91+
__esan::HashTable<MyDataPayload, MyDataPayload, true> DataKeyTable;
92+
MyDataPayload DataA(new MyData("string AB"));
93+
DataKeyTable.lock();
94+
Added = DataKeyTable.add(DataA, DataA);
95+
assert(Added);
96+
Found = DataKeyTable.lookup(DataA, FoundData);
97+
assert(Found && strcmp(FoundData.Data->Buf, "string AB") == 0);
98+
MyDataPayload DataB(new MyData("string AB"));
99+
Added = DataKeyTable.add(DataB, DataB);
100+
assert(!Added);
101+
DataKeyTable.remove(DataB); // Should free the DataA payload.
102+
DataKeyTable.unlock();
103+
104+
// Test custom functors.
105+
struct CustomHash {
106+
size_t operator()(int Key) const { return Key % 4; }
107+
};
108+
struct CustomEqual {
109+
bool operator()(int Key1, int Key2) const { return Key1 %4 == Key2 % 4; }
110+
};
111+
__esan::HashTable<int, int, false, CustomHash, CustomEqual> ModTable;
112+
Added = ModTable.add(2, 42);
113+
assert(Added);
114+
Added = ModTable.add(6, 42);
115+
assert(!Added);
116+
117+
fprintf(stderr, "All checks passed.\n");
118+
return 0;
119+
}
120+
// CHECK: Deleting mystring.
121+
// CHECK-NEXT: Destructor: mystring.
122+
// CHECK-NEXT: All checks passed.
123+
// CHECK-NEXT: Deleting string AB.
124+
// CHECK-NEXT: Destructor: string AB.
125+
// CHECK-NEXT: Deleting string AB.
126+
// CHECK-NEXT: Destructor: string AB.
127+
// CHECK-NEXT: Deleting delete-at-end.
128+
// CHECK-NEXT: Destructor: delete-at-end.
129+
// CHECK-NEXT: Deleting delete-at-end.
130+
// CHECK-NEXT: Destructor: delete-at-end.
131+
// CHECK-NEXT: Deleting delete-at-end.
132+
// CHECK-NEXT: Destructor: delete-at-end.
133+
// CHECK-NEXT: Deleting delete-at-end.
134+
// CHECK-NEXT: Destructor: delete-at-end.

0 commit comments

Comments
 (0)