1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.google.common.hash;
18
19 import static com.google.common.base.Charsets.UTF_8;
20 import static com.google.common.hash.BloomFilterStrategies.BitArray;
21
22 import com.google.common.collect.ImmutableSet;
23 import com.google.common.math.LongMath;
24 import com.google.common.primitives.Ints;
25 import com.google.common.testing.EqualsTester;
26 import com.google.common.testing.NullPointerTester;
27 import com.google.common.testing.SerializableTester;
28
29 import junit.framework.TestCase;
30
31 import java.io.ByteArrayInputStream;
32 import java.io.ByteArrayOutputStream;
33 import java.math.RoundingMode;
34 import java.util.Random;
35
36 import javax.annotation.Nullable;
37
38
39
40
41
42
43 public class BloomFilterTest extends TestCase {
44 public void testLargeBloomFilterDoesntOverflow() {
45 long numBits = Integer.MAX_VALUE;
46 numBits++;
47
48 BitArray bitArray = new BitArray(numBits);
49 assertTrue(
50 "BitArray.bitSize() must return a positive number, but was " + bitArray.bitSize(),
51 bitArray.bitSize() > 0);
52
53
54
55 }
56
57 public void testCreateAndCheckMitz32BloomFilterWithKnownFalsePositives() {
58 int numInsertions = 1000000;
59 BloomFilter<String> bf = BloomFilter.create(
60 Funnels.unencodedCharsFunnel(), numInsertions, 0.03,
61 BloomFilterStrategies.MURMUR128_MITZ_32);
62
63
64 for (int i = 0; i < numInsertions * 2; i += 2) {
65 bf.put(Integer.toString(i));
66 }
67
68
69 for (int i = 0; i < numInsertions * 2; i += 2) {
70 assertTrue(bf.mightContain(Integer.toString(i)));
71 }
72
73
74
75 ImmutableSet<Integer> falsePositives = ImmutableSet.of(
76 49, 51, 59, 163, 199, 321, 325, 363, 367, 469, 545, 561, 727, 769, 773, 781);
77 for (int i = 1; i < 900; i += 2) {
78 if (!falsePositives.contains(i)) {
79 assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i)));
80 }
81 }
82
83
84 int knownNumberOfFalsePositives = 29824;
85 int numFpp = 0;
86 for (int i = 1; i < numInsertions * 2; i += 2) {
87 if (bf.mightContain(Integer.toString(i))) {
88 numFpp++;
89 }
90 }
91 assertEquals(knownNumberOfFalsePositives, numFpp);
92 double actualFpp = (double) knownNumberOfFalsePositives / numInsertions;
93 double expectedFpp = bf.expectedFpp();
94
95 assertEquals(actualFpp, expectedFpp, 0.00015);
96 }
97
98 public void testCreateAndCheckBloomFilterWithKnownFalsePositives64() {
99 int numInsertions = 1000000;
100 BloomFilter<String> bf = BloomFilter.create(
101 Funnels.unencodedCharsFunnel(), numInsertions, 0.03,
102 BloomFilterStrategies.MURMUR128_MITZ_64);
103
104
105 for (int i = 0; i < numInsertions * 2; i += 2) {
106 bf.put(Integer.toString(i));
107 }
108
109
110 for (int i = 0; i < numInsertions * 2; i += 2) {
111 assertTrue(bf.mightContain(Integer.toString(i)));
112 }
113
114
115
116 ImmutableSet<Integer> falsePositives = ImmutableSet.of(
117 15, 25, 287, 319, 381, 399, 421, 465, 529, 697, 767, 857);
118 for (int i = 1; i < 900; i += 2) {
119 if (!falsePositives.contains(i)) {
120 assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i)));
121 }
122 }
123
124
125 int knownNumberOfFalsePositives = 30104;
126 int numFpp = 0;
127 for (int i = 1; i < numInsertions * 2; i += 2) {
128 if (bf.mightContain(Integer.toString(i))) {
129 numFpp++;
130 }
131 }
132 assertEquals(knownNumberOfFalsePositives, numFpp);
133 double actualFpp = (double) knownNumberOfFalsePositives / numInsertions;
134 double expectedFpp = bf.expectedFpp();
135
136 assertEquals(actualFpp, expectedFpp, 0.00033);
137 }
138
139 public void testCreateAndCheckBloomFilterWithKnownUtf8FalsePositives64() {
140 int numInsertions = 1000000;
141 BloomFilter<String> bf = BloomFilter.create(
142 Funnels.stringFunnel(UTF_8), numInsertions, 0.03,
143 BloomFilterStrategies.MURMUR128_MITZ_64);
144
145
146 for (int i = 0; i < numInsertions * 2; i += 2) {
147 bf.put(Integer.toString(i));
148 }
149
150
151 for (int i = 0; i < numInsertions * 2; i += 2) {
152 assertTrue(bf.mightContain(Integer.toString(i)));
153 }
154
155
156
157 ImmutableSet<Integer> falsePositives =
158 ImmutableSet.of(129, 471, 723, 89, 751, 835, 871);
159 for (int i = 1; i < 900; i += 2) {
160 if (!falsePositives.contains(i)) {
161 assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i)));
162 }
163 }
164
165
166 int knownNumberOfFalsePositives = 29763;
167 int numFpp = 0;
168 for (int i = 1; i < numInsertions * 2; i += 2) {
169 if (bf.mightContain(Integer.toString(i))) {
170 numFpp++;
171 }
172 }
173 assertEquals(knownNumberOfFalsePositives, numFpp);
174 double actualFpp = (double) knownNumberOfFalsePositives / numInsertions;
175 double expectedFpp = bf.expectedFpp();
176
177 assertEquals(actualFpp, expectedFpp, 0.00033);
178 }
179
180
181
182
183 public void testBasic() {
184 for (double fpr = 0.0000001; fpr < 0.1; fpr *= 10) {
185 for (int expectedInsertions = 1; expectedInsertions <= 10000; expectedInsertions *= 10) {
186 checkSanity(BloomFilter.create(HashTestUtils.BAD_FUNNEL, expectedInsertions, fpr));
187 }
188 }
189 }
190
191 public void testPreconditions() {
192 try {
193 BloomFilter.create(Funnels.unencodedCharsFunnel(), -1);
194 fail();
195 } catch (IllegalArgumentException expected) {}
196 try {
197 BloomFilter.create(Funnels.unencodedCharsFunnel(), -1, 0.03);
198 fail();
199 } catch (IllegalArgumentException expected) {}
200 try {
201 BloomFilter.create(Funnels.unencodedCharsFunnel(), 1, 0.0);
202 fail();
203 } catch (IllegalArgumentException expected) {}
204 try {
205 BloomFilter.create(Funnels.unencodedCharsFunnel(), 1, 1.0);
206 fail();
207 } catch (IllegalArgumentException expected) {}
208 }
209
210 public void testFailureWhenMoreThan255HashFunctionsAreNeeded() {
211 try {
212 int n = 1000;
213 double p = 0.00000000000000000000000000000000000000000000000000000000000000000000000000000001;
214 BloomFilter.create(Funnels.unencodedCharsFunnel(), n, p);
215 fail();
216 } catch (IllegalArgumentException expected) {}
217 }
218
219 public void testNullPointers() {
220 NullPointerTester tester = new NullPointerTester();
221 tester.testAllPublicInstanceMethods(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100));
222 tester.testAllPublicStaticMethods(BloomFilter.class);
223 }
224
225
226
227
228 public void testOptimalHashes() {
229 for (int n = 1; n < 1000; n++) {
230 for (int m = 0; m < 1000; m++) {
231 assertTrue(BloomFilter.optimalNumOfHashFunctions(n, m) > 0);
232 }
233 }
234 }
235
236
237 public void testOptimalNumOfHashFunctionsRounding() {
238 assertEquals(7, BloomFilter.optimalNumOfHashFunctions(319, 3072));
239 }
240
241
242
243
244 public void testOptimalSize() {
245 for (int n = 1; n < 1000; n++) {
246 for (double fpp = Double.MIN_VALUE; fpp < 1.0; fpp += 0.001) {
247 assertTrue(BloomFilter.optimalNumOfBits(n, fpp) >= 0);
248 }
249 }
250
251
252 Random random = new Random(0);
253 for (int repeats = 0; repeats < 10000; repeats++) {
254 assertTrue(BloomFilter.optimalNumOfBits(random.nextInt(1 << 16), random.nextDouble()) >= 0);
255 }
256
257
258 assertEquals(3327428144502L, BloomFilter.optimalNumOfBits(
259 Integer.MAX_VALUE, Double.MIN_VALUE));
260 try {
261 BloomFilter.create(HashTestUtils.BAD_FUNNEL, Integer.MAX_VALUE, Double.MIN_VALUE);
262 fail("we can't represent such a large BF!");
263 } catch (IllegalArgumentException expected) {
264 assertEquals("Could not create BloomFilter of 3327428144502 bits", expected.getMessage());
265 }
266 }
267
268 private void checkSanity(BloomFilter<Object> bf) {
269 assertFalse(bf.mightContain(new Object()));
270 assertFalse(bf.apply(new Object()));
271 for (int i = 0; i < 100; i++) {
272 Object o = new Object();
273 bf.put(o);
274 assertTrue(bf.mightContain(o));
275 assertTrue(bf.apply(o));
276 }
277 }
278
279 public void testCopy() {
280 BloomFilter<String> original = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
281 BloomFilter<String> copy = original.copy();
282 assertNotSame(original, copy);
283 assertEquals(original, copy);
284 }
285
286 public void testExpectedFpp() {
287 BloomFilter<Object> bf = BloomFilter.create(HashTestUtils.BAD_FUNNEL, 10, 0.03);
288 double fpp = bf.expectedFpp();
289 assertEquals(0.0, fpp);
290
291 while (fpp != 1.0) {
292 boolean changed = bf.put(new Object());
293 double newFpp = bf.expectedFpp();
294
295 assertTrue(changed ? newFpp > fpp : newFpp == fpp);
296 fpp = newFpp;
297 }
298 }
299
300 public void testBitSize() {
301 double fpp = 0.03;
302 for (int i = 1; i < 10000; i++) {
303 long numBits = BloomFilter.optimalNumOfBits(i, fpp);
304 int arraySize = Ints.checkedCast(LongMath.divide(numBits, 64, RoundingMode.CEILING));
305 assertEquals(
306 arraySize * Long.SIZE,
307 BloomFilter.create(Funnels.unencodedCharsFunnel(), i, fpp).bitSize());
308 }
309 }
310
311 public void testEquals_empty() {
312 new EqualsTester()
313 .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 100, 0.01))
314 .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 100, 0.02))
315 .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 200, 0.01))
316 .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 200, 0.02))
317 .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100, 0.01))
318 .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100, 0.02))
319 .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 200, 0.01))
320 .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 200, 0.02))
321 .testEquals();
322 }
323
324 public void testEquals() {
325 BloomFilter<String> bf1 = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
326 bf1.put("1");
327 bf1.put("2");
328
329 BloomFilter<String> bf2 = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
330 bf2.put("1");
331 bf2.put("2");
332
333 new EqualsTester()
334 .addEqualityGroup(bf1, bf2)
335 .testEquals();
336
337 bf2.put("3");
338
339 new EqualsTester()
340 .addEqualityGroup(bf1)
341 .addEqualityGroup(bf2)
342 .testEquals();
343 }
344
345 public void testEqualsWithCustomFunnel() {
346 BloomFilter<Long> bf1 = BloomFilter.create(new CustomFunnel(), 100);
347 BloomFilter<Long> bf2 = BloomFilter.create(new CustomFunnel(), 100);
348 assertEquals(bf1, bf2);
349 }
350
351 public void testSerializationWithCustomFunnel() {
352 SerializableTester.reserializeAndAssert(BloomFilter.create(new CustomFunnel(), 100));
353 }
354
355 private static final class CustomFunnel implements Funnel<Long> {
356 @Override
357 public void funnel(Long value, PrimitiveSink into) {
358 into.putLong(value);
359 }
360 @Override
361 public boolean equals(@Nullable Object object) {
362 return (object instanceof CustomFunnel);
363 }
364 @Override
365 public int hashCode() {
366 return 42;
367 }
368 }
369
370 public void testPutReturnValue() {
371 for (int i = 0; i < 10; i++) {
372 BloomFilter<String> bf = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
373 for (int j = 0; j < 10; j++) {
374 String value = new Object().toString();
375 boolean mightContain = bf.mightContain(value);
376 boolean put = bf.put(value);
377 assertTrue(mightContain != put);
378 }
379 }
380 }
381
382 public void testPutAll() {
383 int element1 = 1;
384 int element2 = 2;
385
386 BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 100);
387 bf1.put(element1);
388 assertTrue(bf1.mightContain(element1));
389 assertFalse(bf1.mightContain(element2));
390
391 BloomFilter<Integer> bf2 = BloomFilter.create(Funnels.integerFunnel(), 100);
392 bf2.put(element2);
393 assertFalse(bf2.mightContain(element1));
394 assertTrue(bf2.mightContain(element2));
395
396 assertTrue(bf1.isCompatible(bf2));
397 bf1.putAll(bf2);
398 assertTrue(bf1.mightContain(element1));
399 assertTrue(bf1.mightContain(element2));
400 assertFalse(bf2.mightContain(element1));
401 assertTrue(bf2.mightContain(element2));
402 }
403
404 public void testPutAllDifferentSizes() {
405 BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 1);
406 BloomFilter<Integer> bf2 = BloomFilter.create(Funnels.integerFunnel(), 10);
407
408 try {
409 assertFalse(bf1.isCompatible(bf2));
410 bf1.putAll(bf2);
411 fail();
412 } catch (IllegalArgumentException expected) {
413 }
414
415 try {
416 assertFalse(bf2.isCompatible(bf1));
417 bf2.putAll(bf1);
418 fail();
419 } catch (IllegalArgumentException expected) {
420 }
421 }
422
423 public void testPutAllWithSelf() {
424 BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 1);
425 try {
426 assertFalse(bf1.isCompatible(bf1));
427 bf1.putAll(bf1);
428 fail();
429 } catch (IllegalArgumentException expected) {
430 }
431 }
432
433 public void testJavaSerialization() {
434 BloomFilter<byte[]> bf = BloomFilter.create(Funnels.byteArrayFunnel(), 100);
435 for (int i = 0; i < 10; i++) {
436 bf.put(Ints.toByteArray(i));
437 }
438
439 BloomFilter<byte[]> copy = SerializableTester.reserialize(bf);
440 for (int i = 0; i < 10; i++) {
441 assertTrue(copy.mightContain(Ints.toByteArray(i)));
442 }
443 assertEquals(bf.expectedFpp(), copy.expectedFpp());
444
445 SerializableTester.reserializeAndAssert(bf);
446 }
447
448 public void testCustomSerialization() throws Exception {
449 Funnel<byte[]> funnel = Funnels.byteArrayFunnel();
450 BloomFilter<byte[]> bf = BloomFilter.create(funnel, 100);
451 for (int i = 0; i < 100; i++) {
452 bf.put(Ints.toByteArray(i));
453 }
454
455 ByteArrayOutputStream out = new ByteArrayOutputStream();
456 bf.writeTo(out);
457
458 assertEquals(bf, BloomFilter.readFrom(new ByteArrayInputStream(out.toByteArray()), funnel));
459 }
460
461
462
463
464
465 public void testBloomFilterStrategies() {
466 assertEquals(2, BloomFilterStrategies.values().length);
467 assertEquals(BloomFilterStrategies.MURMUR128_MITZ_32, BloomFilterStrategies.values()[0]);
468 assertEquals(BloomFilterStrategies.MURMUR128_MITZ_64, BloomFilterStrategies.values()[1]);
469 }
470 }