View Javadoc
1   /*
2    * Copyright (C) 2011 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    * http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package com.google.common.hash;
18  
19  import static com.google.common.base.Charsets.UTF_8;
20  import static com.google.common.hash.BloomFilterStrategies.BitArray;
21  
22  import com.google.common.collect.ImmutableSet;
23  import com.google.common.math.LongMath;
24  import com.google.common.primitives.Ints;
25  import com.google.common.testing.EqualsTester;
26  import com.google.common.testing.NullPointerTester;
27  import com.google.common.testing.SerializableTester;
28  
29  import junit.framework.TestCase;
30  
31  import java.io.ByteArrayInputStream;
32  import java.io.ByteArrayOutputStream;
33  import java.math.RoundingMode;
34  import java.util.Random;
35  
36  import javax.annotation.Nullable;
37  
38  /**
39   * Tests for SimpleGenericBloomFilter and derived BloomFilter views.
40   *
41   * @author Dimitris Andreou
42   */
43  public class BloomFilterTest extends TestCase {
44    public void testLargeBloomFilterDoesntOverflow() {
45      long numBits = Integer.MAX_VALUE;
46      numBits++;
47  
48      BitArray bitArray = new BitArray(numBits);
49      assertTrue(
50          "BitArray.bitSize() must return a positive number, but was " + bitArray.bitSize(),
51          bitArray.bitSize() > 0);
52  
53      // Ideally we would also test the bitSize() overflow of this BF, but it runs out of heap space
54      // BloomFilter.create(Funnels.unencodedCharsFunnel(), 244412641, 1e-11);
55    }
56  
57    public void testCreateAndCheckMitz32BloomFilterWithKnownFalsePositives() {
58      int numInsertions = 1000000;
59      BloomFilter<String> bf = BloomFilter.create(
60          Funnels.unencodedCharsFunnel(), numInsertions, 0.03,
61          BloomFilterStrategies.MURMUR128_MITZ_32);
62  
63      // Insert "numInsertions" even numbers into the BF.
64      for (int i = 0; i < numInsertions * 2; i += 2) {
65        bf.put(Integer.toString(i));
66      }
67  
68      // Assert that the BF "might" have all of the even numbers.
69      for (int i = 0; i < numInsertions * 2; i += 2) {
70        assertTrue(bf.mightContain(Integer.toString(i)));
71      }
72  
73      // Now we check for known false positives using a set of known false positives.
74      // (These are all of the false positives under 900.)
75      ImmutableSet<Integer> falsePositives = ImmutableSet.of(
76          49, 51, 59, 163, 199, 321, 325, 363, 367, 469, 545, 561, 727, 769, 773, 781);
77      for (int i = 1; i < 900; i += 2) {
78        if (!falsePositives.contains(i)) {
79          assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i)));
80        }
81      }
82  
83      // Check that there are exactly 29824 false positives for this BF.
84      int knownNumberOfFalsePositives = 29824;
85      int numFpp = 0;
86      for (int i = 1; i < numInsertions * 2; i += 2) {
87        if (bf.mightContain(Integer.toString(i))) {
88          numFpp++;
89        }
90      }
91      assertEquals(knownNumberOfFalsePositives, numFpp);
92      double actualFpp = (double) knownNumberOfFalsePositives / numInsertions;
93      double expectedFpp = bf.expectedFpp();
94      // The normal order of (expected, actual) is reversed here on purpose.
95      assertEquals(actualFpp, expectedFpp, 0.00015);
96    }
97  
98    public void testCreateAndCheckBloomFilterWithKnownFalsePositives64() {
99      int numInsertions = 1000000;
100     BloomFilter<String> bf = BloomFilter.create(
101         Funnels.unencodedCharsFunnel(), numInsertions, 0.03,
102         BloomFilterStrategies.MURMUR128_MITZ_64);
103 
104     // Insert "numInsertions" even numbers into the BF.
105     for (int i = 0; i < numInsertions * 2; i += 2) {
106       bf.put(Integer.toString(i));
107     }
108 
109     // Assert that the BF "might" have all of the even numbers.
110     for (int i = 0; i < numInsertions * 2; i += 2) {
111       assertTrue(bf.mightContain(Integer.toString(i)));
112     }
113 
114     // Now we check for known false positives using a set of known false positives.
115     // (These are all of the false positives under 900.)
116     ImmutableSet<Integer> falsePositives = ImmutableSet.of(
117         15, 25, 287, 319, 381, 399, 421, 465, 529, 697, 767, 857);
118     for (int i = 1; i < 900; i += 2) {
119       if (!falsePositives.contains(i)) {
120         assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i)));
121       }
122     }
123 
124     // Check that there are exactly 30104 false positives for this BF.
125     int knownNumberOfFalsePositives = 30104;
126     int numFpp = 0;
127     for (int i = 1; i < numInsertions * 2; i += 2) {
128       if (bf.mightContain(Integer.toString(i))) {
129         numFpp++;
130       }
131     }
132     assertEquals(knownNumberOfFalsePositives, numFpp);
133     double actualFpp = (double) knownNumberOfFalsePositives / numInsertions;
134     double expectedFpp = bf.expectedFpp();
135     // The normal order of (expected, actual) is reversed here on purpose.
136     assertEquals(actualFpp, expectedFpp, 0.00033);
137   }
138 
139   public void testCreateAndCheckBloomFilterWithKnownUtf8FalsePositives64() {
140     int numInsertions = 1000000;
141     BloomFilter<String> bf = BloomFilter.create(
142         Funnels.stringFunnel(UTF_8), numInsertions, 0.03,
143         BloomFilterStrategies.MURMUR128_MITZ_64);
144 
145     // Insert "numInsertions" even numbers into the BF.
146     for (int i = 0; i < numInsertions * 2; i += 2) {
147       bf.put(Integer.toString(i));
148     }
149 
150     // Assert that the BF "might" have all of the even numbers.
151     for (int i = 0; i < numInsertions * 2; i += 2) {
152       assertTrue(bf.mightContain(Integer.toString(i)));
153     }
154 
155     // Now we check for known false positives using a set of known false positives.
156     // (These are all of the false positives under 900.)
157     ImmutableSet<Integer> falsePositives =
158         ImmutableSet.of(129, 471, 723, 89, 751, 835, 871);
159     for (int i = 1; i < 900; i += 2) {
160       if (!falsePositives.contains(i)) {
161         assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i)));
162       }
163     }
164 
165     // Check that there are exactly 29763 false positives for this BF.
166     int knownNumberOfFalsePositives = 29763;
167     int numFpp = 0;
168     for (int i = 1; i < numInsertions * 2; i += 2) {
169       if (bf.mightContain(Integer.toString(i))) {
170         numFpp++;
171       }
172     }
173     assertEquals(knownNumberOfFalsePositives, numFpp);
174     double actualFpp = (double) knownNumberOfFalsePositives / numInsertions;
175     double expectedFpp = bf.expectedFpp();
176     // The normal order of (expected, actual) is reversed here on purpose.
177     assertEquals(actualFpp, expectedFpp, 0.00033);
178   }
179 
180   /**
181    * Sanity checking with many combinations of false positive rates and expected insertions
182    */
183   public void testBasic() {
184     for (double fpr = 0.0000001; fpr < 0.1; fpr *= 10) {
185       for (int expectedInsertions = 1; expectedInsertions <= 10000; expectedInsertions *= 10) {
186         checkSanity(BloomFilter.create(HashTestUtils.BAD_FUNNEL, expectedInsertions, fpr));
187       }
188     }
189   }
190 
191   public void testPreconditions() {
192     try {
193       BloomFilter.create(Funnels.unencodedCharsFunnel(), -1);
194       fail();
195     } catch (IllegalArgumentException expected) {}
196     try {
197       BloomFilter.create(Funnels.unencodedCharsFunnel(), -1, 0.03);
198       fail();
199     } catch (IllegalArgumentException expected) {}
200     try {
201       BloomFilter.create(Funnels.unencodedCharsFunnel(), 1, 0.0);
202       fail();
203     } catch (IllegalArgumentException expected) {}
204     try {
205       BloomFilter.create(Funnels.unencodedCharsFunnel(), 1, 1.0);
206       fail();
207     } catch (IllegalArgumentException expected) {}
208   }
209 
210   public void testFailureWhenMoreThan255HashFunctionsAreNeeded() {
211     try {
212       int n = 1000;
213       double p = 0.00000000000000000000000000000000000000000000000000000000000000000000000000000001;
214       BloomFilter.create(Funnels.unencodedCharsFunnel(), n, p);
215       fail();
216     } catch (IllegalArgumentException expected) {}
217   }
218 
219   public void testNullPointers() {
220     NullPointerTester tester = new NullPointerTester();
221     tester.testAllPublicInstanceMethods(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100));
222     tester.testAllPublicStaticMethods(BloomFilter.class);
223   }
224 
225   /**
226    * Tests that we never get an optimal hashes number of zero.
227    */
228   public void testOptimalHashes() {
229     for (int n = 1; n < 1000; n++) {
230       for (int m = 0; m < 1000; m++) {
231         assertTrue(BloomFilter.optimalNumOfHashFunctions(n, m) > 0);
232       }
233     }
234   }
235 
236   // https://code.google.com/p/guava-libraries/issues/detail?id=1781
237   public void testOptimalNumOfHashFunctionsRounding() {
238     assertEquals(7, BloomFilter.optimalNumOfHashFunctions(319, 3072));
239   }
240 
241   /**
242    * Tests that we always get a non-negative optimal size.
243    */
244   public void testOptimalSize() {
245     for (int n = 1; n < 1000; n++) {
246       for (double fpp = Double.MIN_VALUE; fpp < 1.0; fpp += 0.001) {
247         assertTrue(BloomFilter.optimalNumOfBits(n, fpp) >= 0);
248       }
249     }
250 
251     // some random values
252     Random random = new Random(0);
253     for (int repeats = 0; repeats < 10000; repeats++) {
254       assertTrue(BloomFilter.optimalNumOfBits(random.nextInt(1 << 16), random.nextDouble()) >= 0);
255     }
256 
257     // and some crazy values (this used to be capped to Integer.MAX_VALUE, now it can go bigger
258     assertEquals(3327428144502L, BloomFilter.optimalNumOfBits(
259         Integer.MAX_VALUE, Double.MIN_VALUE));
260     try {
261       BloomFilter.create(HashTestUtils.BAD_FUNNEL, Integer.MAX_VALUE, Double.MIN_VALUE);
262       fail("we can't represent such a large BF!");
263     } catch (IllegalArgumentException expected) {
264       assertEquals("Could not create BloomFilter of 3327428144502 bits", expected.getMessage());
265     }
266   }
267 
268   private void checkSanity(BloomFilter<Object> bf) {
269     assertFalse(bf.mightContain(new Object()));
270     assertFalse(bf.apply(new Object()));
271     for (int i = 0; i < 100; i++) {
272       Object o = new Object();
273       bf.put(o);
274       assertTrue(bf.mightContain(o));
275       assertTrue(bf.apply(o));
276     }
277   }
278 
279   public void testCopy() {
280     BloomFilter<String> original = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
281     BloomFilter<String> copy = original.copy();
282     assertNotSame(original, copy);
283     assertEquals(original, copy);
284   }
285 
286   public void testExpectedFpp() {
287     BloomFilter<Object> bf = BloomFilter.create(HashTestUtils.BAD_FUNNEL, 10, 0.03);
288     double fpp = bf.expectedFpp();
289     assertEquals(0.0, fpp);
290     // usually completed in less than 200 iterations
291     while (fpp != 1.0) {
292       boolean changed = bf.put(new Object());
293       double newFpp = bf.expectedFpp();
294       // if changed, the new fpp is strictly higher, otherwise it is the same
295       assertTrue(changed ? newFpp > fpp : newFpp == fpp);
296       fpp = newFpp;
297     }
298   }
299 
300   public void testBitSize() {
301     double fpp = 0.03;
302     for (int i = 1; i < 10000; i++) {
303       long numBits = BloomFilter.optimalNumOfBits(i, fpp);
304       int arraySize = Ints.checkedCast(LongMath.divide(numBits, 64, RoundingMode.CEILING));
305       assertEquals(
306           arraySize * Long.SIZE,
307           BloomFilter.create(Funnels.unencodedCharsFunnel(), i, fpp).bitSize());
308     }
309   }
310 
311   public void testEquals_empty() {
312     new EqualsTester()
313         .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 100, 0.01))
314         .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 100, 0.02))
315         .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 200, 0.01))
316         .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 200, 0.02))
317         .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100, 0.01))
318         .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100, 0.02))
319         .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 200, 0.01))
320         .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 200, 0.02))
321         .testEquals();
322   }
323 
324   public void testEquals() {
325     BloomFilter<String> bf1 = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
326     bf1.put("1");
327     bf1.put("2");
328 
329     BloomFilter<String> bf2 = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
330     bf2.put("1");
331     bf2.put("2");
332 
333     new EqualsTester()
334         .addEqualityGroup(bf1, bf2)
335         .testEquals();
336 
337     bf2.put("3");
338 
339     new EqualsTester()
340         .addEqualityGroup(bf1)
341         .addEqualityGroup(bf2)
342         .testEquals();
343   }
344 
345   public void testEqualsWithCustomFunnel() {
346     BloomFilter<Long> bf1 = BloomFilter.create(new CustomFunnel(), 100);
347     BloomFilter<Long> bf2 = BloomFilter.create(new CustomFunnel(), 100);
348     assertEquals(bf1, bf2);
349   }
350 
351   public void testSerializationWithCustomFunnel() {
352     SerializableTester.reserializeAndAssert(BloomFilter.create(new CustomFunnel(), 100));
353   }
354 
355   private static final class CustomFunnel implements Funnel<Long> {
356     @Override
357     public void funnel(Long value, PrimitiveSink into) {
358       into.putLong(value);
359     }
360     @Override
361     public boolean equals(@Nullable Object object) {
362       return (object instanceof CustomFunnel);
363     }
364     @Override
365     public int hashCode() {
366       return 42;
367     }
368   }
369 
370   public void testPutReturnValue() {
371     for (int i = 0; i < 10; i++) {
372       BloomFilter<String> bf = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
373       for (int j = 0; j < 10; j++) {
374         String value = new Object().toString();
375         boolean mightContain = bf.mightContain(value);
376         boolean put = bf.put(value);
377         assertTrue(mightContain != put);
378       }
379     }
380   }
381 
382   public void testPutAll() {
383     int element1 = 1;
384     int element2 = 2;
385 
386     BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 100);
387     bf1.put(element1);
388     assertTrue(bf1.mightContain(element1));
389     assertFalse(bf1.mightContain(element2));
390 
391     BloomFilter<Integer> bf2 = BloomFilter.create(Funnels.integerFunnel(), 100);
392     bf2.put(element2);
393     assertFalse(bf2.mightContain(element1));
394     assertTrue(bf2.mightContain(element2));
395 
396     assertTrue(bf1.isCompatible(bf2));
397     bf1.putAll(bf2);
398     assertTrue(bf1.mightContain(element1));
399     assertTrue(bf1.mightContain(element2));
400     assertFalse(bf2.mightContain(element1));
401     assertTrue(bf2.mightContain(element2));
402   }
403 
404   public void testPutAllDifferentSizes() {
405     BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 1);
406     BloomFilter<Integer> bf2 = BloomFilter.create(Funnels.integerFunnel(), 10);
407 
408     try {
409       assertFalse(bf1.isCompatible(bf2));
410       bf1.putAll(bf2);
411       fail();
412     } catch (IllegalArgumentException expected) {
413     }
414 
415     try {
416       assertFalse(bf2.isCompatible(bf1));
417       bf2.putAll(bf1);
418       fail();
419     } catch (IllegalArgumentException expected) {
420     }
421   }
422 
423   public void testPutAllWithSelf() {
424     BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 1);
425     try {
426       assertFalse(bf1.isCompatible(bf1));
427       bf1.putAll(bf1);
428       fail();
429     } catch (IllegalArgumentException expected) {
430     }
431   }
432 
433   public void testJavaSerialization() {
434     BloomFilter<byte[]> bf = BloomFilter.create(Funnels.byteArrayFunnel(), 100);
435     for (int i = 0; i < 10; i++) {
436       bf.put(Ints.toByteArray(i));
437     }
438 
439     BloomFilter<byte[]> copy = SerializableTester.reserialize(bf);
440     for (int i = 0; i < 10; i++) {
441       assertTrue(copy.mightContain(Ints.toByteArray(i)));
442     }
443     assertEquals(bf.expectedFpp(), copy.expectedFpp());
444 
445     SerializableTester.reserializeAndAssert(bf);
446   }
447 
448   public void testCustomSerialization() throws Exception {
449     Funnel<byte[]> funnel = Funnels.byteArrayFunnel();
450     BloomFilter<byte[]> bf = BloomFilter.create(funnel, 100);
451     for (int i = 0; i < 100; i++) {
452       bf.put(Ints.toByteArray(i));
453     }
454 
455     ByteArrayOutputStream out = new ByteArrayOutputStream();
456     bf.writeTo(out);
457 
458     assertEquals(bf, BloomFilter.readFrom(new ByteArrayInputStream(out.toByteArray()), funnel));
459   }
460 
461   /**
462    * This test will fail whenever someone updates/reorders the BloomFilterStrategies constants.
463    * Only appending a new constant is allowed.
464    */
465   public void testBloomFilterStrategies() {
466     assertEquals(2, BloomFilterStrategies.values().length);
467     assertEquals(BloomFilterStrategies.MURMUR128_MITZ_32, BloomFilterStrategies.values()[0]);
468     assertEquals(BloomFilterStrategies.MURMUR128_MITZ_64, BloomFilterStrategies.values()[1]);
469   }
470 }