View Javadoc
1   /*
2    * Copyright (C) 2011 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    * http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package com.google.common.hash;
18  
19  import static org.junit.Assert.assertEquals;
20  
21  import com.google.common.base.Charsets;
22  import com.google.common.collect.ImmutableSet;
23  import com.google.common.collect.Sets;
24  import com.google.common.primitives.Ints;
25  import com.google.common.testing.EqualsTester;
26  
27  import org.junit.Assert;
28  
29  import java.nio.charset.Charset;
30  import java.util.Arrays;
31  import java.util.Random;
32  import java.util.Set;
33  
34  /**
35   * Various utilities for testing {@link HashFunction}s.
36   *
37   * @author Dimitris Andreou
38   * @author Kurt Alfred Kluever
39   */
40  final class HashTestUtils {
41    private HashTestUtils() {}
42  
43    /**
44     * Converts a string, which should contain only ascii-representable characters, to a byte[].
45     */
46    static byte[] ascii(String string) {
47      byte[] bytes = new byte[string.length()];
48      for (int i = 0; i < string.length(); i++) {
49        bytes[i] = (byte) string.charAt(i);
50      }
51      return bytes;
52    }
53  
54    interface HashFn {
55      byte[] hash(byte[] input, int seed);
56    }
57  
58    static void verifyHashFunction(HashFn hashFunction, int hashbits, int expected) {
59      int hashBytes = hashbits / 8;
60  
61      byte[] key = new byte[256];
62      byte[] hashes = new byte[hashBytes * 256];
63  
64      // Hash keys of the form {}, {0}, {0,1}, {0,1,2}... up to N=255,using 256-N as the seed
65      for (int i = 0; i < 256; i++) {
66        key[i] = (byte) i;
67        int seed = 256 - i;
68        byte[] hash = hashFunction.hash(Arrays.copyOf(key, i), seed);
69        System.arraycopy(hash, 0, hashes, i * hashBytes, hash.length);
70      }
71  
72      // Then hash the result array
73      byte[] result = hashFunction.hash(hashes, 0);
74  
75      // interpreted in little-endian order.
76      int verification = Integer.reverseBytes(Ints.fromByteArray(result));
77  
78      if (expected != verification) {
79        throw new AssertionError("Expected: " + Integer.toHexString(expected)
80            + " got: " + Integer.toHexString(verification));
81      }
82    }
83  
84    static final Funnel<Object> BAD_FUNNEL = new Funnel<Object>() {
85      @Override public void funnel(Object object, PrimitiveSink bytePrimitiveSink) {
86        bytePrimitiveSink.putInt(object.hashCode());
87      }
88    };
89  
90    static enum RandomHasherAction {
91      PUT_BOOLEAN() {
92        @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
93          boolean value = random.nextBoolean();
94          for (PrimitiveSink sink : sinks) {
95            sink.putBoolean(value);
96          }
97        }
98      },
99      PUT_BYTE() {
100       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
101         int value = random.nextInt();
102         for (PrimitiveSink sink : sinks) {
103           sink.putByte((byte) value);
104         }
105       }
106     },
107     PUT_SHORT() {
108       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
109         short value = (short) random.nextInt();
110         for (PrimitiveSink sink : sinks) {
111           sink.putShort(value);
112         }
113       }
114     },
115     PUT_CHAR() {
116       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
117         char value = (char) random.nextInt();
118         for (PrimitiveSink sink : sinks) {
119           sink.putChar(value);
120         }
121       }
122     },
123     PUT_INT() {
124       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
125         int value = random.nextInt();
126         for (PrimitiveSink sink : sinks) {
127           sink.putInt(value);
128         }
129       }
130     },
131     PUT_LONG() {
132       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
133         long value = random.nextLong();
134         for (PrimitiveSink sink : sinks) {
135           sink.putLong(value);
136         }
137       }
138     },
139     PUT_FLOAT() {
140       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
141         float value = random.nextFloat();
142         for (PrimitiveSink sink : sinks) {
143           sink.putFloat(value);
144         }
145       }
146     },
147     PUT_DOUBLE() {
148       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
149         double value = random.nextDouble();
150         for (PrimitiveSink sink : sinks) {
151           sink.putDouble(value);
152         }
153       }
154     },
155     PUT_BYTES() {
156       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
157         byte[] value = new byte[random.nextInt(128)];
158         random.nextBytes(value);
159         for (PrimitiveSink sink : sinks) {
160           sink.putBytes(value);
161         }
162       }
163     },
164     PUT_BYTES_INT_INT() {
165       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
166         byte[] value = new byte[random.nextInt(128)];
167         random.nextBytes(value);
168         int off = random.nextInt(value.length + 1);
169         int len = random.nextInt(value.length - off + 1);
170         for (PrimitiveSink sink : sinks) {
171           sink.putBytes(value);
172         }
173       }
174     },
175     PUT_STRING() {
176       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
177         char[] value = new char[random.nextInt(128)];
178         for (int i = 0; i < value.length; i++) {
179           value[i] = (char) random.nextInt();
180         }
181         String s = new String(value);
182         for (PrimitiveSink sink : sinks) {
183           sink.putUnencodedChars(s);
184         }
185       }
186     },
187     PUT_STRING_LOW_SURROGATE() {
188       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
189         String s = new String(new char[] { randomLowSurrogate(random) });
190         for (PrimitiveSink sink : sinks) {
191           sink.putUnencodedChars(s);
192         }
193       }
194     },
195     PUT_STRING_HIGH_SURROGATE() {
196       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
197         String s = new String(new char[] { randomHighSurrogate(random) });
198         for (PrimitiveSink sink : sinks) {
199           sink.putUnencodedChars(s);
200         }
201       }
202     },
203     PUT_STRING_LOW_HIGH_SURROGATE() {
204       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
205         String s = new String(new char[] {
206             randomLowSurrogate(random), randomHighSurrogate(random)});
207         for (PrimitiveSink sink : sinks) {
208           sink.putUnencodedChars(s);
209         }
210       }
211     },
212     PUT_STRING_HIGH_LOW_SURROGATE() {
213       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
214         String s = new String(new char[] {
215             randomHighSurrogate(random), randomLowSurrogate(random)});
216         for (PrimitiveSink sink : sinks) {
217           sink.putUnencodedChars(s);
218         }
219       }
220     };
221 
222     abstract void performAction(Random random, Iterable<? extends PrimitiveSink> sinks);
223 
224     private static final RandomHasherAction[] actions = values();
225 
226     static RandomHasherAction pickAtRandom(Random random) {
227       return actions[random.nextInt(actions.length)];
228     }
229   }
230   
231   /**
232    * Test that the hash function contains no funnels. A funnel is a situation where a set of input
233    * (key) bits 'affects' a strictly smaller set of output bits. Funneling is bad because it can
234    * result in more-than-ideal collisions for a non-uniformly distributed key space. In practice,
235    * most key spaces are ANYTHING BUT uniformly distributed. A bit(i) in the input is said to
236    * 'affect' a bit(j) in the output if two inputs, identical but for bit(i), will differ at output
237    * bit(j) about half the time
238    *
239    * <p>Funneling is pretty simple to detect. The key idea is to find example keys which
240    * unequivocably demonstrate that funneling cannot be occuring. This is done bit-by-bit. For
241    * each input bit(i) and output bit(j), two pairs of keys must be found with all bits identical
242    * except bit(i). One pair must differ in output bit(j), and one pair must not. This proves that
243    * input bit(i) can alter output bit(j).
244    */
245   static void checkNoFunnels(HashFunction function) {
246     Random rand = new Random(0);
247     int keyBits = 32;
248     int hashBits = function.bits();
249 
250     // output loop tests input bit
251     for (int i = 0; i < keyBits; i++) {
252       int same = 0x0; // bitset for output bits with same values
253       int diff = 0x0; // bitset for output bits with different values
254       int count = 0;
255       // originally was 2 * Math.log(...), making it try more times to avoid flakiness issues
256       int maxCount = (int) (4 * Math.log(2 * keyBits * hashBits) + 1);
257       while (same != 0xffffffff || diff != 0xffffffff) {
258         int key1 = rand.nextInt();
259         // flip input bit for key2
260         int key2 = key1 ^ (1 << i);
261         // get hashes
262         int hash1 = function.hashInt(key1).asInt();
263         int hash2 = function.hashInt(key2).asInt();
264         // test whether the hash values have same output bits
265         same |= ~(hash1 ^ hash2);
266         // test whether the hash values have different output bits
267         diff |= (hash1 ^ hash2);
268 
269         count++;
270         // check whether we've exceeded the probabilistically
271         // likely number of trials to have proven no funneling
272         if (count > maxCount) {
273           Assert.fail("input bit(" + i + ") was found not to affect all " +
274                hashBits + " output bits; The unaffected bits are " +
275                "as follows: " + ~(same & diff) + ". This was " +
276                "determined after " + count + " trials.");
277         }
278       }
279     }
280   }
281 
282   /**
283    * Test for avalanche. Avalanche means that output bits differ with roughly 1/2 probability on
284    * different input keys. This test verifies that each possible 1-bit key delta achieves avalanche.
285    *
286    * <p>For more information: http://burtleburtle.net/bob/hash/avalanche.html
287    */
288   static void checkAvalanche(HashFunction function, int trials, double epsilon) {
289     Random rand = new Random(0);
290     int keyBits = 32;
291     int hashBits = function.bits();
292     for (int i = 0; i < keyBits; i++) {
293       int[] same = new int[hashBits];
294       int[] diff = new int[hashBits];
295       // go through trials to compute probability
296       for (int j = 0; j < trials; j++) {
297         int key1 = rand.nextInt();
298         // flip input bit for key2
299         int key2 = key1 ^ (1 << i);
300         // compute hash values
301         int hash1 = function.hashInt(key1).asInt();
302         int hash2 = function.hashInt(key2).asInt();
303         for (int k = 0; k < hashBits; k++) {
304           if ((hash1 & (1 << k)) == (hash2 & (1 << k))) {
305             same[k] += 1;
306           } else {
307             diff[k] += 1;
308           }
309         }
310       }
311       // measure probability and assert it's within margin of error
312       for (int j = 0; j < hashBits; j++) {
313         double prob = (double) diff[j] / (double) (diff[j] + same[j]);
314         Assert.assertEquals(0.50d, prob, epsilon);
315       }
316     }
317   }
318 
319   /**
320    * Test for 2-bit characteristics. A characteristic is a delta in the input which is repeated in
321    * the output. For example, if f() is a block cipher and c is a characteristic, then
322    * f(x^c) = f(x)^c with greater than expected probability. The test for funneling is merely a test
323    * for 1-bit characteristics.
324    *
325    * <p>There is more general code provided by Bob Jenkins to test arbitrarily sized characteristics
326    * using the magic of gaussian elimination: http://burtleburtle.net/bob/crypto/findingc.html.
327    */
328   static void checkNo2BitCharacteristics(HashFunction function) {
329     Random rand = new Random(0);
330     int keyBits = 32;
331 
332     // get every one of (keyBits choose 2) deltas:
333     for (int i = 0; i < keyBits; i++) {
334       for (int j = 0; j < keyBits; j++) {
335         if (j <= i) continue;
336         int count = 0;
337         int maxCount = 20; // the probability of error here is miniscule
338         boolean diff = false;
339 
340         while (!diff) {
341           int delta = (1 << i) | (1 << j);
342           int key1 = rand.nextInt();
343           // apply delta
344           int key2 = key1 ^ delta;
345 
346           // get hashes
347           int hash1 = function.hashInt(key1).asInt();
348           int hash2 = function.hashInt(key2).asInt();
349 
350           // this 2-bit candidate delta is not a characteristic
351           // if deltas are different
352           if ((hash1 ^ hash2) != delta) {
353             diff = true;
354             continue;
355           }
356 
357           // check if we've exceeded the probabilistically
358           // likely number of trials to have proven 2-bit candidate
359           // is not a characteristic
360           count++;
361           if (count > maxCount) {
362             Assert.fail("2-bit delta (" + i + ", " + j + ") is likely a " +
363                  "characteristic for this hash. This was " +
364                  "determined after " + count + " trials");
365           }
366         }
367       }
368     }
369   }
370 
371   /**
372    * Test for avalanche with 2-bit deltas. Most probabilities of output bit(j) differing are well
373    * within 50%.
374    */
375   static void check2BitAvalanche(HashFunction function, int trials, double epsilon) {
376     Random rand = new Random(0);
377     int keyBits = 32;
378     int hashBits = function.bits();
379     for (int bit1 = 0; bit1 < keyBits; bit1++) {
380       for (int bit2 = 0; bit2 < keyBits; bit2++) {
381         if (bit2 <= bit1) continue;
382         int delta = (1 << bit1) | (1 << bit2);
383         int[] same = new int[hashBits];
384         int[] diff = new int[hashBits];
385         // go through trials to compute probability
386         for (int j = 0; j < trials; j++) {
387           int key1 = rand.nextInt();
388           // flip input bit for key2
389           int key2 = key1 ^ delta;
390           // compute hash values
391           int hash1 = function.hashInt(key1).asInt();
392           int hash2 = function.hashInt(key2).asInt();
393           for (int k = 0; k < hashBits; k++) {
394             if ((hash1 & (1 << k)) == (hash2 & (1 << k))) {
395               same[k] += 1;
396             } else {
397               diff[k] += 1;
398             }
399           }
400         }
401         // measure probability and assert it's within margin of error
402         for (int j = 0; j < hashBits; j++) {
403           double prob = (double) diff[j] / (double) (diff[j] + same[j]);
404           Assert.assertEquals(0.50d, prob, epsilon);
405         }
406       }
407     }
408   }
409 
410   /**
411    * Checks that a Hasher returns the same HashCode when given the same input, and also
412    * that the collision rate looks sane.
413    */
414   static void assertInvariants(HashFunction hashFunction) {
415     int objects = 100;
416     Set<HashCode> hashcodes = Sets.newHashSetWithExpectedSize(objects);
417     for (int i = 0; i < objects; i++) {
418       Object o = new Object();
419       HashCode hashcode1 = hashFunction.hashObject(o, HashTestUtils.BAD_FUNNEL);
420       HashCode hashcode2 = hashFunction.hashObject(o, HashTestUtils.BAD_FUNNEL);
421       Assert.assertEquals(hashcode1, hashcode2); // idempotent
422       Assert.assertEquals(hashFunction.bits(), hashcode1.bits());
423       Assert.assertEquals(hashFunction.bits(), hashcode1.asBytes().length * 8);
424       hashcodes.add(hashcode1);
425     }
426     Assert.assertTrue(hashcodes.size() > objects * 0.95); // quite relaxed test
427 
428     assertHashBytesThrowsCorrectExceptions(hashFunction);
429     assertIndependentHashers(hashFunction);
430     assertShortcutsAreEquivalent(hashFunction, 512);
431   }
432 
433   static void assertHashBytesThrowsCorrectExceptions(HashFunction hashFunction) {
434     hashFunction.hashBytes(new byte[64], 0, 0);
435 
436     try {
437       hashFunction.hashBytes(new byte[128], -1, 128);
438       Assert.fail();
439     } catch (IndexOutOfBoundsException expected) {}
440     try {
441       hashFunction.hashBytes(new byte[128], 64, 256 /* too long len */);
442       Assert.fail();
443     } catch (IndexOutOfBoundsException expected) {}
444     try {
445       hashFunction.hashBytes(new byte[64], 0, -1);
446       Assert.fail();
447     } catch (IndexOutOfBoundsException expected) {}
448   }
449 
450   static void assertIndependentHashers(HashFunction hashFunction) {
451     int numActions = 100;
452     // hashcodes from non-overlapping hash computations
453     HashCode expected1 = randomHash(hashFunction, new Random(1L), numActions);
454     HashCode expected2 = randomHash(hashFunction, new Random(2L), numActions);
455 
456     // equivalent, but overlapping, computations (should produce the same results as above)
457     Random random1 = new Random(1L);
458     Random random2 = new Random(2L);
459     Hasher hasher1 = hashFunction.newHasher();
460     Hasher hasher2 = hashFunction.newHasher();
461     for (int i = 0; i < numActions; i++) {
462       RandomHasherAction.pickAtRandom(random1).performAction(random1, ImmutableSet.of(hasher1));
463       RandomHasherAction.pickAtRandom(random2).performAction(random2, ImmutableSet.of(hasher2));
464     }
465 
466     Assert.assertEquals(expected1, hasher1.hash());
467     Assert.assertEquals(expected2, hasher2.hash());
468   }
469 
470   static HashCode randomHash(HashFunction hashFunction, Random random, int numActions) {
471     Hasher hasher = hashFunction.newHasher();
472     for (int i = 0; i < numActions; i++) {
473       RandomHasherAction.pickAtRandom(random).performAction(random, ImmutableSet.of(hasher));
474     }
475     return hasher.hash();
476   }
477 
478   private static void assertShortcutsAreEquivalent(HashFunction hashFunction, int trials) {
479     Random random = new Random(42085L);
480     for (int i = 0; i < trials; i++) {
481       assertHashBytesEquivalence(hashFunction, random);
482       assertHashIntEquivalence(hashFunction, random);
483       assertHashLongEquivalence(hashFunction, random);
484       assertHashStringEquivalence(hashFunction, random);
485       assertHashStringWithSurrogatesEquivalence(hashFunction, random);
486     }
487   }
488 
489   private static void assertHashBytesEquivalence(HashFunction hashFunction, Random random) {
490     int size = random.nextInt(2048);
491     byte[] bytes = new byte[size];
492     random.nextBytes(bytes);
493     assertEquals(hashFunction.hashBytes(bytes),
494         hashFunction.newHasher(size).putBytes(bytes).hash());
495     int off = random.nextInt(size);
496     int len = random.nextInt(size - off);
497     assertEquals(hashFunction.hashBytes(bytes, off, len),
498         hashFunction.newHasher(size).putBytes(bytes, off, len).hash());
499   }
500 
501   private static void assertHashIntEquivalence(HashFunction hashFunction, Random random) {
502     int i = random.nextInt();
503     assertEquals(hashFunction.hashInt(i),
504         hashFunction.newHasher().putInt(i).hash());
505   }
506 
507   private static void assertHashLongEquivalence(HashFunction hashFunction, Random random) {
508     long l = random.nextLong();
509     assertEquals(hashFunction.hashLong(l),
510         hashFunction.newHasher().putLong(l).hash());
511   }
512 
513   private static final ImmutableSet<Charset> CHARSETS = ImmutableSet.of(
514       Charsets.ISO_8859_1,
515       Charsets.US_ASCII,
516       Charsets.UTF_16,
517       Charsets.UTF_16BE,
518       Charsets.UTF_16LE,
519       Charsets.UTF_8);
520 
521   private static void assertHashStringEquivalence(HashFunction hashFunction, Random random) {
522     // Test that only data and data-order is important, not the individual operations.
523     new EqualsTester()
524         .addEqualityGroup(
525             hashFunction.hashUnencodedChars("abc"),
526             hashFunction.newHasher().putUnencodedChars("abc").hash(),
527             hashFunction.newHasher().putUnencodedChars("ab").putUnencodedChars("c").hash(),
528             hashFunction.newHasher().putUnencodedChars("a").putUnencodedChars("bc").hash(),
529             hashFunction.newHasher().putUnencodedChars("a").putUnencodedChars("b")
530                 .putUnencodedChars("c").hash(),
531             hashFunction.newHasher().putChar('a').putUnencodedChars("bc").hash(),
532             hashFunction.newHasher().putUnencodedChars("ab").putChar('c').hash(),
533             hashFunction.newHasher().putChar('a').putChar('b').putChar('c').hash())
534         .testEquals();
535 
536     int size = random.nextInt(2048);
537     byte[] bytes = new byte[size];
538     random.nextBytes(bytes);
539     String string = new String(bytes);
540     assertEquals(hashFunction.hashUnencodedChars(string),
541         hashFunction.newHasher().putUnencodedChars(string).hash());
542     for (Charset charset : CHARSETS) {
543       assertEquals(hashFunction.hashString(string, charset),
544           hashFunction.newHasher().putString(string, charset).hash());
545     }
546   }
547 
548   /**
549    * This verifies that putUnencodedChars(String) and hashUnencodedChars(String) are equivalent,
550    * even for funny strings composed by (possibly unmatched, and mostly illegal) surrogate
551    * characters. (But doesn't test that they do the right thing - just their consistency).
552    */
553   private static void assertHashStringWithSurrogatesEquivalence(
554       HashFunction hashFunction, Random random) {
555     int size = random.nextInt(8) + 1;
556     char[] chars = new char[size];
557     for (int i = 0; i < chars.length; i++) {
558       chars[i] = random.nextBoolean() ? randomLowSurrogate(random) : randomHighSurrogate(random);
559     }
560     String string = new String(chars);
561     assertEquals(hashFunction.hashUnencodedChars(string),
562         hashFunction.newHasher().putUnencodedChars(string).hash());
563   }
564 
565   static char randomLowSurrogate(Random random) {
566     return (char) (Character.MIN_LOW_SURROGATE
567         + random.nextInt(Character.MAX_LOW_SURROGATE - Character.MIN_LOW_SURROGATE + 1));
568   }
569 
570   static char randomHighSurrogate(Random random) {
571     return (char) (Character.MIN_HIGH_SURROGATE
572         + random.nextInt(Character.MAX_HIGH_SURROGATE - Character.MIN_HIGH_SURROGATE + 1));
573   }
574 }