1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.google.common.hash;
18
19 import static org.junit.Assert.assertEquals;
20
21 import com.google.common.base.Charsets;
22 import com.google.common.collect.ImmutableSet;
23 import com.google.common.collect.Sets;
24 import com.google.common.primitives.Ints;
25 import com.google.common.testing.EqualsTester;
26
27 import org.junit.Assert;
28
29 import java.nio.charset.Charset;
30 import java.util.Arrays;
31 import java.util.Random;
32 import java.util.Set;
33
34
35
36
37
38
39
40 final class HashTestUtils {
41 private HashTestUtils() {}
42
43
44
45
46 static byte[] ascii(String string) {
47 byte[] bytes = new byte[string.length()];
48 for (int i = 0; i < string.length(); i++) {
49 bytes[i] = (byte) string.charAt(i);
50 }
51 return bytes;
52 }
53
54 interface HashFn {
55 byte[] hash(byte[] input, int seed);
56 }
57
58 static void verifyHashFunction(HashFn hashFunction, int hashbits, int expected) {
59 int hashBytes = hashbits / 8;
60
61 byte[] key = new byte[256];
62 byte[] hashes = new byte[hashBytes * 256];
63
64
65 for (int i = 0; i < 256; i++) {
66 key[i] = (byte) i;
67 int seed = 256 - i;
68 byte[] hash = hashFunction.hash(Arrays.copyOf(key, i), seed);
69 System.arraycopy(hash, 0, hashes, i * hashBytes, hash.length);
70 }
71
72
73 byte[] result = hashFunction.hash(hashes, 0);
74
75
76 int verification = Integer.reverseBytes(Ints.fromByteArray(result));
77
78 if (expected != verification) {
79 throw new AssertionError("Expected: " + Integer.toHexString(expected)
80 + " got: " + Integer.toHexString(verification));
81 }
82 }
83
84 static final Funnel<Object> BAD_FUNNEL = new Funnel<Object>() {
85 @Override public void funnel(Object object, PrimitiveSink bytePrimitiveSink) {
86 bytePrimitiveSink.putInt(object.hashCode());
87 }
88 };
89
90 static enum RandomHasherAction {
91 PUT_BOOLEAN() {
92 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
93 boolean value = random.nextBoolean();
94 for (PrimitiveSink sink : sinks) {
95 sink.putBoolean(value);
96 }
97 }
98 },
99 PUT_BYTE() {
100 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
101 int value = random.nextInt();
102 for (PrimitiveSink sink : sinks) {
103 sink.putByte((byte) value);
104 }
105 }
106 },
107 PUT_SHORT() {
108 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
109 short value = (short) random.nextInt();
110 for (PrimitiveSink sink : sinks) {
111 sink.putShort(value);
112 }
113 }
114 },
115 PUT_CHAR() {
116 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
117 char value = (char) random.nextInt();
118 for (PrimitiveSink sink : sinks) {
119 sink.putChar(value);
120 }
121 }
122 },
123 PUT_INT() {
124 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
125 int value = random.nextInt();
126 for (PrimitiveSink sink : sinks) {
127 sink.putInt(value);
128 }
129 }
130 },
131 PUT_LONG() {
132 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
133 long value = random.nextLong();
134 for (PrimitiveSink sink : sinks) {
135 sink.putLong(value);
136 }
137 }
138 },
139 PUT_FLOAT() {
140 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
141 float value = random.nextFloat();
142 for (PrimitiveSink sink : sinks) {
143 sink.putFloat(value);
144 }
145 }
146 },
147 PUT_DOUBLE() {
148 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
149 double value = random.nextDouble();
150 for (PrimitiveSink sink : sinks) {
151 sink.putDouble(value);
152 }
153 }
154 },
155 PUT_BYTES() {
156 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
157 byte[] value = new byte[random.nextInt(128)];
158 random.nextBytes(value);
159 for (PrimitiveSink sink : sinks) {
160 sink.putBytes(value);
161 }
162 }
163 },
164 PUT_BYTES_INT_INT() {
165 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
166 byte[] value = new byte[random.nextInt(128)];
167 random.nextBytes(value);
168 int off = random.nextInt(value.length + 1);
169 int len = random.nextInt(value.length - off + 1);
170 for (PrimitiveSink sink : sinks) {
171 sink.putBytes(value);
172 }
173 }
174 },
175 PUT_STRING() {
176 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
177 char[] value = new char[random.nextInt(128)];
178 for (int i = 0; i < value.length; i++) {
179 value[i] = (char) random.nextInt();
180 }
181 String s = new String(value);
182 for (PrimitiveSink sink : sinks) {
183 sink.putUnencodedChars(s);
184 }
185 }
186 },
187 PUT_STRING_LOW_SURROGATE() {
188 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
189 String s = new String(new char[] { randomLowSurrogate(random) });
190 for (PrimitiveSink sink : sinks) {
191 sink.putUnencodedChars(s);
192 }
193 }
194 },
195 PUT_STRING_HIGH_SURROGATE() {
196 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
197 String s = new String(new char[] { randomHighSurrogate(random) });
198 for (PrimitiveSink sink : sinks) {
199 sink.putUnencodedChars(s);
200 }
201 }
202 },
203 PUT_STRING_LOW_HIGH_SURROGATE() {
204 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
205 String s = new String(new char[] {
206 randomLowSurrogate(random), randomHighSurrogate(random)});
207 for (PrimitiveSink sink : sinks) {
208 sink.putUnencodedChars(s);
209 }
210 }
211 },
212 PUT_STRING_HIGH_LOW_SURROGATE() {
213 @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
214 String s = new String(new char[] {
215 randomHighSurrogate(random), randomLowSurrogate(random)});
216 for (PrimitiveSink sink : sinks) {
217 sink.putUnencodedChars(s);
218 }
219 }
220 };
221
222 abstract void performAction(Random random, Iterable<? extends PrimitiveSink> sinks);
223
224 private static final RandomHasherAction[] actions = values();
225
226 static RandomHasherAction pickAtRandom(Random random) {
227 return actions[random.nextInt(actions.length)];
228 }
229 }
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245 static void checkNoFunnels(HashFunction function) {
246 Random rand = new Random(0);
247 int keyBits = 32;
248 int hashBits = function.bits();
249
250
251 for (int i = 0; i < keyBits; i++) {
252 int same = 0x0;
253 int diff = 0x0;
254 int count = 0;
255
256 int maxCount = (int) (4 * Math.log(2 * keyBits * hashBits) + 1);
257 while (same != 0xffffffff || diff != 0xffffffff) {
258 int key1 = rand.nextInt();
259
260 int key2 = key1 ^ (1 << i);
261
262 int hash1 = function.hashInt(key1).asInt();
263 int hash2 = function.hashInt(key2).asInt();
264
265 same |= ~(hash1 ^ hash2);
266
267 diff |= (hash1 ^ hash2);
268
269 count++;
270
271
272 if (count > maxCount) {
273 Assert.fail("input bit(" + i + ") was found not to affect all " +
274 hashBits + " output bits; The unaffected bits are " +
275 "as follows: " + ~(same & diff) + ". This was " +
276 "determined after " + count + " trials.");
277 }
278 }
279 }
280 }
281
282
283
284
285
286
287
288 static void checkAvalanche(HashFunction function, int trials, double epsilon) {
289 Random rand = new Random(0);
290 int keyBits = 32;
291 int hashBits = function.bits();
292 for (int i = 0; i < keyBits; i++) {
293 int[] same = new int[hashBits];
294 int[] diff = new int[hashBits];
295
296 for (int j = 0; j < trials; j++) {
297 int key1 = rand.nextInt();
298
299 int key2 = key1 ^ (1 << i);
300
301 int hash1 = function.hashInt(key1).asInt();
302 int hash2 = function.hashInt(key2).asInt();
303 for (int k = 0; k < hashBits; k++) {
304 if ((hash1 & (1 << k)) == (hash2 & (1 << k))) {
305 same[k] += 1;
306 } else {
307 diff[k] += 1;
308 }
309 }
310 }
311
312 for (int j = 0; j < hashBits; j++) {
313 double prob = (double) diff[j] / (double) (diff[j] + same[j]);
314 Assert.assertEquals(0.50d, prob, epsilon);
315 }
316 }
317 }
318
319
320
321
322
323
324
325
326
327
328 static void checkNo2BitCharacteristics(HashFunction function) {
329 Random rand = new Random(0);
330 int keyBits = 32;
331
332
333 for (int i = 0; i < keyBits; i++) {
334 for (int j = 0; j < keyBits; j++) {
335 if (j <= i) continue;
336 int count = 0;
337 int maxCount = 20;
338 boolean diff = false;
339
340 while (!diff) {
341 int delta = (1 << i) | (1 << j);
342 int key1 = rand.nextInt();
343
344 int key2 = key1 ^ delta;
345
346
347 int hash1 = function.hashInt(key1).asInt();
348 int hash2 = function.hashInt(key2).asInt();
349
350
351
352 if ((hash1 ^ hash2) != delta) {
353 diff = true;
354 continue;
355 }
356
357
358
359
360 count++;
361 if (count > maxCount) {
362 Assert.fail("2-bit delta (" + i + ", " + j + ") is likely a " +
363 "characteristic for this hash. This was " +
364 "determined after " + count + " trials");
365 }
366 }
367 }
368 }
369 }
370
371
372
373
374
375 static void check2BitAvalanche(HashFunction function, int trials, double epsilon) {
376 Random rand = new Random(0);
377 int keyBits = 32;
378 int hashBits = function.bits();
379 for (int bit1 = 0; bit1 < keyBits; bit1++) {
380 for (int bit2 = 0; bit2 < keyBits; bit2++) {
381 if (bit2 <= bit1) continue;
382 int delta = (1 << bit1) | (1 << bit2);
383 int[] same = new int[hashBits];
384 int[] diff = new int[hashBits];
385
386 for (int j = 0; j < trials; j++) {
387 int key1 = rand.nextInt();
388
389 int key2 = key1 ^ delta;
390
391 int hash1 = function.hashInt(key1).asInt();
392 int hash2 = function.hashInt(key2).asInt();
393 for (int k = 0; k < hashBits; k++) {
394 if ((hash1 & (1 << k)) == (hash2 & (1 << k))) {
395 same[k] += 1;
396 } else {
397 diff[k] += 1;
398 }
399 }
400 }
401
402 for (int j = 0; j < hashBits; j++) {
403 double prob = (double) diff[j] / (double) (diff[j] + same[j]);
404 Assert.assertEquals(0.50d, prob, epsilon);
405 }
406 }
407 }
408 }
409
410
411
412
413
414 static void assertInvariants(HashFunction hashFunction) {
415 int objects = 100;
416 Set<HashCode> hashcodes = Sets.newHashSetWithExpectedSize(objects);
417 for (int i = 0; i < objects; i++) {
418 Object o = new Object();
419 HashCode hashcode1 = hashFunction.hashObject(o, HashTestUtils.BAD_FUNNEL);
420 HashCode hashcode2 = hashFunction.hashObject(o, HashTestUtils.BAD_FUNNEL);
421 Assert.assertEquals(hashcode1, hashcode2);
422 Assert.assertEquals(hashFunction.bits(), hashcode1.bits());
423 Assert.assertEquals(hashFunction.bits(), hashcode1.asBytes().length * 8);
424 hashcodes.add(hashcode1);
425 }
426 Assert.assertTrue(hashcodes.size() > objects * 0.95);
427
428 assertHashBytesThrowsCorrectExceptions(hashFunction);
429 assertIndependentHashers(hashFunction);
430 assertShortcutsAreEquivalent(hashFunction, 512);
431 }
432
433 static void assertHashBytesThrowsCorrectExceptions(HashFunction hashFunction) {
434 hashFunction.hashBytes(new byte[64], 0, 0);
435
436 try {
437 hashFunction.hashBytes(new byte[128], -1, 128);
438 Assert.fail();
439 } catch (IndexOutOfBoundsException expected) {}
440 try {
441 hashFunction.hashBytes(new byte[128], 64, 256 );
442 Assert.fail();
443 } catch (IndexOutOfBoundsException expected) {}
444 try {
445 hashFunction.hashBytes(new byte[64], 0, -1);
446 Assert.fail();
447 } catch (IndexOutOfBoundsException expected) {}
448 }
449
450 static void assertIndependentHashers(HashFunction hashFunction) {
451 int numActions = 100;
452
453 HashCode expected1 = randomHash(hashFunction, new Random(1L), numActions);
454 HashCode expected2 = randomHash(hashFunction, new Random(2L), numActions);
455
456
457 Random random1 = new Random(1L);
458 Random random2 = new Random(2L);
459 Hasher hasher1 = hashFunction.newHasher();
460 Hasher hasher2 = hashFunction.newHasher();
461 for (int i = 0; i < numActions; i++) {
462 RandomHasherAction.pickAtRandom(random1).performAction(random1, ImmutableSet.of(hasher1));
463 RandomHasherAction.pickAtRandom(random2).performAction(random2, ImmutableSet.of(hasher2));
464 }
465
466 Assert.assertEquals(expected1, hasher1.hash());
467 Assert.assertEquals(expected2, hasher2.hash());
468 }
469
470 static HashCode randomHash(HashFunction hashFunction, Random random, int numActions) {
471 Hasher hasher = hashFunction.newHasher();
472 for (int i = 0; i < numActions; i++) {
473 RandomHasherAction.pickAtRandom(random).performAction(random, ImmutableSet.of(hasher));
474 }
475 return hasher.hash();
476 }
477
478 private static void assertShortcutsAreEquivalent(HashFunction hashFunction, int trials) {
479 Random random = new Random(42085L);
480 for (int i = 0; i < trials; i++) {
481 assertHashBytesEquivalence(hashFunction, random);
482 assertHashIntEquivalence(hashFunction, random);
483 assertHashLongEquivalence(hashFunction, random);
484 assertHashStringEquivalence(hashFunction, random);
485 assertHashStringWithSurrogatesEquivalence(hashFunction, random);
486 }
487 }
488
489 private static void assertHashBytesEquivalence(HashFunction hashFunction, Random random) {
490 int size = random.nextInt(2048);
491 byte[] bytes = new byte[size];
492 random.nextBytes(bytes);
493 assertEquals(hashFunction.hashBytes(bytes),
494 hashFunction.newHasher(size).putBytes(bytes).hash());
495 int off = random.nextInt(size);
496 int len = random.nextInt(size - off);
497 assertEquals(hashFunction.hashBytes(bytes, off, len),
498 hashFunction.newHasher(size).putBytes(bytes, off, len).hash());
499 }
500
501 private static void assertHashIntEquivalence(HashFunction hashFunction, Random random) {
502 int i = random.nextInt();
503 assertEquals(hashFunction.hashInt(i),
504 hashFunction.newHasher().putInt(i).hash());
505 }
506
507 private static void assertHashLongEquivalence(HashFunction hashFunction, Random random) {
508 long l = random.nextLong();
509 assertEquals(hashFunction.hashLong(l),
510 hashFunction.newHasher().putLong(l).hash());
511 }
512
513 private static final ImmutableSet<Charset> CHARSETS = ImmutableSet.of(
514 Charsets.ISO_8859_1,
515 Charsets.US_ASCII,
516 Charsets.UTF_16,
517 Charsets.UTF_16BE,
518 Charsets.UTF_16LE,
519 Charsets.UTF_8);
520
521 private static void assertHashStringEquivalence(HashFunction hashFunction, Random random) {
522
523 new EqualsTester()
524 .addEqualityGroup(
525 hashFunction.hashUnencodedChars("abc"),
526 hashFunction.newHasher().putUnencodedChars("abc").hash(),
527 hashFunction.newHasher().putUnencodedChars("ab").putUnencodedChars("c").hash(),
528 hashFunction.newHasher().putUnencodedChars("a").putUnencodedChars("bc").hash(),
529 hashFunction.newHasher().putUnencodedChars("a").putUnencodedChars("b")
530 .putUnencodedChars("c").hash(),
531 hashFunction.newHasher().putChar('a').putUnencodedChars("bc").hash(),
532 hashFunction.newHasher().putUnencodedChars("ab").putChar('c').hash(),
533 hashFunction.newHasher().putChar('a').putChar('b').putChar('c').hash())
534 .testEquals();
535
536 int size = random.nextInt(2048);
537 byte[] bytes = new byte[size];
538 random.nextBytes(bytes);
539 String string = new String(bytes);
540 assertEquals(hashFunction.hashUnencodedChars(string),
541 hashFunction.newHasher().putUnencodedChars(string).hash());
542 for (Charset charset : CHARSETS) {
543 assertEquals(hashFunction.hashString(string, charset),
544 hashFunction.newHasher().putString(string, charset).hash());
545 }
546 }
547
548
549
550
551
552
553 private static void assertHashStringWithSurrogatesEquivalence(
554 HashFunction hashFunction, Random random) {
555 int size = random.nextInt(8) + 1;
556 char[] chars = new char[size];
557 for (int i = 0; i < chars.length; i++) {
558 chars[i] = random.nextBoolean() ? randomLowSurrogate(random) : randomHighSurrogate(random);
559 }
560 String string = new String(chars);
561 assertEquals(hashFunction.hashUnencodedChars(string),
562 hashFunction.newHasher().putUnencodedChars(string).hash());
563 }
564
565 static char randomLowSurrogate(Random random) {
566 return (char) (Character.MIN_LOW_SURROGATE
567 + random.nextInt(Character.MAX_LOW_SURROGATE - Character.MIN_LOW_SURROGATE + 1));
568 }
569
570 static char randomHighSurrogate(Random random) {
571 return (char) (Character.MIN_HIGH_SURROGATE
572 + random.nextInt(Character.MAX_HIGH_SURROGATE - Character.MIN_HIGH_SURROGATE + 1));
573 }
574 }