View Javadoc
1   /*
2    * Copyright (C) 2009 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    * http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package com.google.common.primitives;
18  
19  import static com.google.common.base.Preconditions.checkArgument;
20  import static com.google.common.base.Preconditions.checkNotNull;
21  
22  import com.google.common.annotations.Beta;
23  import com.google.common.annotations.VisibleForTesting;
24  
25  import sun.misc.Unsafe;
26  
27  import java.nio.ByteOrder;
28  import java.util.Comparator;
29  
30  /**
31   * Static utility methods pertaining to {@code byte} primitives that interpret
32   * values as <i>unsigned</i> (that is, any negative value {@code b} is treated
33   * as the positive value {@code 256 + b}). The corresponding methods that treat
34   * the values as signed are found in {@link SignedBytes}, and the methods for
35   * which signedness is not an issue are in {@link Bytes}.
36   *
37   * <p>See the Guava User Guide article on <a href=
38   * "http://code.google.com/p/guava-libraries/wiki/PrimitivesExplained">
39   * primitive utilities</a>.
40   *
41   * @author Kevin Bourrillion
42   * @author Martin Buchholz
43   * @author Hiroshi Yamauchi
44   * @author Louis Wasserman
45   * @since 1.0
46   */
47  public final class UnsignedBytes {
48    private UnsignedBytes() {}
49  
50    /**
51     * The largest power of two that can be represented as an unsigned {@code
52     * byte}.
53     *
54     * @since 10.0
55     */
56    public static final byte MAX_POWER_OF_TWO = (byte) 0x80;
57  
58    /**
59     * The largest value that fits into an unsigned byte.
60     *
61     * @since 13.0
62     */
63    public static final byte MAX_VALUE = (byte) 0xFF;
64  
65    private static final int UNSIGNED_MASK = 0xFF;
66  
67    /**
68     * Returns the value of the given byte as an integer, when treated as
69     * unsigned. That is, returns {@code value + 256} if {@code value} is
70     * negative; {@code value} itself otherwise.
71     *
72     * @since 6.0
73     */
74    public static int toInt(byte value) {
75      return value & UNSIGNED_MASK;
76    }
77  
78    /**
79     * Returns the {@code byte} value that, when treated as unsigned, is equal to
80     * {@code value}, if possible.
81     *
82     * @param value a value between 0 and 255 inclusive
83     * @return the {@code byte} value that, when treated as unsigned, equals
84     *     {@code value}
85     * @throws IllegalArgumentException if {@code value} is negative or greater
86     *     than 255
87     */
88    public static byte checkedCast(long value) {
89      if ((value >> Byte.SIZE) != 0) {
90        // don't use checkArgument here, to avoid boxing
91        throw new IllegalArgumentException("Out of range: " + value);
92      }
93      return (byte) value;
94    }
95  
96    /**
97     * Returns the {@code byte} value that, when treated as unsigned, is nearest
98     * in value to {@code value}.
99     *
100    * @param value any {@code long} value
101    * @return {@code (byte) 255} if {@code value >= 255}, {@code (byte) 0} if
102    *     {@code value <= 0}, and {@code value} cast to {@code byte} otherwise
103    */
104   public static byte saturatedCast(long value) {
105     if (value > toInt(MAX_VALUE)) {
106       return MAX_VALUE; // -1
107     }
108     if (value < 0) {
109       return (byte) 0;
110     }
111     return (byte) value;
112   }
113 
114   /**
115    * Compares the two specified {@code byte} values, treating them as unsigned
116    * values between 0 and 255 inclusive. For example, {@code (byte) -127} is
117    * considered greater than {@code (byte) 127} because it is seen as having
118    * the value of positive {@code 129}.
119    *
120    * @param a the first {@code byte} to compare
121    * @param b the second {@code byte} to compare
122    * @return a negative value if {@code a} is less than {@code b}; a positive
123    *     value if {@code a} is greater than {@code b}; or zero if they are equal
124    */
125   public static int compare(byte a, byte b) {
126     return toInt(a) - toInt(b);
127   }
128 
129   /**
130    * Returns the least value present in {@code array}.
131    *
132    * @param array a <i>nonempty</i> array of {@code byte} values
133    * @return the value present in {@code array} that is less than or equal to
134    *     every other value in the array
135    * @throws IllegalArgumentException if {@code array} is empty
136    */
137   public static byte min(byte... array) {
138     checkArgument(array.length > 0);
139     int min = toInt(array[0]);
140     for (int i = 1; i < array.length; i++) {
141       int next = toInt(array[i]);
142       if (next < min) {
143         min = next;
144       }
145     }
146     return (byte) min;
147   }
148 
149   /**
150    * Returns the greatest value present in {@code array}.
151    *
152    * @param array a <i>nonempty</i> array of {@code byte} values
153    * @return the value present in {@code array} that is greater than or equal
154    *     to every other value in the array
155    * @throws IllegalArgumentException if {@code array} is empty
156    */
157   public static byte max(byte... array) {
158     checkArgument(array.length > 0);
159     int max = toInt(array[0]);
160     for (int i = 1; i < array.length; i++) {
161       int next = toInt(array[i]);
162       if (next > max) {
163         max = next;
164       }
165     }
166     return (byte) max;
167   }
168 
169   /**
170    * Returns a string representation of x, where x is treated as unsigned.
171    *
172    * @since 13.0
173    */
174   @Beta
175   public static String toString(byte x) {
176     return toString(x, 10);
177   }
178 
179   /**
180    * Returns a string representation of {@code x} for the given radix, where {@code x} is treated
181    * as unsigned.
182    *
183    * @param x the value to convert to a string.
184    * @param radix the radix to use while working with {@code x}
185    * @throws IllegalArgumentException if {@code radix} is not between {@link Character#MIN_RADIX}
186    *         and {@link Character#MAX_RADIX}.
187    * @since 13.0
188    */
189   @Beta
190   public static String toString(byte x, int radix) {
191     checkArgument(radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX,
192         "radix (%s) must be between Character.MIN_RADIX and Character.MAX_RADIX", radix);
193     // Benchmarks indicate this is probably not worth optimizing.
194     return Integer.toString(toInt(x), radix);
195   }
196 
197   /**
198    * Returns the unsigned {@code byte} value represented by the given decimal string.
199    *
200    * @throws NumberFormatException if the string does not contain a valid unsigned {@code byte}
201    *         value
202    * @throws NullPointerException if {@code s} is null 
203    *         (in contrast to {@link Byte#parseByte(String)})
204    * @since 13.0
205    */
206   @Beta
207   public static byte parseUnsignedByte(String string) {
208     return parseUnsignedByte(string, 10);
209   }
210 
211   /**
212    * Returns the unsigned {@code byte} value represented by a string with the given radix.
213    *
214    * @param string the string containing the unsigned {@code byte} representation to be parsed.
215    * @param radix the radix to use while parsing {@code string}
216    * @throws NumberFormatException if the string does not contain a valid unsigned {@code byte}
217    *         with the given radix, or if {@code radix} is not between {@link Character#MIN_RADIX}
218    *         and {@link Character#MAX_RADIX}.
219    * @throws NullPointerException if {@code s} is null 
220    *         (in contrast to {@link Byte#parseByte(String)})
221    * @since 13.0
222    */
223   @Beta
224   public static byte parseUnsignedByte(String string, int radix) {
225     int parse = Integer.parseInt(checkNotNull(string), radix);
226     // We need to throw a NumberFormatException, so we have to duplicate checkedCast. =(
227     if (parse >> Byte.SIZE == 0) {
228       return (byte) parse;
229     } else {
230       throw new NumberFormatException("out of range: " + parse);
231     }
232   }
233 
234   /**
235    * Returns a string containing the supplied {@code byte} values separated by
236    * {@code separator}. For example, {@code join(":", (byte) 1, (byte) 2,
237    * (byte) 255)} returns the string {@code "1:2:255"}.
238    *
239    * @param separator the text that should appear between consecutive values in
240    *     the resulting string (but not at the start or end)
241    * @param array an array of {@code byte} values, possibly empty
242    */
243   public static String join(String separator, byte... array) {
244     checkNotNull(separator);
245     if (array.length == 0) {
246       return "";
247     }
248 
249     // For pre-sizing a builder, just get the right order of magnitude
250     StringBuilder builder = new StringBuilder(array.length * (3 + separator.length()));
251     builder.append(toInt(array[0]));
252     for (int i = 1; i < array.length; i++) {
253       builder.append(separator).append(toString(array[i]));
254     }
255     return builder.toString();
256   }
257 
258   /**
259    * Returns a comparator that compares two {@code byte} arrays
260    * lexicographically. That is, it compares, using {@link
261    * #compare(byte, byte)}), the first pair of values that follow any common
262    * prefix, or when one array is a prefix of the other, treats the shorter
263    * array as the lesser. For example, {@code [] < [0x01] < [0x01, 0x7F] <
264    * [0x01, 0x80] < [0x02]}. Values are treated as unsigned.
265    *
266    * <p>The returned comparator is inconsistent with {@link
267    * Object#equals(Object)} (since arrays support only identity equality), but
268    * it is consistent with {@link java.util.Arrays#equals(byte[], byte[])}.
269    *
270    * @see <a href="http://en.wikipedia.org/wiki/Lexicographical_order">
271    *     Lexicographical order article at Wikipedia</a>
272    * @since 2.0
273    */
274   public static Comparator<byte[]> lexicographicalComparator() {
275     return LexicographicalComparatorHolder.BEST_COMPARATOR;
276   }
277 
278   @VisibleForTesting
279   static Comparator<byte[]> lexicographicalComparatorJavaImpl() {
280     return LexicographicalComparatorHolder.PureJavaComparator.INSTANCE;
281   }
282 
283   /**
284    * Provides a lexicographical comparator implementation; either a Java
285    * implementation or a faster implementation based on {@link Unsafe}.
286    *
287    * <p>Uses reflection to gracefully fall back to the Java implementation if
288    * {@code Unsafe} isn't available.
289    */
290   @VisibleForTesting
291   static class LexicographicalComparatorHolder {
292     static final String UNSAFE_COMPARATOR_NAME =
293         LexicographicalComparatorHolder.class.getName() + "$UnsafeComparator";
294 
295     static final Comparator<byte[]> BEST_COMPARATOR = getBestComparator();
296 
297     @VisibleForTesting
298     enum UnsafeComparator implements Comparator<byte[]> {
299       INSTANCE;
300 
301       static final boolean BIG_ENDIAN =
302           ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);
303 
304       /*
305        * The following static final fields exist for performance reasons.
306        *
307        * In UnsignedBytesBenchmark, accessing the following objects via static
308        * final fields is the fastest (more than twice as fast as the Java
309        * implementation, vs ~1.5x with non-final static fields, on x86_32)
310        * under the Hotspot server compiler. The reason is obviously that the
311        * non-final fields need to be reloaded inside the loop.
312        *
313        * And, no, defining (final or not) local variables out of the loop still
314        * isn't as good because the null check on the theUnsafe object remains
315        * inside the loop and BYTE_ARRAY_BASE_OFFSET doesn't get
316        * constant-folded.
317        *
318        * The compiler can treat static final fields as compile-time constants
319        * and can constant-fold them while (final or not) local variables are
320        * run time values.
321        */
322 
323       static final Unsafe theUnsafe;
324 
325       /** The offset to the first element in a byte array. */
326       static final int BYTE_ARRAY_BASE_OFFSET;
327 
328       static {
329         theUnsafe = getUnsafe();
330 
331         BYTE_ARRAY_BASE_OFFSET = theUnsafe.arrayBaseOffset(byte[].class);
332 
333         // sanity check - this should never fail
334         if (theUnsafe.arrayIndexScale(byte[].class) != 1) {
335           throw new AssertionError();
336         }
337       }
338       
339       /**
340        * Returns a sun.misc.Unsafe.  Suitable for use in a 3rd party package.
341        * Replace with a simple call to Unsafe.getUnsafe when integrating
342        * into a jdk.
343        *
344        * @return a sun.misc.Unsafe
345        */
346       private static sun.misc.Unsafe getUnsafe() {
347           try {
348               return sun.misc.Unsafe.getUnsafe();
349           } catch (SecurityException tryReflectionInstead) {}
350           try {
351               return java.security.AccessController.doPrivileged
352               (new java.security.PrivilegedExceptionAction<sun.misc.Unsafe>() {
353                   public sun.misc.Unsafe run() throws Exception {
354                       Class<sun.misc.Unsafe> k = sun.misc.Unsafe.class;
355                       for (java.lang.reflect.Field f : k.getDeclaredFields()) {
356                           f.setAccessible(true);
357                           Object x = f.get(null);
358                           if (k.isInstance(x))
359                               return k.cast(x);
360                       }
361                       throw new NoSuchFieldError("the Unsafe");
362                   }});
363           } catch (java.security.PrivilegedActionException e) {
364               throw new RuntimeException("Could not initialize intrinsics",
365                                          e.getCause());
366           }
367       }
368 
369       @Override public int compare(byte[] left, byte[] right) {
370         int minLength = Math.min(left.length, right.length);
371         int minWords = minLength / Longs.BYTES;
372 
373         /*
374          * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a
375          * time is no slower than comparing 4 bytes at a time even on 32-bit.
376          * On the other hand, it is substantially faster on 64-bit.
377          */
378         for (int i = 0; i < minWords * Longs.BYTES; i += Longs.BYTES) {
379           long lw = theUnsafe.getLong(left, BYTE_ARRAY_BASE_OFFSET + (long) i);
380           long rw = theUnsafe.getLong(right, BYTE_ARRAY_BASE_OFFSET + (long) i);
381           if (lw != rw) {
382             if (BIG_ENDIAN) {
383               return UnsignedLongs.compare(lw, rw);
384             }
385 
386             /*
387              * We want to compare only the first index where left[index] != right[index].
388              * This corresponds to the least significant nonzero byte in lw ^ rw, since lw
389              * and rw are little-endian.  Long.numberOfTrailingZeros(diff) tells us the least 
390              * significant nonzero bit, and zeroing out the first three bits of L.nTZ gives us the 
391              * shift to get that least significant nonzero byte.
392              */
393             int n = Long.numberOfTrailingZeros(lw ^ rw) & ~0x7;
394             return (int) (((lw >>> n) & UNSIGNED_MASK) - ((rw >>> n) & UNSIGNED_MASK));
395           }
396         }
397 
398         // The epilogue to cover the last (minLength % 8) elements.
399         for (int i = minWords * Longs.BYTES; i < minLength; i++) {
400           int result = UnsignedBytes.compare(left[i], right[i]);
401           if (result != 0) {
402             return result;
403           }
404         }
405         return left.length - right.length;
406       }
407     }
408 
409     enum PureJavaComparator implements Comparator<byte[]> {
410       INSTANCE;
411 
412       @Override public int compare(byte[] left, byte[] right) {
413         int minLength = Math.min(left.length, right.length);
414         for (int i = 0; i < minLength; i++) {
415           int result = UnsignedBytes.compare(left[i], right[i]);
416           if (result != 0) {
417             return result;
418           }
419         }
420         return left.length - right.length;
421       }
422     }
423 
424     /**
425      * Returns the Unsafe-using Comparator, or falls back to the pure-Java
426      * implementation if unable to do so.
427      */
428     static Comparator<byte[]> getBestComparator() {
429       try {
430         Class<?> theClass = Class.forName(UNSAFE_COMPARATOR_NAME);
431 
432         // yes, UnsafeComparator does implement Comparator<byte[]>
433         @SuppressWarnings("unchecked")
434         Comparator<byte[]> comparator =
435             (Comparator<byte[]>) theClass.getEnumConstants()[0];
436         return comparator;
437       } catch (Throwable t) { // ensure we really catch *everything*
438         return lexicographicalComparatorJavaImpl();
439       }
440     }
441   }
442 }