/* * Copyright 2006 Amazon Technologies, Inc. or its affiliates. * Amazon, Amazon.com and Carbonado are trademarks or registered trademarks * of Amazon Technologies, Inc. or its affiliates. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.amazon.carbonado.raw; /** * A very low-level class that supports encoding of primitive data into unique, * sortable byte array keys. If the data to encode is of a variable size, then * it is written in base-32768, using only byte values 32..223. This allows * special values such as nulls and terminators to be unambiguously * encoded. Terminators for variable data can be encoded using 1 for ascending * order and 254 for descending order. Nulls can be encoded as 255 for high * ordering and 0 for low ordering. * * @author Brian S O'Neill * @see KeyDecoder */ public class KeyEncoder extends DataEncoder { /** Byte to terminate variable data encoded for ascending order */ static final byte TERMINATOR = (byte)1; /** * Encodes the given signed integer into exactly 4 bytes for descending * order. * * @param value signed integer value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encodeDesc(int value, byte[] dst, int dstOffset) { encode(~value, dst, dstOffset); } /** * Encodes the given signed Integer object into exactly 1 or 5 bytes for * descending order. If the Integer object is never expected to be null, * consider encoding as an int primitive. * * @param value optional signed Integer value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encodeDesc(Integer value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_LOW; return 1; } else { dst[dstOffset] = NOT_NULL_BYTE_LOW; encode(~value.intValue(), dst, dstOffset + 1); return 5; } } /** * Encodes the given signed long into exactly 8 bytes for descending order. * * @param value signed long value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encodeDesc(long value, byte[] dst, int dstOffset) { encode(~value, dst, dstOffset); } /** * Encodes the given signed Long object into exactly 1 or 9 bytes for * descending order. If the Long object is never expected to be null, * consider encoding as a long primitive. * * @param value optional signed Long value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encodeDesc(Long value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_LOW; return 1; } else { dst[dstOffset] = NOT_NULL_BYTE_LOW; encode(~value.longValue(), dst, dstOffset + 1); return 9; } } /** * Encodes the given signed byte into exactly 1 byte for descending order. * * @param value signed byte value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encodeDesc(byte value, byte[] dst, int dstOffset) { dst[dstOffset] = (byte)(value ^ 0x7f); } /** * Encodes the given signed Byte object into exactly 1 or 2 bytes for * descending order. If the Byte object is never expected to be null, * consider encoding as a byte primitive. * * @param value optional signed Byte value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encodeDesc(Byte value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_LOW; return 1; } else { dst[dstOffset] = NOT_NULL_BYTE_LOW; dst[dstOffset + 1] = (byte)(value ^ 0x7f); return 2; } } /** * Encodes the given signed short into exactly 2 bytes for descending * order. * * @param value signed short value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encodeDesc(short value, byte[] dst, int dstOffset) { encode((short) ~value, dst, dstOffset); } /** * Encodes the given signed Short object into exactly 1 or 3 bytes for * descending order. If the Short object is never expected to be null, * consider encoding as a short primitive. * * @param value optional signed Short value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encodeDesc(Short value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_LOW; return 1; } else { dst[dstOffset] = NOT_NULL_BYTE_LOW; encode((short) ~value.shortValue(), dst, dstOffset + 1); return 3; } } /** * Encodes the given character into exactly 2 bytes for descending order. * * @param value character value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encodeDesc(char value, byte[] dst, int dstOffset) { encode((char) ~value, dst, dstOffset); } /** * Encodes the given Character object into exactly 1 or 3 bytes for * descending order. If the Character object is never expected to be null, * consider encoding as a char primitive. * * @param value optional Character value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encodeDesc(Character value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_LOW; return 1; } else { dst[dstOffset] = NOT_NULL_BYTE_LOW; encode((char) ~value.charValue(), dst, dstOffset + 1); return 3; } } /** * Encodes the given boolean into exactly 1 byte for descending order. * * @param value boolean value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encodeDesc(boolean value, byte[] dst, int dstOffset) { dst[dstOffset] = value ? (byte)127 : (byte)128; } /** * Encodes the given Boolean object into exactly 1 byte for descending * order. * * @param value optional Boolean value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encodeDesc(Boolean value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_LOW; } else { dst[dstOffset] = value.booleanValue() ? (byte)127 : (byte)128; } } /** * Encodes the given float into exactly 4 bytes for descending order. * * @param value float value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encodeDesc(float value, byte[] dst, int dstOffset) { int bits = Float.floatToIntBits(value); if (bits >= 0) { bits ^= 0x7fffffff; } dst[dstOffset ] = (byte)(bits >> 24); dst[dstOffset + 1] = (byte)(bits >> 16); dst[dstOffset + 2] = (byte)(bits >> 8); dst[dstOffset + 3] = (byte)bits; } /** * Encodes the given Float object into exactly 4 bytes for descending * order. A non-canonical NaN value is used to represent null. * * @param value optional Float value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encodeDesc(Float value, byte[] dst, int dstOffset) { if (value == null) { encode(~0x7fffffff, dst, dstOffset); } else { encodeDesc(value.floatValue(), dst, dstOffset); } } /** * Encodes the given double into exactly 8 bytes for descending order. * * @param value double value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encodeDesc(double value, byte[] dst, int dstOffset) { long bits = Double.doubleToLongBits(value); if (bits >= 0) { bits ^= 0x7fffffffffffffffL; } int w = (int)(bits >> 32); dst[dstOffset ] = (byte)(w >> 24); dst[dstOffset + 1] = (byte)(w >> 16); dst[dstOffset + 2] = (byte)(w >> 8); dst[dstOffset + 3] = (byte)w; w = (int)bits; dst[dstOffset + 4] = (byte)(w >> 24); dst[dstOffset + 5] = (byte)(w >> 16); dst[dstOffset + 6] = (byte)(w >> 8); dst[dstOffset + 7] = (byte)w; } /** * Encodes the given Double object into exactly 8 bytes for descending * order. A non-canonical NaN value is used to represent null. * * @param value optional Double value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encodeDesc(Double value, byte[] dst, int dstOffset) { if (value == null) { encode(~0x7fffffffffffffffL, dst, dstOffset); } else { encodeDesc(value.doubleValue(), dst, dstOffset); } } /** * Encodes the given optional unsigned byte array into a variable amount of * bytes. If the byte array is null, exactly 1 byte is written. Otherwise, * the amount written can be determined by calling calculateEncodedLength. * * @param value byte array value to encode, may be null * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encode(byte[] value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_HIGH; return 1; } return encode(value, 0, value.length, dst, dstOffset, 0); } /** * Encodes the given optional unsigned byte array into a variable amount of * bytes. If the byte array is null, exactly 1 byte is written. Otherwise, * the amount written can be determined by calling calculateEncodedLength. * * @param value byte array value to encode, may be null * @param valueOffset offset into byte array * @param valueLength length of data in byte array * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encode(byte[] value, int valueOffset, int valueLength, byte[] dst, int dstOffset) { return encode(value, valueOffset, valueLength, dst, dstOffset, 0); } /** * Encodes the given optional unsigned byte array into a variable amount of * bytes for descending order. If the byte array is null, exactly 1 byte is * written. Otherwise, the amount written is determined by calling * calculateEncodedLength. * * @param value byte array value to encode, may be null * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encodeDesc(byte[] value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_LOW; return 1; } return encode(value, 0, value.length, dst, dstOffset, -1); } /** * Encodes the given optional unsigned byte array into a variable amount of * bytes for descending order. If the byte array is null, exactly 1 byte is * written. Otherwise, the amount written is determined by calling * calculateEncodedLength. * * @param value byte array value to encode, may be null * @param valueOffset offset into byte array * @param valueLength length of data in byte array * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encodeDesc(byte[] value, int valueOffset, int valueLength, byte[] dst, int dstOffset) { return encode(value, valueOffset, valueLength, dst, dstOffset, -1); } /** * @param xorMask 0 for normal encoding, -1 for descending encoding */ private static int encode(byte[] value, int valueOffset, int valueLength, byte[] dst, int dstOffset, int xorMask) { if (value == null) { dst[dstOffset] = (byte)(NULL_BYTE_HIGH ^ xorMask); return 1; } final int originalOffset = dstOffset; // Value is encoded in base-32768. int accumBits = 0; int accum = 0; final int end = valueOffset + valueLength; for (int i=valueOffset; i> (8 - supply)); emitDigit(accum, dst, dstOffset, xorMask); dstOffset += 2; accumBits = 8 - supply; accum = value[i] & ((1 << accumBits) - 1); } } if (accumBits > 0) { // Pad with zeros. accum <<= (15 - accumBits); if (accumBits <= 7) { // Since amount of significant bits is small, emit only the // upper half of the digit. The following code is modified from // emitDigit. int a = (accum * 21845) >> 22; if (accum - ((a << 7) + (a << 6)) == 192) { a++; } dst[dstOffset++] = (byte)((a + 32) ^ xorMask); } else { emitDigit(accum, dst, dstOffset, xorMask); dstOffset += 2; } } // Append terminator. dst[dstOffset++] = (byte)(TERMINATOR ^ xorMask); return dstOffset - originalOffset; } /** * Emits a base-32768 digit using exactly two bytes. The first byte is in the range * 32..202 and the second byte is in the range 32..223. * * @param value digit value in the range 0..32767 * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @param xorMask 0 for normal encoding, -1 for descending encoding */ private static void emitDigit(int value, byte[] dst, int dstOffset, int xorMask) { // The first byte is computed as ((value / 192) + 32) and the second // byte is computed as ((value % 192) + 32). To speed things up a bit, // the integer division and remainder operations are replaced with a // scaled multiplication. // approximate value / 192 int a = (value * 21845) >> 22; // approximate value % 192 // Note: the value 192 was chosen as a divisor because a multiply by // 192 can be replaced with two summed shifts. int b = value - ((a << 7) + (a << 6)); if (b == 192) { // Fix error. a++; b = 0; } dst[dstOffset++] = (byte)((a + 32) ^ xorMask); dst[dstOffset] = (byte)((b + 32) ^ xorMask); } /** * Returns the amount of bytes required to encode a byte array of the given * length. * * @param value byte array value to encode, may be null * @return amount of bytes needed to encode */ public static int calculateEncodedLength(byte[] value) { return value == null ? 1 : calculateEncodedLength(value, 0, value.length); } /** * Returns the amount of bytes required to encode the given byte array. * * @param value byte array value to encode, may be null * @param valueOffset offset into byte array * @param valueLength length of data in byte array * @return amount of bytes needed to encode */ public static int calculateEncodedLength(byte[] value, int valueOffset, int valueLength) { // The add of 119 is used to force ceiling rounding. return value == null ? 1 : (((valueLength << 7) + 119) / 120 + 1); } /** * Encodes the given optional String into a variable amount of bytes. The * amount written can be determined by calling * calculateEncodedStringLength. *

* Strings are encoded in a fashion similar to UTF-8, in that ASCII * characters are usually written in one byte. This encoding is more * efficient than UTF-8, but it isn't compatible with UTF-8. * * @param value String value to encode, may be null * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encode(String value, byte[] dst, int dstOffset) { return encode(value, dst, dstOffset, 0); } /** * Encodes the given optional String into a variable amount of bytes for * descending order. The amount written can be determined by calling * calculateEncodedStringLength. *

* Strings are encoded in a fashion similar to UTF-8, in that ASCII * characters are usually written in one byte. This encoding is more * efficient than UTF-8, but it isn't compatible with UTF-8. * * @param value String value to encode, may be null * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encodeDesc(String value, byte[] dst, int dstOffset) { return encode(value, dst, dstOffset, -1); } /** * @param xorMask 0 for normal encoding, -1 for descending encoding */ private static int encode(String value, byte[] dst, int dstOffset, int xorMask) { if (value == null) { dst[dstOffset] = (byte)(NULL_BYTE_HIGH ^ xorMask); return 1; } final int originalOffset = dstOffset; // All characters have an offset of 2 added, in order to reserve bytes // 0 and 1 for encoding nulls and terminators. This means the ASCII // string "HelloWorld" is actually encoded as "JgnnqYqtnf". This also // means that the ASCII '~' and del characters are encoded in two bytes. int length = value.length(); for (int i = 0; i < length; i++) { int c = value.charAt(i) + 2; if (c <= 0x7f) { // 0xxxxxxx dst[dstOffset++] = (byte)(c ^ xorMask); } else if (c <= 12415) { // 10xxxxxx xxxxxxxx // Second byte cannot have the values 0, 1, 254, or 255 because // they clash with null and terminator bytes. Divide by 192 and // store in first 6 bits. The remainder, with 32 added, goes // into the second byte. Note that (192 * 63 + 191) + 128 == 12415. // 63 is the maximum value that can be represented in 6 bits. c -= 128; // c will always be at least 128, so normalize. // approximate value / 192 int a = (c * 21845) >> 22; // approximate value % 192 // Note: the value 192 was chosen as a divisor because a multiply by // 192 can be replaced with two summed shifts. c = c - ((a << 7) + (a << 6)); if (c == 192) { // Fix error. a++; c = 0; } dst[dstOffset++] = (byte)((0x80 | a) ^ xorMask); dst[dstOffset++] = (byte)((c + 32) ^ xorMask); } else { // 110xxxxx xxxxxxxx xxxxxxxx if ((c - 2) >= 0xd800 && (c - 2) <= 0xdbff) { // Found a high surrogate. Verify that surrogate pair is // well-formed. Low surrogate must follow high surrogate. if (i + 1 < length) { int c2 = value.charAt(i + 1); if (c2 >= 0xdc00 && c2 <= 0xdfff) { c = ((((c - 2) & 0x3ff) << 10) | (c2 & 0x3ff)) + 0x10002; i++; } } } // Second and third bytes cannot have the values 0, 1, 254, or // 255 because they clash with null and terminator // bytes. Divide by 192 twice, storing the first and second // remainders in the third and second bytes, respectively. // Note that largest unicode value supported is 2^20 + 65535 == // 1114111. When divided by 192 twice, the value is 30, which // just barely fits in the 5 available bits of the first byte. c -= 12416; // c will always be at least 12416, so normalize. int a = (int)((c * 21845L) >> 22); c = c - ((a << 7) + (a << 6)); if (c == 192) { a++; c = 0; } dst[dstOffset + 2] = (byte)((c + 32) ^ xorMask); c = (a * 21845) >> 22; a = a - ((c << 7) + (c << 6)); if (a == 192) { c++; a = 0; } dst[dstOffset++] = (byte)((0xc0 | c) ^ xorMask); dst[dstOffset++] = (byte)((a + 32) ^ xorMask); dstOffset++; } } // Append terminator. dst[dstOffset++] = (byte)(TERMINATOR ^ xorMask); return dstOffset - originalOffset; } /** * Returns the amount of bytes required to encode the given String. * * @param value String to encode, may be null */ public static int calculateEncodedStringLength(String value) { int encodedLen = 1; if (value != null) { int valueLength = value.length(); for (int i = 0; i < valueLength; i++) { int c = value.charAt(i); if (c <= (0x7f - 2)) { encodedLen++; } else if (c <= (12415 - 2)) { encodedLen += 2; } else { if (c >= 0xd800 && c <= 0xdbff) { // Found a high surrogate. Verify that surrogate pair is // well-formed. Low surrogate must follow high surrogate. if (i + 1 < valueLength) { int c2 = value.charAt(i + 1); if (c2 >= 0xdc00 && c2 <= 0xdfff) { i++; } } } encodedLen += 3; } } } return encodedLen; } /** * Encodes the given byte array for use when there is only a single * required property, descending order, whose type is a byte array. The * original byte array is returned if the length is zero. */ public static byte[] encodeSingleDesc(byte[] value) { return encodeSingleDesc(value, 0, 0); } /** * Encodes the given byte array for use when there is only a single * required property, descending order, whose type is a byte array. The * original byte array is returned if the length and padding lengths are * zero. * * @param prefixPadding amount of extra bytes to allocate at start of encoded byte array * @param suffixPadding amount of extra bytes to allocate at end of encoded byte array */ public static byte[] encodeSingleDesc(byte[] value, int prefixPadding, int suffixPadding) { int length = value.length; if (prefixPadding <= 0 && suffixPadding <= 0 && length == 0) { return value; } byte[] dst = new byte[prefixPadding + length + suffixPadding]; while (--length >= 0) { dst[prefixPadding + length] = (byte) (~value[length]); } return dst; } /** * Encodes the given byte array for use when there is only a single * nullable property, descending order, whose type is a byte array. */ public static byte[] encodeSingleNullableDesc(byte[] value) { return encodeSingleNullableDesc(value, 0, 0); } /** * Encodes the given byte array for use when there is only a single * nullable property, descending order, whose type is a byte array. * * @param prefixPadding amount of extra bytes to allocate at start of encoded byte array * @param suffixPadding amount of extra bytes to allocate at end of encoded byte array */ public static byte[] encodeSingleNullableDesc(byte[] value, int prefixPadding, int suffixPadding) { if (prefixPadding <= 0 && suffixPadding <= 0) { if (value == null) { return NULL_BYTE_ARRAY_LOW; } int length = value.length; if (length == 0) { return NOT_NULL_BYTE_ARRAY_LOW; } byte[] dst = new byte[1 + length]; dst[0] = NOT_NULL_BYTE_LOW; while (--length >= 0) { dst[1 + length] = (byte) (~value[length]); } return dst; } if (value == null) { byte[] dst = new byte[prefixPadding + 1 + suffixPadding]; dst[prefixPadding] = NULL_BYTE_LOW; return dst; } int length = value.length; byte[] dst = new byte[prefixPadding + 1 + length + suffixPadding]; dst[prefixPadding] = NOT_NULL_BYTE_LOW; while (--length >= 0) { dst[prefixPadding + 1 + length] = (byte) (~value[length]); } return dst; } }