/*
* For work developed by the HSQL Development Group:
*
* Copyright (c) 2001-2016, The HSQL Development Group
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of the HSQL Development Group nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL HSQL DEVELOPMENT GROUP, HSQLDB.ORG,
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*
*
* For work originally developed by the Hypersonic SQL Group:
*
* Copyright (c) 1995-2000, The Hypersonic SQL Group.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of the Hypersonic SQL Group nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE HYPERSONIC SQL GROUP,
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* This software consists of voluntary contributions made by many individuals
* on behalf of the Hypersonic SQL Group.
*/
package org.hsqldb.lib;
import java.io.
IOException;
import java.io.
InputStream;
import java.io.
UTFDataFormatException;
import org.hsqldb.map.
BitMap;
/**
 * Collection of static methods for converting strings between different
 * formats and to and from byte arrays.<p>
 *
 * All methods are static and keep no state between calls. <p>
 *
 * Includes two methods based on Hypersonic code as indicated.
 *
 * @author Thomas Mueller (Hypersonic SQL Group)
 * @author Fred Toussi (fredt@users dot sourceforge.net)
 * @version 2.3.5
 * @since 1.7.2
 */
public class StringConverter {

    /**
     * ASCII codes of the lower-case hexadecimal digits, indexed by nibble
     * value (0-15).
     */
    private static final byte[] HEXBYTES = {
        (byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4', (byte) '5',
        (byte) '6', (byte) '7', (byte) '8', (byte) '9', (byte) 'a', (byte) 'b',
        (byte) 'c', (byte) 'd', (byte) 'e', (byte) 'f'
    };

    /**
     * Returns the numeric value of a hexadecimal digit character.
     *
     * @param value character code of the digit; upper and lower case letters
     *   are both accepted
     * @return 0-15 for a valid hexadecimal digit, -1 for anything else
     */
    private static int getNibble(int value) {

        if (value >= '0' && value <= '9') {
            return value - '0';
        }

        if (value >= 'a' && value <= 'f') {
            return 10 + value - 'a';
        }

        if (value >= 'A' && value <= 'F') {
            return 10 + value - 'A';
        }

        return -1;
    }

    /**
     * Converts a hexadecimal string into a byte array. Space characters
     * anywhere in the string are skipped; every other character must be a
     * hexadecimal digit.
     *
     * @param s hexadecimal string
     *
     * @return byte array for the hex string
     * @throws IOException if the string contains a character that is neither
     *   a space nor a hex digit, or holds an odd number of hex digits
     */
    public static byte[] hexStringToByteArray(String s) throws IOException {

        int     length   = s.length();

        // upper bound on the result size; trimmed below if spaces were skipped
        byte[]  data     = new byte[length / 2 + (length % 2)];
        int     current  = 0;
        boolean highNext = true;
        int     written  = 0;

        for (int pos = 0; pos < length; pos++) {
            char c = s.charAt(pos);

            if (c == ' ') {
                continue;
            }

            int nibble = getNibble(c);

            if (nibble == -1) {
                throw new IOException(
                    "hexadecimal string contains non hex character");    //NOI18N
            }

            if (highNext) {
                current  = (nibble & 0xf) << 4;
                highNext = false;
            } else {
                current         += (nibble & 0xf);
                highNext        = true;
                data[written++] = (byte) current;
            }
        }

        // a pending high nibble means the digit count was odd
        if (!highNext) {
            throw new IOException(
                "hexadecimal string with odd number of characters");    //NOI18N
        }

        if (written < data.length) {
            data = (byte[]) ArrayUtil.resizeArray(data, written);
        }

        return data;
    }

    /**
     * Compacts a bit string into a BitMap. Space characters are skipped;
     * every other character must be '0' or '1'. The size of the returned map
     * is set to the number of bit characters read.
     *
     * @param s bit string
     * @return BitMap for the bit string
     * @throws IOException if the string contains a character other than
     *   '0', '1' or space
     */
    public static BitMap sqlBitStringToBitMap(String s) throws IOException {

        int    length   = s.length();
        int    bitIndex = 0;
        BitMap map      = new BitMap(0, true);

        for (int pos = 0; pos < length; pos++) {
            char c = s.charAt(pos);

            if (c == ' ') {
                continue;
            }

            int bit = getNibble(c);

            if (bit != 0 && bit != 1) {
                throw new IOException(
                    "bit string contains non binary character");    //NOI18N
            }

            if (bit == 1) {
                map.set(bitIndex);
            }

            bitIndex++;
        }

        map.setSize(bitIndex);

        return map;
    }

    /**
     * Converts a byte array into a hexadecimal string using lower-case
     * digits, two characters per byte.
     *
     * @param b byte array
     *
     * @return hex string
     */
    public static String byteArrayToHexString(byte[] b) {

        int    len = b.length;
        char[] s   = new char[len * 2];

        for (int i = 0, j = 0; i < len; i++) {
            int c = ((int) b[i]) & 0xff;

            s[j++] = (char) HEXBYTES[c >> 4 & 0xf];
            s[j++] = (char) HEXBYTES[c & 0xf];
        }

        return new String(s);
    }

    /**
     * Converts a byte array into an SQL hexadecimal literal of the form
     * X'...' using lower-case digits.
     *
     * @param b byte array
     *
     * @return SQL hex literal
     */
    public static String byteArrayToSQLHexString(byte[] b) {

        int    len = b.length;

        // two digits per byte plus the leading X' and the closing quote
        char[] s   = new char[len * 2 + 3];

        s[0] = 'X';
        s[1] = '\'';

        int j = 2;

        for (int i = 0; i < len; i++) {
            int c = ((int) b[i]) & 0xff;

            s[j++] = (char) HEXBYTES[c >> 4 & 0xf];
            s[j++] = (char) HEXBYTES[c & 0xf];
        }

        s[j] = '\'';

        return new String(s);
    }

    /**
     * Converts a byte array into a string of '0' and '1' characters. Bit j
     * of the result is read from byte j / 8 at position j % 8 as defined by
     * BitMap.isSet.
     *
     * @param bytes byte array
     * @param bitCount number of bits
     * @return bit string of bitCount characters
     */
    public static String byteArrayToBitString(byte[] bytes, int bitCount) {

        char[] s = new char[bitCount];

        for (int j = 0; j < bitCount; j++) {
            byte b = bytes[j / 8];

            s[j] = BitMap.isSet(b, j % 8) ? '1'
                                          : '0';
        }

        return new String(s);
    }

    /**
     * Converts a byte array into an SQL binary literal of the form B'...'.
     * Bits are read in the same way as in byteArrayToBitString.
     *
     * @param bytes byte array
     * @param bitCount number of bits
     * @return SQL bit string literal
     */
    public static String byteArrayToSQLBitString(byte[] bytes, int bitCount) {

        // one character per bit plus the leading B' and the closing quote
        char[] s = new char[bitCount + 3];

        s[0] = 'B';
        s[1] = '\'';

        int pos = 2;

        for (int j = 0; j < bitCount; j++) {
            byte b = bytes[j / 8];

            s[pos++] = BitMap.isSet(b, j % 8) ? '1'
                                              : '0';
        }

        s[pos] = '\'';

        return new String(s);
    }

    /**
     * Converts a byte array into lower-case hexadecimal characters which are
     * written as ASCII bytes into the given output array. The caller must
     * supply an output array with at least 2 * b.length bytes available from
     * the given offset.
     *
     * @param o output array
     * @param from offset into output array
     * @param b input array
     * @return written count
     */
    public static int writeHexBytes(byte[] o, final int from, byte[] b) {

        int len = b.length;
        int pos = from;

        for (int i = 0; i < len; i++) {
            int c = ((int) b[i]) & 0xff;

            o[pos++] = HEXBYTES[c >> 4 & 0xf];
            o[pos++] = HEXBYTES[c & 0xf];
        }

        return pos - from;
    }

    /**
     * Decodes a byte array into a String using the named charset, or the
     * platform default charset when charset is null.
     *
     * @param b byte array
     * @param charset name of the charset, or null for the platform default
     * @return decoded string, or null if any exception is raised while
     *   constructing it (for example an unsupported charset name)
     */
    public static String byteArrayToString(byte[] b, String charset) {

        try {
            return (charset == null) ? new String(b)
                                     : new String(b, charset);
        } catch (Exception ignored) {

            // deliberate best effort: callers receive null on failure
        }

        return null;
    }

    /**
     * Hsqldb specific encoding used only for log files. The SQL statements that
     * need to be written to the log file (input) are Java Unicode strings.
     * input is converted into a 7bit escaped ASCII string (output) with the
     * following transformations. All characters outside the 0x20-7f range are
     * converted to an escape sequence and added to output. If a backslash
     * character is immediately followed by 'u', the backslash character is
     * converted to escape sequence and added to output. All the remaining
     * characters in input are added to output without conversion. The escape
     * sequence is backslash, letter u, xxxx, where xxxx is the hex
     * representation of the character code. (fredt@users)<p>
     *
     * A null or empty string writes nothing. <p>
     *
     * Method based on Hypersonic Code
     *
     * @param b output stream to write to
     * @param s Java string
     * @param doubleSingleQuotes if true each single quote character in the
     *   input is written twice
     */
    public static void stringToUnicodeBytes(HsqlByteArrayOutputStream b,
            String s, boolean doubleSingleQuotes) {

        if (s == null) {
            return;
        }

        final int len = s.length();

        if (len == 0) {
            return;
        }

        // bytes written beyond one per input character since room was last
        // ensured; the unchecked writes below rely on this accounting
        int extras = 0;

        b.ensureRoom(len * 2 + 5);

        for (int i = 0; i < len; i++) {
            char c = s.charAt(i);

            if (c == '\\') {
                if ((i < len - 1) && (s.charAt(i + 1) == 'u')) {
                    b.writeNoCheck(c);    // encode the \ as unicode, so 'u' is ignored
                    b.writeNoCheck('u');
                    b.writeNoCheck('0');
                    b.writeNoCheck('0');
                    b.writeNoCheck('5');
                    b.writeNoCheck('c');

                    extras += 5;
                } else {
                    b.write(c);
                }
            } else if ((c >= 0x0020) && (c <= 0x007f)) {
                b.writeNoCheck(c);        // this is 99%

                if (c == '\'' && doubleSingleQuotes) {
                    b.writeNoCheck(c);

                    extras++;
                }
            } else {
                b.writeNoCheck('\\');
                b.writeNoCheck('u');
                b.writeNoCheck(HEXBYTES[(c >> 12) & 0xf]);
                b.writeNoCheck(HEXBYTES[(c >> 8) & 0xf]);
                b.writeNoCheck(HEXBYTES[(c >> 4) & 0xf]);
                b.writeNoCheck(HEXBYTES[c & 0xf]);

                extras += 5;
            }

            // the initial reservation is used up once the extra bytes exceed
            // the input length; reserve again and restart the accounting
            if (extras > len) {
                b.ensureRoom(len + extras + 5);

                extras = 0;
            }
        }
    }

// fredt@users 20020522 - fix for 557510 - backslash bug
// this legacy bug resulted from forward reading the input when a backslash
// was present and manifested itself when a backslash was followed
// immediately by a character outside the 0x20-7f range in a database field.

    /**
     * Hsqldb specific decoding used only for log files. This method converts
     * the 7 bit escaped ASCII strings in a log file back into Java Unicode
     * strings. See stringToUnicodeBytes() above. <p>
     *
     * The argument is returned unchanged when it is null or contains no
     * backslash-u sequence. The four characters following a backslash-u are
     * not validated as hexadecimal digits. <p>
     *
     * Method based on Hypersonic Code
     *
     * @param s encoded ASCII string
     * @return Java string
     */
    public static String unicodeStringToString(String s) {

        if ((s == null) || (s.indexOf("\\u") == -1)) {
            return s;
        }

        int    len = s.length();
        char[] b   = new char[len];
        int    j   = 0;

        for (int i = 0; i < len; i++) {
            char c = s.charAt(i);

            // a complete escape needs five more characters after the backslash
            if (c == '\\' && i < len - 5) {
                char c1 = s.charAt(i + 1);

                if (c1 == 'u') {
                    i++;

                    // 4 characters read should always return 0-15
                    int k = getNibble(s.charAt(++i)) << 12;

                    k      += getNibble(s.charAt(++i)) << 8;
                    k      += getNibble(s.charAt(++i)) << 4;
                    k      += getNibble(s.charAt(++i));
                    b[j++] = (char) k;
                } else {
                    b[j++] = c;
                }
            } else {
                b[j++] = c;
            }
        }

        return new String(b, 0, j);
    }

    /**
     * Decodes a section of a byte array holding UTF-8 encoded characters of
     * one to three bytes each, allocating a temporary char buffer of the
     * given length.
     *
     * @param bytearr array containing the encoded bytes
     * @param offset index of the first encoded byte
     * @param length number of encoded bytes
     * @return decoded string
     * @throws IOException (UTFDataFormatException) if the bytes are not a
     *   valid sequence
     */
    public static String readUTF(byte[] bytearr, int offset,
                                 int length) throws IOException {

        char[] buf = new char[length];

        return readUTF(bytearr, offset, length, buf);
    }

    /**
     * Decodes a section of a byte array holding UTF-8 encoded characters of
     * one to three bytes each. The supplied char buffer is used as working
     * space and is replaced by a larger one if it fills up before all bytes
     * are consumed. <p>
     *
     * A zero byte, a continuation byte (10xx xxxx) in lead position, a lead
     * byte of the form 1111 xxxx and a sequence truncated by the end of the
     * section are all rejected.
     *
     * @param bytearr array containing the encoded bytes
     * @param offset index of the first encoded byte
     * @param length number of encoded bytes
     * @param buf working buffer for the decoded characters
     * @return decoded string
     * @throws IOException (UTFDataFormatException) if the bytes are not a
     *   valid sequence
     */
    public static String readUTF(byte[] bytearr, int offset, int length,
                                 char[] buf) throws IOException {

        int bcount = 0;
        int c, char2, char3;
        int count = 0;

        while (count < length) {
            c = (int) bytearr[offset + count];

            if (bcount == buf.length) {
                buf = (char[]) ArrayUtil.resizeArray(buf, length);
            }

            if (c > 0) {

                /* 0xxxxxxx*/
                count++;

                buf[bcount++] = (char) c;

                continue;
            }

            c &= 0xff;

            switch (c >> 4) {

                case 12 :
                case 13 :

                    /* 110x xxxx   10xx xxxx*/
                    count += 2;

                    if (count > length) {
                        throw new UTFDataFormatException();
                    }

                    char2 = (int) bytearr[offset + count - 1];

                    if ((char2 & 0xC0) != 0x80) {
                        throw new UTFDataFormatException();
                    }

                    buf[bcount++] = (char) (((c & 0x1F) << 6)
                                            | (char2 & 0x3F));
                    break;

                case 14 :

                    /* 1110 xxxx  10xx xxxx  10xx xxxx */
                    count += 3;

                    if (count > length) {
                        throw new UTFDataFormatException();
                    }

                    char2 = (int) bytearr[offset + count - 2];
                    char3 = (int) bytearr[offset + count - 1];

                    if (((char2 & 0xC0) != 0x80)
                            || ((char3 & 0xC0) != 0x80)) {
                        throw new UTFDataFormatException();
                    }

                    buf[bcount++] = (char) (((c & 0x0F) << 12)
                                            | ((char2 & 0x3F) << 6)
                                            | ((char3 & 0x3F) << 0));
                    break;

                default :

                    /* 10xx xxxx,  1111 xxxx */
                    throw new UTFDataFormatException();
            }
        }

        // The number of chars produced may be less than length
        return new String(buf, 0, bcount);
    }

    /**
     * Writes a string to the specified output stream using a UTF-8 encoding
     * of one to three bytes per char. Chars in the range 0x0001-0x007F are
     * written as one byte, chars above 0x07FF as three bytes and all others
     * (including 0x0000) as two bytes. No length prefix is written.
     *
     * @param      str   a string to be written.
     * @param      out   destination to write to
     * @return     The number of bytes written out.
     */
    public static int stringToUTFBytes(String str,
                                       HsqlByteArrayOutputStream out) {

        int strlen = str.length();
        int c,
            count  = 0;

        if (out.count + strlen + 8 > out.buffer.length) {
            out.ensureRoom(strlen + 8);
        }

        char[] arr = str.toCharArray();

        for (int i = 0; i < strlen; i++) {
            c = arr[i];

            if (c >= 0x0001 && c <= 0x007F) {
                out.buffer[out.count++] = (byte) c;

                count++;
            } else if (c > 0x07FF) {
                out.buffer[out.count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
                out.buffer[out.count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
                out.buffer[out.count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
                count                   += 3;
            } else {
                out.buffer[out.count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
                out.buffer[out.count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
                count                   += 2;
            }

            // keep a small margin free for the next, possibly 3 byte, char
            if (out.count + 8 > out.buffer.length) {
                out.ensureRoom(strlen - i + 8);
            }
        }

        return count;
    }

    /**
     * Returns the number of bytes that stringToUTFBytes writes for the given
     * string.
     *
     * @param s Java string, may be null
     * @return encoded size in bytes; 0 for a null or empty string
     */
    public static int getUTFSize(String s) {

        int len = (s == null) ? 0
                              : s.length();
        int l   = 0;

        for (int i = 0; i < len; i++) {
            int c = s.charAt(i);

            if ((c >= 0x0001) && (c <= 0x007F)) {
                l++;
            } else if (c > 0x07FF) {
                l += 3;
            } else {
                l += 2;
            }
        }

        return l;
    }

    /**
     * Using an output stream, returns a String from an InputStream. The
     * stream is read one byte at a time until read() returns -1 and is not
     * closed by this method.
     *
     * @param is InputStream to read from
     * @param encoding character encoding of the string
     * @throws IOException if reading fails or the encoding is not supported
     * @return a Java string
     */
    public static String inputStreamToString(InputStream is,
            String encoding) throws IOException {

        HsqlByteArrayOutputStream baOS = new HsqlByteArrayOutputStream(1024);

        while (true) {
            int c = is.read();

            if (c == -1) {
                break;
            }

            baOS.write(c);
        }

        return new String(baOS.getBuffer(), 0, baOS.size(), encoding);
    }

// fredt@users 20020130 - patch 497872 by Nitin Chauhan - use byte[] of exact size

    /**
     * Returns the quoted version of the string using the quotechar argument.
     * extraQuote argument indicates whether each instance of quotechar inside
     * the string is doubled.<p>
     *
     * null string argument returns null. If the caller needs the literal
     * "NULL" it should create it itself.<p>
     *
     * @param s Java string
     * @param quoteChar character used for quoting
     * @param extraQuote true if quoteChar itself should be repeated
     * @return String
     */
    public static String toQuotedString(String s, char quoteChar,
                                        boolean extraQuote) {

        if (s == null) {
            return null;
        }

        // number of additional characters needed for doubled quotes
        int    count = extraQuote ? count(s, quoteChar)
                                  : 0;
        int    len   = s.length();
        char[] b     = new char[2 + count + len];
        int    i     = 0;
        int    j     = 0;

        b[j++] = quoteChar;

        for (; i < len; i++) {
            char c = s.charAt(i);

            b[j++] = c;

            if (extraQuote && c == quoteChar) {
                b[j++] = c;
            }
        }

        b[j] = quoteChar;

        return new String(b);
    }

    /**
     * Counts Character c in String s
     *
     * @param s Java string, may be null
     * @param c character to count
     * @return int count; 0 for a null string
     */
    static int count(final String s, final char c) {

        int pos   = 0;
        int count = 0;

        if (s != null) {
            while ((pos = s.indexOf(c, pos)) > -1) {
                count++;
                pos++;
            }
        }

        return count;
    }

    /**
     * Converts the string to an HTML representation in the ASCII character set
     * and appends it to a byte array output stream. <p>
     *
     * Characters above 0x7f and the characters double quote, ampersand, less
     * than and greater than are written as decimal numeric character
     * references; a surrogate pair is written as a single reference to its
     * code point. Characters below 0x20 are replaced with a space. A null or
     * empty string writes nothing.
     *
     * @param b the output byte array output stream
     * @param s the input string
     */
    public static void stringToHtmlBytes(HsqlByteArrayOutputStream b,
                                         String s) {

        if (s == null) {
            return;
        }

        final int len = s.length();
        char[]    chars;

        if (len == 0) {
            return;
        }

        chars = s.toCharArray();

        b.ensureRoom(len);

        for (int i = 0; i < len; i++) {
            char c = chars[i];

            if (c > 0x007f || c == '"' || c == '&' || c == '<' || c == '>') {
                int codePoint = Character.codePointAt(chars, i);

                if (Character.charCount(codePoint) == 2) {
                    i++;
                }

                // The reservation made before the loop allows one byte per
                // input char only. A character reference takes up to 10
                // bytes, so reserve room for it together with the remaining
                // chars, which are written without a bounds check.
                b.ensureRoom(16 + (len - i));
                b.writeNoCheck('&');
                b.writeNoCheck('#');
                b.writeBytes(String.valueOf(codePoint));
                b.writeNoCheck(';');
            } else if (c < 0x0020) {
                b.writeNoCheck(' ');
            } else {
                b.writeNoCheck(c);
            }
        }
    }

    /**
     * Returns a string representation in UUID form from a binary string.
     *
     * UUID string is composed of 8-4-4-4-12 lower-case hexadecimal
     * characters.
     *
     * @param b the byte array, may be null
     * @return UUID string form, or null for a null argument
     * @throws NumberFormatException if the array length is not 16
     */
    public static String toStringUUID(byte[] b) {

        if (b == null) {
            return null;
        }

        if (b.length != 16) {
            throw new NumberFormatException();
        }

        char[] chars = new char[36];
        int    hexIndex;

        for (int i = 0, j = 0; i < b.length; ) {
            hexIndex   = (b[i] & 0xf0) >> 4;
            chars[j++] = (char) HEXBYTES[hexIndex];
            hexIndex   = b[i] & 0xf;
            chars[j++] = (char) HEXBYTES[hexIndex];

            i++;

            // a dash follows the 4th, 6th, 8th and 10th byte
            if (i >= 4 && i <= 10 && (i % 2) == 0) {
                chars[j++] = '-';
            }
        }

        return new String(chars);
    }

    /**
     * Writes the 36 character UUID form (8-4-4-4-12 lower-case hexadecimal
     * characters) of a 16 byte array as ASCII bytes into the given output
     * array. The caller must supply an output array with at least 36 bytes
     * available from the given offset.
     *
     * @param o output array
     * @param from offset into output array
     * @param b input array of 16 bytes
     * @return written count
     * @throws NumberFormatException if the input array length is not 16
     */
    public static int writeUUIDHexBytes(byte[] o, final int from, byte[] b) {

        if (b.length != 16) {
            throw new NumberFormatException();
        }

        int pos = from;
        int hexIndex;

        for (int i = 0; i < b.length; ) {
            hexIndex = (b[i] & 0xf0) >> 4;
            o[pos++] = HEXBYTES[hexIndex];
            hexIndex = b[i] & 0xf;
            o[pos++] = HEXBYTES[hexIndex];

            i++;

            // a dash follows the 4th, 6th, 8th and 10th byte
            if (i >= 4 && i <= 10 && (i % 2) == 0) {
                o[pos++] = '-';
            }
        }

        return pos - from;
    }

    /**
     * Returns a byte[] representation in UUID form from a UUID string.
     *
     * The string must consist of 8-4-4-4-12 hexadecimal characters separated
     * by dashes. Upper and lower case digits are both accepted.
     *
     * @param s the UUID string, may be null
     * @return byte array of 16 bytes, or null for a null argument
     * @throws NumberFormatException if the string is not 36 characters long,
     *   a dash is missing from its expected position or a digit position
     *   holds a character that is not a hexadecimal digit
     */
    public static byte[] toBinaryUUID(String s) {

        if (s == null) {
            return null;
        }

        if (s.length() != 36) {
            throw new NumberFormatException();
        }

        byte[] bytes = new byte[16];

        for (int i = 0, j = 0; i < bytes.length; ) {
            int high = getNibble(s.charAt(j++));
            int low  = getNibble(s.charAt(j++));

            // getNibble signals a non-hex character with -1; reject it rather
            // than folding it into a meaningless byte value
            if (high == -1 || low == -1) {
                throw new NumberFormatException();
            }

            bytes[i] = (byte) ((high << 4) + low);

            i++;

            // a dash is expected after the 4th, 6th, 8th and 10th byte
            if (i >= 4 && i <= 10 && (i % 2) == 0) {
                char c = s.charAt(j++);

                if (c != '-') {
                    throw new NumberFormatException();
                }
            }
        }

        return bytes;
    }
}