Utf32Reader.java - 源码阅读网

aalto-xml-1.1.0.jar

|

com.fasterxml:aalto-xml:1.1.0

META-INF

com

fasterxml

aalto

io

AsyncInputFeeder.java

package-info.java

async

AsyncXMLStreamReader.java

util

evt

stax

in

NsDeclaration.java

ByteBasedPNameFactory.java

PName.java

NsBinding.java

FixedNsContext.java

XmlScanner.java

Utf32Reader.java

InputCharTypes.java

ByteBasedScanner.java

ByteSourceBootstrapper.java

PNameC.java

ElementScope.java

PName3.java

CharSourceBootstrapper.java

AttributeCollector.java

CharBasedPNameTable.java

PNameN.java

ByteBasedPName.java

PName1.java

EntityNames.java

MergedStream.java

ReaderScanner.java

PName2.java

InputBootstrapper.java

ByteBasedPNameTable.java

Utf8Scanner.java

ReaderConfig.java

StreamScanner.java

AsyncByteArrayFeeder.java

UncheckedStreamException.java

sax

WFCException.java

AsyncXMLInputFactory.java

impl

dom

AsyncByteBufferFeeder.java

ValidationException.java

out

annotations

test

Utf32Reader.java

清空

类结构

/* Woodstox Lite ("wool") XML processor * * Copyright (c) 2006- Tatu Saloranta, tatu.saloranta@iki.fi * * Licensed under the License specified in the file LICENSE which is * included with the source code. * You may not use this file except in compliance with the License. * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.fasterxml.aalto.in; import java.io.*; import com.fasterxml.aalto.in.ReaderConfig; import com.fasterxml.aalto.util.XmlConsts; /** * Since JDK does not come with UTF-32/UCS-4, let's implement a simple * decoder to use. */ public final class Utf32Reader extends Reader { final static char NULL_CHAR = (char) 0; final ReaderConfig mConfig; protected InputStream mIn; protected byte[] mBuffer; protected int mPtr; protected int mLength; final boolean mBigEndian; /** * Although input is fine with full Unicode set, Java still uses * 16-bit chars, so we may have to split high-order chars into * surrogate pairs. */ char mSurrogate = NULL_CHAR; /** * Total read character count; used for error reporting purposes */ int mCharCount = 0; /** * Total read byte count; used for error reporting purposes */ int mByteCount = 0; /* //////////////////////////////////////// // Life-cycle //////////////////////////////////////// */ public Utf32Reader(ReaderConfig cfg, InputStream in, byte[] buf, int ptr, int len, boolean isBigEndian) { mConfig = cfg; mBigEndian = isBigEndian; } /* //////////////////////////////////////// // Reader API //////////////////////////////////////// */ @Override public void close() throws IOException { InputStream in = mIn; if (in != null) { mIn = null; freeBuffers(); in.close(); } } char[] mTmpBuf = null; /** * Although this method is implemented by the base class, AND it should * never be called by Woodstox code, let's still implement it bit more * efficiently just in case */ @Override public int read() throws IOException { if (mTmpBuf == null) { mTmpBuf = new char[1]; } if (read(mTmpBuf, 0, 1) < 1) { return -1; } return mTmpBuf[0]; } /* //////////////////////////////////////// // Public API //////////////////////////////////////// */ @Override public int read(char[] cbuf, int start, int len) throws IOException { // Already EOF? if (mBuffer == null) { return -1; } if (len < 1) { return len; } // Let's then ensure there's enough room... if (start < 0 || (start+len) > cbuf.length) { reportBounds(cbuf, start, len); } len += start; int outPtr = start; // Ok, first; do we have a surrogate from last round? if (mSurrogate != NULL_CHAR) { cbuf[outPtr++] = mSurrogate; mSurrogate = NULL_CHAR; // No need to load more, already got one char } else { /* Note: we'll try to avoid blocking as much as possible. As a * result, we only need to get 4 bytes for a full char. */ int left = (mLength - mPtr); if (left < 4) { if (!loadMore(left)) { // (legal) EOF? return -1; } } } byte[] buf = mBuffer; main_loop: while (outPtr < len) { int ptr = mPtr; int ch; if (mBigEndian) { ch = (buf[ptr] << 24) | ((buf[ptr+1] & 0xFF) << 16) | ((buf[ptr+2] & 0xFF) << 8) | (buf[ptr+3] & 0xFF); } else { ch = (buf[ptr] & 0xFF) | ((buf[ptr+1] & 0xFF) << 8) | ((buf[ptr+2] & 0xFF) << 16) | (buf[ptr+3] << 24); } mPtr += 4; // Does it need to be split to surrogates? if (ch >= 0xD800) { // Illegal? if (ch > XmlConsts.MAX_UNICODE_CHAR) { reportInvalid(ch, outPtr-start, "(above "+Integer.toHexString(XmlConsts.MAX_UNICODE_CHAR)+") "); } if (ch > 0xFFFF) { // need to split into surrogates? ch -= 0x10000; // to normalize it starting with 0x0 cbuf[outPtr++] = (char) (0xD800 + (ch >> 10)); // hmmh. can this ever be 0? (not legal, at least?) ch = (0xDC00 | (ch & 0x03FF)); // Room for second part? if (outPtr >= len) { // nope mSurrogate = (char) ch; break main_loop; } } else { // in 16-bit range... just need validity checks if (ch < 0xE000) { reportInvalid(ch, outPtr-start, "(a surrogate char) "); } else if (ch >= 0xFFFE) { reportInvalid(ch, outPtr-start, ""); } } } cbuf[outPtr++] = (char) ch; if (mPtr >= mLength) { break main_loop; } } len = outPtr - start; mCharCount += len; return len; } /* //////////////////////////////////////// // Internal methods //////////////////////////////////////// */ /** * @param available Number of "unused" bytes in the input buffer * * @return True, if enough bytes were read to allow decoding of at least * one full character; false if EOF was encountered instead. */ private boolean loadMore(int available) throws IOException { mByteCount += (mLength - available); // Bytes that need to be moved to the beginning of buffer? if (available > 0) { if (mPtr > 0) { for (int i = 0; i < available; ++i) { mBuffer[i] = mBuffer[mPtr+i]; } mPtr = 0; } mLength = available; } else { /* Ok; here we can actually reasonably expect an EOF, * so let's do a separate read right away: */ mPtr = 0; int count = mIn.read(mBuffer); if (count < 1) { mLength = 0; if (count < 0) { // -1 freeBuffers(); // to help GC? return false; } // 0 count is no good; let's err out reportStrangeStream(); } mLength = count; } /* Need at least 4 bytes; if we don't get that many, it's an * error. */ while (mLength < 4) { int count = mIn.read(mBuffer, mLength, mBuffer.length - mLength); if (count < 1) { if (count < 0) { // -1, EOF... no good! freeBuffers(); // to help GC? reportUnexpectedEOF(mLength, 4); } // 0 count is no good; let's err out reportStrangeStream(); } mLength += count; } return true; } public final void freeBuffers() { byte[] buf = mBuffer; if (buf != null) { mBuffer = null; if (mConfig != null) { mConfig.freeFullBBuffer(buf); } } } /* ////////////////////////////////////////// // Error reporting ////////////////////////////////////////// */ private void reportUnexpectedEOF(int gotBytes, int needed) throws IOException { int bytePos = mByteCount + gotBytes; int charPos = mCharCount; throw new CharConversionException("Unexpected EOF in the middle of a 4-byte UTF-32 char: got " +gotBytes+", needed "+needed +", at char #"+charPos+", byte #"+bytePos+")"); } private void reportInvalid(int value, int offset, String msg) throws IOException { int bytePos = mByteCount + mPtr - 1; int charPos = mCharCount + offset; throw new CharConversionException("Invalid UTF-32 character 0x" +Integer.toHexString(value) +msg+" at char #"+charPos+", byte #"+bytePos+")"); } protected void reportBounds(char[] cbuf, int start, int len) throws IOException { throw new ArrayIndexOutOfBoundsException("read(buf,"+start+","+len+"), cbuf["+cbuf.length+"]"); } protected void reportStrangeStream() throws IOException { throw new IOException("Strange I/O stream, returned 0 bytes on read"); } }

查找资源

输入类名或文件名

类结构窗口