package com.fasterxml.aalto.in;
import java.io.*;
import java.util.
ArrayList;
import java.util.
Iterator;
import org.xml.sax.
Attributes;
import org.xml.sax.
ContentHandler;
import org.xml.sax.
SAXException;
import org.xml.sax.ext.
LexicalHandler;
import javax.xml.
XMLConstants;
import javax.xml.namespace.
NamespaceContext;
import javax.xml.namespace.
QName;
import javax.xml.stream.
Location;
import javax.xml.stream.
XMLStreamConstants;
import javax.xml.stream.
XMLStreamException;
import org.codehaus.stax2.
XMLStreamLocation2;
import org.codehaus.stax2.typed.
Base64Variant;
import org.codehaus.stax2.typed.
TypedArrayDecoder;
import org.codehaus.stax2.typed.
TypedValueDecoder;
import org.codehaus.stax2.typed.
TypedXMLStreamException;
import org.codehaus.stax2.ri.typed.
CharArrayBase64Decoder;
import com.fasterxml.aalto.
WFCException;
import com.fasterxml.aalto.impl.*;
import com.fasterxml.aalto.util.*;
/**
* This is the abstract base class for all scanner implementations,
* defining operations the actual parser requires from the low-level
* scanners.
* Scanners are encoding and input type (byte, char / stream, block)
* specific, so there are many implementations.
*/
public abstract class
XmlScanner
implements
XmlConsts,
XMLStreamConstants,
NamespaceContext
{
// // // Constants:
/**
 * Marker text that identifies a CDATA section; it is what follows
 * the "&lt;![" prefix.
 */
protected final String CDATA_STR = "CDATA[";

/**
 * Token type used to signal end-of-input in the cases where it can be
 * returned as a value; in other cases an exception may be thrown instead.
 */
public static final int TOKEN_EOI = -1;
/**
 * Highest Unicode character that is allowed in XML content.
 */
protected static final int MAX_UNICODE_CHAR = 0x10FFFF;

// Character codes as int constants

// Null, white space
protected static final int INT_NULL = 0;
protected static final int INT_CR = '\r';
protected static final int INT_LF = '\n';
protected static final int INT_TAB = '\t';
protected static final int INT_SPACE = ' '; // 0x0020

// Punctuation
protected static final int INT_HYPHEN = '-';
protected static final int INT_QMARK = '?';
protected static final int INT_AMP = '&';
protected static final int INT_LT = '<';
protected static final int INT_GT = '>';
protected static final int INT_QUOTE = '"';
protected static final int INT_APOS = '\'';
protected static final int INT_EXCL = '!';
protected static final int INT_COLON = ':';
protected static final int INT_LBRACKET = '[';
protected static final int INT_RBRACKET = ']';
protected static final int INT_SLASH = '/';
protected static final int INT_EQ = '=';

// Letters and digits
protected static final int INT_A = 'A';
protected static final int INT_F = 'F';
protected static final int INT_a = 'a';
protected static final int INT_f = 'f';
protected static final int INT_z = 'z';
protected static final int INT_0 = '0';
protected static final int INT_9 = '9';
// // // Config for bound PName cache:
/**
 * Number of bind misses after which the bound-name cache gets created.
 * The cache should become active quite soon, without waiting for
 * hundreds of misses; the threshold only tries to avoid constructing
 * it when all we get is a soap envelope element or such.
 */
private static final int BIND_MISSES_TO_ACTIVATE_CACHE = 10;

/**
 * Number of slots in the bind cache. Can be reasonably small and
 * should still get a high enough hit rate.
 */
private static final int BIND_CACHE_SIZE = 64;

/**
 * Mask applied to a hash code to get a slot index within the bind cache.
 */
private static final int BIND_CACHE_MASK = BIND_CACHE_SIZE - 1;
/*
/**********************************************************************
/* Configuration
/**********************************************************************
*/
/**
 * Reader configuration passed to the constructor.
 */
protected final ReaderConfig _config;

/**
 * Whether validity checks (wrt. name and text characters) and
 * linefeed normalization are done using xml 1.1 rules (true) or
 * basic xml 1.0 rules (false). Default is 1.0.
 */
protected final boolean _xml11;

/**
 * Copy of the configuration's text coalescing setting.
 */
protected final boolean _cfgCoalescing;

/**
 * Copy of the configuration's lazy parsing setting. Non-final since
 * it may need to be disabled after construction.
 */
protected boolean _cfgLazyParsing;
/*
/**********************************************************************
/* Tokenization state
/**********************************************************************
*/
/**
 * Type of the current token; initially START_DOCUMENT.
 */
protected int _currToken = START_DOCUMENT;

/**
 * Set while the current token has not been completely parsed; data
 * accessors call {@link #finishToken} first when this is set.
 */
protected boolean _tokenIncomplete;

/**
 * Number of START_ELEMENT events returned for which no END_ELEMENT
 * has been returned; the current event is included.
 */
protected int _depth;

/**
 * Holder of the textual content of the current event.
 */
protected final TextBuilder _textBuilder;

/**
 * Flag that indicates that an entity is pending.
 */
protected boolean _entityPending;
/*
/**********************************************************************
/* Name/String handling
/**********************************************************************
*/
/**
 * Character buffer needed for actual String construction (in future,
 * StringBuilder could perhaps be used instead). Holds things like
 * element and attribute names, as well as attribute values.
 */
protected char[] _nameBuffer;

/**
 * Name associated with the current token, if any: name of the current
 * element, target of a processing instruction, or name of an
 * unexpanded entity.
 */
protected PName _tokenName;
/*
/**********************************************************************
/* Element information
/**********************************************************************
*/
/**
 * Used when the current state is <code>START_ELEMENT</code> or
 * <code>END_ELEMENT</code>: indicates whether the underlying physical
 * tag is a so-called empty tag (one that ends with "/&gt;").
 */
protected boolean _isEmptyTag;

/**
 * Information about the current element on the stack.
 */
protected ElementScope _currElem;

/**
 * Public id of the current event (DTD), if any.
 */
protected String _publicId;

/**
 * System id of the current event (DTD), if any.
 */
protected String _systemId;
/*
/**********************************************************************
/* Namespace binding
/**********************************************************************
*/
/**
 * The last namespace declaration encountered. Since declarations are
 * linked backwards, this is also the head of the linked list of all
 * active namespace declarations, most recent one first.
 */
protected NsDeclaration _lastNsDecl;

/**
 * Temporary state that is valid during a START_ELEMENT event: the
 * number of namespace declarations available for it. For END_ELEMENT
 * the count is computed on the fly instead.
 */
protected int _currNsCount;

/**
 * Binding of the default namespace. Like explicit bindings it is a
 * per-document singleton; it is used for elements, never for attributes.
 */
protected NsBinding _defaultNs = NsBinding.createDefaultNs();

/**
 * All prefix bindings needed within the current document so far (if
 * any). Entries are in no particular order, and they specifically do
 * NOT represent actual namespace declarations parsed from xml content.
 */
protected NsBinding[] _nsBindings;

/**
 * Number of entries of {@link #_nsBindings} that are in use.
 */
protected int _nsBindingCount;

/**
 * Small cache of bound names. Unbound pname instances can be easily
 * and safely reused, whereas bound ones are per-document; it still
 * makes sense to try to reuse the latter, using a minimal cache that
 * is activated only after a certain number of misses (to avoid overhead
 * for tiny documents, or documents with few or no namespace prefixes).
 */
protected PName[] _nsBindingCache;

/**
 * Number of binding misses counted so far.
 */
protected int _nsBindMisses;
/*
/**********************************************************************
/* Support for non-transient NamespaceContext
/**********************************************************************
*/
/**
 * The {@link NamespaceContext} returned by the latest call to
 * {@link #getNonTransientNamespaceContext}, kept for reuse iff it
 * would still be a valid context. Starts out as the empty context.
 */
protected FixedNsContext _lastNsContext = FixedNsContext.EMPTY_CONTEXT;
/*
/**********************************************************************
/* Attribute info
/**********************************************************************
*/
/**
 * Collector that attribute accessors of this class delegate to.
 */
protected final AttributeCollector _attrCollector;

/**
 * Attribute count that {@link #getAttrCount} reports.
 */
protected int _attrCount;
/*
/**********************************************************************
/* Minimal location info for all impls
/**********************************************************************
*/
/**
 * Amount of input (bytes or, going by the name, chars) that was read
 * and processed before the contents of the current buffer; needed for
 * calculating absolute offsets.
 */
protected long _pastBytesOrChars;

/**
 * Row that the next character to read is on. The value is 0-based,
 * so API generally adds one to it before returning it.
 */
protected int _currRow;

/**
 * Offset from which the column value is calculated, given the current
 * input buffer pointer. May be negative, if the first character of
 * the row was contained within an earlier buffer.
 */
protected int _rowStartOffset;

/**
 * Offset (in chars or bytes) at the start of the current token.
 */
protected long _startRawOffset;

/**
 * Row at the start of the current (last returned) token.
 */
protected long _startRow = -1L;

/**
 * Column at the start of the current (last returned) token.
 */
protected long _startColumn = -1L;
/*
/**********************************************************************
/* Life-cycle
/**********************************************************************
*/
/**
 * Initializes the state shared by all scanner implementations: copies
 * settings from the configuration, and obtains the text buffer, the
 * attribute collector and the name buffer.
 *
 * @param cfg Reader configuration to use
 */
protected XmlScanner(ReaderConfig cfg)
{
    this._config = cfg;

    // Settings are copied once, up front
    this._cfgCoalescing = cfg.willCoalesceText();
    this._cfgLazyParsing = cfg.willParseLazily();
    this._xml11 = cfg.isXml11();

    // Helper objects and buffers
    this._textBuilder = TextBuilder.createRecyclableBuffer(cfg);
    this._attrCollector = new AttributeCollector(cfg);
    this._nameBuffer = cfg.allocSmallCBuffer(ReaderConfig.DEFAULT_SMALL_BUFFER_LEN);

    this._currRow = 0;
}
/**
 * Called once the parsing process has ended (either because end of
 * input was encountered, or due to an explicit close), at which point
 * buffers can and should be released.
 *
 * @param forceCloseSource True if the underlying input source is
 *   to be closed regardless of whether auto-close has been enabled
 *   via configuration (or if the scanner manages the input source)
 *
 * @throws XMLStreamException (an <code>IoStreamException</code>) if
 *   closing the source fails with an <code>IOException</code>
 */
public final void close(boolean forceCloseSource) throws XMLStreamException
{
    _releaseBuffers();

    final boolean closeSource = forceCloseSource || _config.willAutoCloseInput();
    if (!closeSource) {
        return;
    }
    try {
        _closeSource();
    } catch (IOException e) {
        throw new IoStreamException(e);
    }
}
/**
 * Recycles the text builder and hands the name buffer (if it is still
 * held) back to the configuration.
 */
protected void _releaseBuffers()
{
    _textBuilder.recycle(true);

    final char[] nameBuf = _nameBuffer;
    if (nameBuf == null) { // nothing (more) to give back
        return;
    }
    // Reference is cleared before the buffer is handed back
    _nameBuffer = null;
    _config.freeSmallCBuffer(nameBuf);
}
/**
 * Closes the underlying input source; {@link #close} calls this when
 * the source is to be closed.
 *
 * @throws IOException if closing fails
 */
protected abstract void _closeSource() throws IOException;
/*
/**********************************************************************
/* Package access methods, needed by SAX impl
/**********************************************************************
*/
/**
 * @return Reader configuration this scanner was constructed with
 */
public ReaderConfig getConfig()
{
    return _config;
}
/**
 * @return Attribute collector of this scanner
 */
public AttributeCollector getAttrCollector()
{
    return _attrCollector;
}
/*
/**********************************************************************
/* Public scanner interface, iterating
/**********************************************************************
*/
// // // First, main iteration methods
/**
 * Main iteration method for content outside of the element tree.
 * NOTE(review): return value is presumably the type of the next token
 * (or {@link #TOKEN_EOI}) -- confirm against implementations.
 *
 * @param isProlog presumably true within prolog, false within
 *   epilog -- confirm against implementations
 */
public abstract int nextFromProlog(boolean isProlog)
    throws XMLStreamException;

/**
 * Main iteration method for content within the element tree.
 * NOTE(review): return value is presumably the type of the next
 * token -- confirm against implementations.
 */
public abstract int nextFromTree()
    throws XMLStreamException;
/**
 * Called to make sure that the current token/event has been parsed
 * completely, so that all the data needed for returning it (textual
 * content, PI data, comment text etc) is available.
 */
protected abstract void finishToken()
    throws XMLStreamException;
/**
 * Skips over whatever remains of the current, incompletely parsed
 * token (data of a PI, rest of a text segment and so on).
 *
 * @return True if skipping also revealed the type of the following
 *   event (which has then been assigned to <code>_currToken</code>);
 *   false if that remains to be done
 *
 * @throws XMLStreamException if one of the skip methods called fails
 */
protected final boolean skipToken() throws XMLStreamException
{
    _tokenIncomplete = false;

    // Becomes true if skipping ran into an entity; _tokenName has
    // already been set in that case, only the token type is missing
    boolean entityFound = false;

    switch (_currToken) {
    case PROCESSING_INSTRUCTION:
        skipPI();
        break;
    case COMMENT:
        skipComment();
        break;
    case SPACE:
        skipSpace();
        break;
    case DTD:
        finishDTD(false); // false -> skip subset text
        break;
    case CHARACTERS:
        // Coalesced text only needs skipping if the primary segment
        // did not already end in an entity
        entityFound = skipCharacters()
            || (_cfgCoalescing && skipCoalescedText());
        break;
    case CDATA:
        skipCData();
        if (_cfgCoalescing) {
            skipCoalescedText();
            // Here the pending-entity flag is consulted, not the return value
            entityFound = _entityPending;
        }
        break;
    default:
        throw new Error("Internal error, unexpected incomplete token type "+ErrorConsts.tokenTypeDesc(_currToken));
    }

    if (entityFound) {
        _currToken = ENTITY_REFERENCE;
    }
    return entityFound;
}
/*
/**********************************************************************
/* Public scanner interface, location access
/**********************************************************************
*/
/**
 * Accessor for the location the scanner is currently at.
 *
 * @return Current input location
 */
public abstract XMLStreamLocation2 getCurrentLocation();
/**
 * Builds a location from the offset, row and column recorded for the
 * start of the current token, along with public and system ids taken
 * from the configuration.
 *
 * @return Location constructed from the recorded (zero-based) values
 */
public final XMLStreamLocation2 getStartLocation()
{
    // !!! TODO: deal with impedance wrt int/long (flaw in Stax API)
    return LocationImpl.fromZeroBased(
            _config.getPublicId(),
            _config.getSystemId(),
            _startRawOffset,
            (int) _startRow,
            (int) _startColumn);
}
// Offset accessors; implementations are not visible from here.
// NOTE(review): presumably offsets of the start/end of the current
// token, in bytes and chars respectively -- confirm against sub-classes.

public abstract long getStartingByteOffset();

public abstract long getStartingCharOffset();

public abstract long getEndingByteOffset()
    throws XMLStreamException;

public abstract long getEndingCharOffset()
    throws XMLStreamException;
/**
 * Accessor for the location at the end of the current token. Since
 * the end is only known once the token has been completely parsed,
 * an incomplete token is finished first.
 *
 * @return Current location, after the token has been completed
 */
public XMLStreamLocation2 getEndLocation() throws XMLStreamException
{
    if (_tokenIncomplete) {
        finishToken();
    }
    final XMLStreamLocation2 end = getCurrentLocation();
    return end;
}
/**
 * @return Current row, converted from the internal 0-based value to
 *   a 1-based one
 */
public final int getCurrentLineNr()
{
    return 1 + _currRow;
}
/**
 * Column counterpart of {@link #getCurrentLineNr}.
 * NOTE(review): base (0 or 1) of the returned value is not visible
 * from here -- confirm against implementations.
 */
public abstract int getCurrentColumnNr();
/**
 * @return System id, as reported by the configuration
 */
public final String getInputSystemId()
{
    final String systemId = _config.getSystemId();
    return systemId;
}
/**
 * @return Public id, as reported by the configuration
 */
public final String getInputPublicId()
{
    final String publicId = _config.getPublicId();
    return publicId;
}
/*
/**********************************************************************
/* Public scanner interface, other methods
/**********************************************************************
*/
/**
 * @return True if the depth is zero, that is, there is no
 *   START_ELEMENT for which END_ELEMENT has not been returned
 */
public final boolean hasEmptyStack()
{
    return 0 == _depth;
}
/**
 * @return Number of START_ELEMENT events returned for which no
 *   END_ELEMENT has been returned
 */
public final int getDepth()
{
    return _depth;
}
/**
 * @return Value of the empty-tag flag (relevant for START_ELEMENT
 *   and END_ELEMENT)
 */
public final boolean isEmptyTag()
{
    return _isEmptyTag;
}
/*
/**********************************************************************
/* Data accessors, names:
/**********************************************************************
*/
/**
 * @return Name associated with the current token, if any
 */
public final PName getName()
{
    return _tokenName;
}
/**
 * @return Qualified name that the current token name constructs,
 *   given the default namespace binding
 */
public final QName getQName()
{
    final PName curr = _tokenName;
    return curr.constructQName(_defaultNs);
}
/**
 * @return Public id of the current event (DTD), if any
 */
public final String getDTDPublicId()
{
    return _publicId;
}
/**
 * @return System id of the current event (DTD), if any
 */
public final String getDTDSystemId()
{
    return _systemId;
}
/*
/**********************************************************************
/* Data accessors, (element) text:
/**********************************************************************
*/
/**
 * Accessor for the textual content of the current event; an
 * incomplete token is finished first.
 *
 * @return Contents of the text buffer, as a String
 */
public final String getText() throws XMLStreamException
{
    if (_tokenIncomplete) {
        finishToken();
    }
    final String text = _textBuilder.contentsAsString();
    return text;
}
/**
 * Accessor for the size of the text buffer; an incomplete token is
 * finished first.
 *
 * @return Size reported by the text buffer
 */
public final int getTextLength() throws XMLStreamException
{
    if (_tokenIncomplete) {
        finishToken();
    }
    final int length = _textBuilder.size();
    return length;
}
/**
 * Accessor for the character buffer of the text builder; an
 * incomplete token is finished first.
 *
 * @return Buffer that the text builder exposes
 */
public final char[] getTextCharacters() throws XMLStreamException
{
    if (_tokenIncomplete) {
        finishToken();
    }
    final char[] buffer = _textBuilder.getTextBuffer();
    return buffer;
}
/**
 * Has the text builder copy contents into the given array; an
 * incomplete token is finished first. Arguments are passed to the
 * text builder as is.
 *
 * @param srcStart Start offset within the text content
 * @param target Array to copy into
 * @param targetStart Start offset within <code>target</code>
 * @param len Length, as passed to the text builder
 *
 * @return Value returned by the text builder (presumably number of
 *   characters copied -- confirm against TextBuilder)
 */
public final int getTextCharacters(int srcStart, char[] target, int targetStart, int len)
    throws XMLStreamException
{
    if (_tokenIncomplete) {
        finishToken();
    }
    final int result = _textBuilder.contentsToArray(srcStart, target, targetStart, len);
    return result;
}
/**
 * Has the text builder write the raw contents of the current event to
 * the given writer; an incomplete token is finished first.
 *
 * @param w Writer to pass to the text builder
 * @param preserveContents Not consulted here: preserve or not, the
 *   contents are held in memory (could be improved if necessary)
 *
 * @return Value returned by the text builder
 *
 * @throws XMLStreamException (an <code>IoStreamException</code>) if
 *   writing fails with an <code>IOException</code>
 */
public final int getText(Writer w, boolean preserveContents)
    throws XMLStreamException
{
    if (_tokenIncomplete) {
        finishToken();
    }
    final int result;
    try {
        result = _textBuilder.rawContentsTo(w);
    } catch (IOException e) {
        throw new IoStreamException(e);
    }
    return result;
}
/**
 * Checks whether the text builder considers its contents to be all
 * white space; an incomplete token is finished first.
 *
 * @return Answer given by the text builder
 */
public final boolean isTextWhitespace() throws XMLStreamException
{
    if (_tokenIncomplete) {
        finishToken();
    }
    final boolean allWs = _textBuilder.isAllWhitespace();
    return allWs;
}
/**
 * Called by the stream reader to decode space-separated tokens that
 * are part of the current text event, using the decoder given.
 * A decoding failure is rethrown with the current location included.
 *
 * @param tad Decoder to use
 * @param reset If true, the text buffer needs to be told to reset its
 *   decoding state; if false, it should not
 *
 * @return Value returned by the text buffer (presumably number of
 *   elements decoded -- confirm against TextBuilder)
 */
public final int decodeElements(TypedArrayDecoder tad, boolean reset)
    throws XMLStreamException
{
    if (_tokenIncomplete) {
        finishToken();
    }
    final int result;
    try {
        result = _textBuilder.decodeElements(tad, reset);
    } catch (TypedXMLStreamException orig) {
        // Same lexical value, message and cause; location added
        final Location where = getCurrentLocation();
        final String lexicalValue = orig.getLexical();
        final IllegalArgumentException rootCause = (IllegalArgumentException) orig.getCause();
        throw new TypedXMLStreamException(lexicalValue, orig.getMessage(), where, rootCause);
    }
    return result;
}
/**
 * Called by the stream reader to reset the given base64 decoder with
 * data from the current text event; an incomplete token is finished
 * first. Arguments are handed to the text buffer as is.
 *
 * @param v Base64 variant to pass along
 * @param dec Decoder to reset
 * @param firstChunk Flag to pass along
 */
public final void resetForDecoding(Base64Variant v, CharArrayBase64Decoder dec,
        boolean firstChunk)
    throws XMLStreamException
{
    if (_tokenIncomplete) {
        finishToken();
    }
    _textBuilder.resetForBinaryDecode(v, dec, firstChunk);
}
/*
/**********************************************************************
/* Data accessors, firing SAX events
/**********************************************************************
*/
/**
 * Fires SAX events for the current start element: first a
 * <code>startPrefixMapping</code> for each namespace declaration of
 * the element, then <code>startElement</code> itself. Does nothing
 * if no handler is given.
 *
 * @param h Handler to call; may be null
 * @param attrs Attributes to pass to <code>startElement</code>
 */
public void fireSaxStartElement(ContentHandler h, Attributes attrs)
    throws SAXException
{
    if (h == null) {
        return;
    }
    /* 17-Sep-2006, tatus: There is disparity between START/END_ELEMENT;
     *   with START_ELEMENT, _depth is one higher than that of ns
     *   declarations; with END_ELEMENT, the same
     */
    final int declLevel = _depth - 1;

    // First, namespace declarations of this element (if any)
    for (NsDeclaration decl = _lastNsDecl;
            decl != null && decl.getLevel() == declLevel;
            decl = decl.getPrev()) {
        final String declPrefix = decl.getPrefix();
        final String declUri = decl.getCurrNsURI();
        h.startPrefixMapping((declPrefix == null) ? "" : declPrefix, declUri);
    }

    // Then the start-element event itself
    final PName elemName = getName();
    final String nsUri = elemName.getNsUri();
    // Sax requires "" (not null) for ns uris...
    h.startElement((nsUri == null) ? "" : nsUri,
            elemName.getLocalName(), elemName.getPrefixedName(), attrs);
}
/**
 * Fires SAX events for the current end element. Order is reversed
 * compared to start element: first <code>endElement</code>, then an
 * <code>endPrefixMapping</code> for each expiring namespace
 * declaration. Does nothing if no handler is given.
 *
 * @param h Handler to call; may be null
 */
public void fireSaxEndElement(ContentHandler h)
    throws SAXException
{
    if (h == null) {
        return;
    }
    // First the end-element event
    final PName elemName = getName();
    final String nsUri = elemName.getNsUri();
    // Sax requires "" (not null) for ns uris...
    h.endElement((nsUri == null) ? "" : nsUri,
            elemName.getLocalName(), elemName.getPrefixedName());

    /* 17-Sep-2006, tatus: There is disparity between START/END_ELEMENT;
     *   with START_ELEMENT, _depth is one higher than that of ns
     *   declarations; with END_ELEMENT, the same
     */
    final int declLevel = _depth;

    // Then, the expiring namespace declarations (if any)
    for (NsDeclaration decl = _lastNsDecl;
            decl != null && decl.getLevel() == declLevel;
            decl = decl.getPrev()) {
        final String declPrefix = decl.getPrefix();
        h.endPrefixMapping((declPrefix == null) ? "" : declPrefix);
    }
}
/**
 * Has the text builder fire character events for the current event,
 * finishing an incomplete token first. Does nothing if no handler
 * is given.
 *
 * @param h Handler to pass to the text builder; may be null
 */
public void fireSaxCharacterEvents(ContentHandler h)
    throws XMLStreamException, SAXException
{
    if (h == null) {
        return;
    }
    if (_tokenIncomplete) {
        finishToken();
    }
    _textBuilder.fireSaxCharacterEvents(h);
}
/**
 * Has the text builder fire space events for the current event,
 * finishing an incomplete token first. Does nothing if no handler
 * is given.
 *
 * @param h Handler to pass to the text builder; may be null
 */
public void fireSaxSpaceEvents(ContentHandler h)
    throws XMLStreamException, SAXException
{
    if (h == null) {
        return;
    }
    if (_tokenIncomplete) {
        finishToken();
    }
    _textBuilder.fireSaxSpaceEvents(h);
}
/**
 * Has the text builder fire a comment event for the current event,
 * finishing an incomplete token first. Does nothing if no handler
 * is given.
 *
 * @param h Handler to pass to the text builder; may be null
 */
public void fireSaxCommentEvent(LexicalHandler h)
    throws XMLStreamException, SAXException
{
    if (h == null) {
        return;
    }
    if (_tokenIncomplete) {
        finishToken();
    }
    _textBuilder.fireSaxCommentEvent(h);
}
/**
 * Fires a processing instruction event, with local name of the
 * current token name as the target and text of the event as the data;
 * an incomplete token is finished first. Does nothing if no handler
 * is given.
 *
 * @param h Handler to call; may be null
 */
public void fireSaxPIEvent(ContentHandler h)
    throws XMLStreamException, SAXException
{
    if (h == null) {
        return;
    }
    if (_tokenIncomplete) {
        finishToken();
    }
    final String target = _tokenName.getLocalName();
    final String data = getText();
    h.processingInstruction(target, data);
}
/*
/**********************************************************************
/* Data accessors, attributes:
/**********************************************************************
*/
/**
 * @return Current value of the attribute count
 */
public final int getAttrCount()
{
    return _attrCount;
}
/**
 * @param index Index of the attribute; not checked here, the caller
 *   is expected to have checked it
 *
 * @return Local name of the attribute
 */
public final String getAttrLocalName(int index)
{
    final PName attrName = _attrCollector.getName(index);
    return attrName.getLocalName();
}
/**
 * @param index Index of the attribute; not checked here, the caller
 *   is expected to have checked it
 *
 * @return Qualified name the attribute collector returns for the index
 */
public final QName getAttrQName(int index)
{
    final QName qname = _attrCollector.getQName(index);
    return qname;
}
/**
 * @param index Index of the attribute; not checked here, the caller
 *   is expected to have checked it
 *
 * @return Prefixed name of the attribute
 */
public final String getAttrPrefixedName(int index)
{
    final PName attrName = _attrCollector.getName(index);
    return attrName.getPrefixedName();
}
/**
 * @param index Index of the attribute; not checked here, the caller
 *   is expected to have checked it
 *
 * @return Namespace URI the name of the attribute reports
 */
public final String getAttrNsURI(int index)
{
    final PName attrName = _attrCollector.getName(index);
    return attrName.getNsUri();
}
/**
 * @param index Index of the attribute; not checked here, the caller
 *   is expected to have checked it
 *
 * @return Prefix the name of the attribute reports
 */
public final String getAttrPrefix(int index)
{
    final PName attrName = _attrCollector.getName(index);
    return attrName.getPrefix();
}
/**
 * @param index Index of the attribute; not checked here, the caller
 *   is expected to have checked it
 *
 * @return Value the attribute collector returns for the index
 */
public final String getAttrValue(int index)
{
    final String value = _attrCollector.getValue(index);
    return value;
}
/**
 * Looks up value of an attribute by namespace URI and local name.
 *
 * @param nsURI Namespace URI to pass to the attribute collector
 * @param localName Local name to pass to the attribute collector
 *
 * @return Null if there are no attributes; otherwise whatever the
 *   attribute collector returns for the lookup
 */
public final String getAttrValue(String nsURI, String localName)
{
    // The collector may not have been reset when there are no
    // attributes, so it can only be consulted when there are some
    return (_attrCount < 1) ? null : _attrCollector.getValue(nsURI, localName);
}
/**
 * Has the attribute collector decode value of the attribute at given
 * index, using the decoder given.
 *
 * @param index Index of the attribute
 * @param tvd Decoder to use
 */
public final void decodeAttrValue(int index, TypedValueDecoder tvd)
    throws XMLStreamException
{
    _attrCollector.decodeValue(index, tvd);
}
/**
 * Called to decode an attribute value that consists of zero or more
 * space-separated tokens, using the decoder provided. Work is done by
 * the attribute collector, which is also given this scanner.
 *
 * @param index Index of the attribute
 * @param tad Decoder to use
 *
 * @return Number of tokens decoded
 */
public final int decodeAttrValues(int index, TypedArrayDecoder tad)
    throws XMLStreamException
{
    final int count = _attrCollector.decodeValues(index, tad, this);
    return count;
}
/**
 * Has the attribute collector decode value of the attribute at given
 * index as binary data; the collector is also given this scanner.
 *
 * @param index Index of the attribute
 * @param v Base64 variant to pass along
 * @param dec Decoder to pass along
 *
 * @return Bytes the attribute collector returns
 */
public final byte[] decodeAttrBinaryValue(int index, Base64Variant v,
        CharArrayBase64Decoder dec)
    throws XMLStreamException
{
    final byte[] decoded = _attrCollector.decodeBinaryValue(index, v, dec, this);
    return decoded;
}
/**
 * Looks up index of an attribute by namespace URI and local name.
 *
 * @param nsURI Namespace URI to pass to the attribute collector
 * @param localName Local name to pass to the attribute collector
 *
 * @return -1 if there are no attributes; otherwise whatever the
 *   attribute collector returns for the lookup
 */
public final int findAttrIndex(String nsURI, String localName)
{
    // The collector may not have been reset when there are no
    // attributes, so it can only be consulted when there are some
    return (_attrCount < 1) ? -1 : _attrCollector.findIndex(nsURI, localName);
}
/**
 * @param index Index of the attribute (not used for now; the caller
 *   is expected to have checked it)
 *
 * @return Always "CDATA", since actual handling is yet to be implemented
 */
public final String getAttrType(int index)
{
    // !!! TBI
    return "CDATA";
}
/**
 * @param index Index of the attribute (not used for now)
 *
 * @return Always true; works ok for now since DTD info is not
 *   handled, and there are thus no attribute value defaults
 */
public final boolean isAttrSpecified(int index)
{
    // !!! TBI
    return true;
}
/*
/**********************************************************************
/* Data accessors, namespace declarations:
/**********************************************************************
*/
/**
 * @return Number of namespace declarations for the current event:
 *   for START_ELEMENT the stored count; otherwise number of
 *   declarations on the current depth level, counted on the fly
 *   (zero if there are no declarations at all)
 */
public final int getNsCount()
{
    if (_currToken == START_ELEMENT) {
        return _currNsCount;
    }
    if (_lastNsDecl == null) {
        return 0;
    }
    return _lastNsDecl.countDeclsOnLevel(_depth);
}
/**
 * @param index Index of the namespace declaration of current element
 *
 * @return Prefix of the binding of the declaration at given index
 */
public final String getNamespacePrefix(int index)
{
    final NsBinding binding = findCurrNsDecl(index).getBinding();
    return binding.mPrefix;
}
/**
 * @param index Index of the namespace declaration of current element
 *
 * @return URI of the binding of the declaration at given index
 */
public final String getNamespaceURI(int index)
{
    final NsBinding binding = findCurrNsDecl(index).getBinding();
    return binding.mURI;
}
/**
 * Locates the namespace declaration of the current element that has
 * the given index; an index for which there is no declaration is
 * reported via <code>reportInvalidNsIndex</code>.
 *
 * @param index Index of the declaration to find
 */
private NsDeclaration findCurrNsDecl(int index)
{
    /* 17-Sep-2006, tatu: There is disparity between START/END_ELEMENT;
     *   with START_ELEMENT, _depth is one higher than that of ns
     *   declarations; with END_ELEMENT, the same
     */
    final boolean atStartElem = (_currToken == START_ELEMENT);
    final int declLevel = atStartElem ? (_depth - 1) : _depth;

    // 20-Jan-2011, tatu: Hmmh... since declarations are in reverse order should we reorder?
    // For START_ELEMENT the index is counted from the far end of the list
    int stepsLeft = atStartElem ? (_currNsCount - 1 - index) : index;

    for (NsDeclaration decl = _lastNsDecl;
            decl != null && decl.getLevel() == declLevel;
            decl = decl.getPrev()) {
        if (stepsLeft == 0) {
            return decl;
        }
        --stepsLeft;
    }
    reportInvalidNsIndex(index);
    return null; // never gets here
}
// Part of NamespaceContext impl below
//public final String getNsUri(String prefix)
/**
 * @return Namespace URI of the current token name; or, if the name
 *   has none (meaning it uses the default namespace), URI of the
 *   default namespace binding
 */
public final String getNamespaceURI()
{
    final String nameUri = _tokenName.getNsUri();
    if (nameUri != null) {
        return nameUri;
    }
    // Null means it uses the default ns:
    return _defaultNs.mURI;
}
/**
 * Asks the last returned context to either reuse itself or create a
 * new context for the current head of namespace declarations; the
 * result is remembered for the next call.
 *
 * @return Namespace context obtained
 */
public final NamespaceContext getNonTransientNamespaceContext()
{
    final FixedNsContext ctxt = _lastNsContext.reuseOrCreate(_lastNsDecl);
    _lastNsContext = ctxt;
    return ctxt;
}
/*
/**********************************************************************
/* NamespaceContext implementation
/**********************************************************************
*/
/**
 * Finds the namespace URI that given prefix is currently bound to.
 *
 * @param prefix Prefix to look for; empty String denotes the default
 *   namespace
 *
 * @return For the empty prefix, URI of the default namespace ("" if
 *   it has none); for "xml" and "xmlns" their fixed URIs; for other
 *   prefixes URI of the most recent declaration of the prefix, or
 *   null if there is none
 *
 * @throws IllegalArgumentException if prefix is null
 */
@Override
public String getNamespaceURI(String prefix)
{
    if (prefix == null) {
        throw new IllegalArgumentException(ErrorConsts.ERR_NULL_ARG);
    }
    // Default namespace? Null needs to be converted to ""
    if (prefix.isEmpty()) {
        final String defaultUri = _defaultNs.mURI;
        return (defaultUri == null) ? "" : defaultUri;
    }
    // xml, xmlns?
    if (XMLConstants.XML_NS_PREFIX.equals(prefix)) {
        return XMLConstants.XML_NS_URI;
    }
    if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix)) {
        return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
    }
    // Nope, a specific other prefix: most recent declaration wins
    for (NsDeclaration decl = _lastNsDecl; decl != null; decl = decl.getPrev()) {
        if (decl.hasPrefix(prefix)) {
            return decl.getCurrNsURI();
        }
    }
    return null;
}
/**
 * Finds a prefix that is currently bound to given namespace URI.
 *
 * @param nsURI Namespace URI to look for. Null is not allowed (as
 *   per JDK 1.5 JavaDocs, which do not mention empty String)
 *
 * @return "xml" or "xmlns" for their fixed URIs; "" if the default
 *   namespace is bound to the URI; otherwise prefix of the most
 *   recent declaration for the URI whose prefix is not masked by a
 *   later declaration, or null if there is none
 *
 * @throws IllegalArgumentException if nsURI is null
 */
@Override
public String getPrefix(String nsURI)
{
    if (nsURI == null) {
        throw new IllegalArgumentException(ErrorConsts.ERR_NULL_ARG);
    }
    if (nsURI.equals(XMLConstants.XML_NS_URI)) {
        return XMLConstants.XML_NS_PREFIX;
    }
    if (nsURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
        return XMLConstants.XMLNS_ATTRIBUTE;
    }
    // First: does the default namespace bind to the URI?
    if (nsURI.equals(_defaultNs.mURI)) {
        return "";
    }
    /* Two nested passes: outer one finds a candidate prefix, inner one
     * ensures it is not masked by a later declaration
     */
    for (NsDeclaration decl = _lastNsDecl; decl != null; decl = decl.getPrev()) {
        if (!decl.hasNsURI(nsURI)) {
            continue;
        }
        final String candidate = decl.getPrefix();
        // Default ns wouldn't do (since current one was already checked)
        if (candidate == null) {
            continue;
        }
        boolean masked = false;
        for (NsDeclaration later = _lastNsDecl; later != decl; later = later.getPrev()) {
            if (later.hasPrefix(candidate)) {
                masked = true;
                break;
            }
        }
        if (!masked) {
            return candidate;
        }
    }
    return null;
}
/**
 * Finds all prefixes that are currently bound to given namespace URI.
 *
 * @param nsURI Namespace URI to look for; null is not allowed
 *
 * @return Iterator over just "xml" or "xmlns" for their fixed URIs;
 *   otherwise iterator over "" (if the default namespace is bound to
 *   the URI) followed by prefixes of declarations for the URI whose
 *   prefix is not masked by a later declaration
 *
 * @throws IllegalArgumentException if nsURI is null
 */
@Override
public Iterator<String> getPrefixes(String nsURI)
{
    if (nsURI == null) {
        throw new IllegalArgumentException(ErrorConsts.ERR_NULL_ARG);
    }
    if (nsURI.equals(XMLConstants.XML_NS_URI)) {
        return new SingletonIterator(XMLConstants.XML_NS_PREFIX);
    }
    if (nsURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
        return new SingletonIterator(XMLConstants.XMLNS_ATTRIBUTE);
    }

    // Result list is only created once there is something to add
    ArrayList<String> found = null;

    // First, the default ns?
    if (nsURI.equals(_defaultNs.mURI)) {
        found = new ArrayList<String>();
        found.add("");
    }

    for (NsDeclaration decl = _lastNsDecl; decl != null; decl = decl.getPrev()) {
        if (!decl.hasNsURI(nsURI)) {
            continue;
        }
        final String candidate = decl.getPrefix();
        // Default ns wouldn't do (since current one was already checked)
        if (candidate == null) {
            continue;
        }
        // Ok: but is prefix masked by a later declaration?
        boolean masked = false;
        for (NsDeclaration later = _lastNsDecl; later != decl; later = later.getPrev()) {
            if (later.hasPrefix(candidate)) {
                masked = true;
                break;
            }
        }
        if (masked) {
            continue;
        }
        if (found == null) {
            found = new ArrayList<String>();
        }
        found.add(candidate);
    }

    if (found == null) {
        return EmptyIterator.getInstance();
    }
    if (found.size() == 1) {
        return new SingletonIterator(found.get(0));
    }
    return found.iterator();
}
/*
/**********************************************************************
/* Abstract methods for sub-classes to implement
/**********************************************************************
*/
// // token-finish methods
// One finish method per token type that can be left incomplete.
// NOTE(review): presumably these are what finishToken() implementations
// dispatch to -- confirm against sub-classes.

protected abstract void finishCharacters()
    throws XMLStreamException;

protected abstract void finishCData()
    throws XMLStreamException;

protected abstract void finishComment()
    throws XMLStreamException;

/**
 * @param copyContents Passed as false by {@link #skipToken}, to have
 *   the subset text skipped
 */
protected abstract void finishDTD(boolean copyContents)
    throws XMLStreamException;

protected abstract void finishPI()
    throws XMLStreamException;

protected abstract void finishSpace()
    throws XMLStreamException;
// // token-skip methods
/**
 * Skips the rest of a CHARACTERS token; called by {@link #skipToken}.
 *
 * @return True, if an unexpanded entity was encountered (and
 *   is now pending)
 */
protected abstract boolean skipCharacters()
    throws XMLStreamException;

/** Skips the rest of a CDATA token; called by {@link #skipToken}. */
protected abstract void skipCData()
    throws XMLStreamException;

/** Skips the rest of a COMMENT token; called by {@link #skipToken}. */
protected abstract void skipComment()
    throws XMLStreamException;

/** Skips the rest of a PROCESSING_INSTRUCTION token; called by {@link #skipToken}. */
protected abstract void skipPI()
    throws XMLStreamException;

/** Skips the rest of a SPACE token; called by {@link #skipToken}. */
protected abstract void skipSpace()
    throws XMLStreamException;

/**
 * Secondary skip method, called in coalescing mode after the primary
 * text segment has been skipped.
 *
 * @return True, if an unexpanded entity was encountered (and
 *   is now pending)
 */
protected abstract boolean skipCoalescedText()
    throws XMLStreamException;
// // Raw input access:
/**
 * Raw input access: tries to load more input.
 *
 * @return False if no more input could be loaded, in which case
 *   {@link #loadMoreGuaranteed} reports unexpected end-of-input
 */
protected abstract boolean loadMore()
    throws XMLStreamException;
/*
/**********************************************************************
/* Basic namespace binding methods
/**********************************************************************
*/
/**
 * Called to find or create a fully qualified (bound) name (element /
 * attribute) for a name that has a prefix. Does not get called for
 * non-prefixed names.
 *
 * @param name Unbound name to find bound counterpart for
 * @param prefix Prefix of the name; compared by identity, so needs
 *   to be the canonicalized instance
 *
 * @return Bound name, either from the cache or newly created
 */
protected final PName bindName(PName name, String prefix)
{
    // Step 1: if there is a cache, the bound name may be found there
    final PName[] cache = _nsBindingCache;
    if (cache != null) {
        final PName cached = cache[name.unboundHashCode() & BIND_CACHE_MASK];
        if (cached != null && cached.unboundEquals(name)) {
            return cached;
        }
    }

    // Step 2: no cache, or not found from there; look for the binding
    final int bindingCount = _nsBindingCount;
    for (int ix = 0; ix < bindingCount; ++ix) {
        final NsBinding binding = _nsBindings[ix];
        if (binding.mPrefix == prefix) { // prefixes are canonicalized
            // Match: bubble the binding one step closer to the head
            if (ix > 0) {
                _nsBindings[ix] = _nsBindings[ix - 1];
                _nsBindings[ix - 1] = binding;
            }
            final PName bound = name.createBoundName(binding);
            // Cache gets created only after enough misses
            if (_nsBindingCache == null) {
                ++_nsBindMisses;
                if (_nsBindMisses < BIND_MISSES_TO_ACTIVATE_CACHE) {
                    return bound;
                }
                _nsBindingCache = new PName[BIND_CACHE_SIZE];
            }
            _nsBindingCache[bound.unboundHashCode() & BIND_CACHE_MASK] = bound;
            return bound;
        }
    }

    // Step 3: no binding. Perhaps "xml"? But is "xmlns" legal to use too?
    if (prefix == "xml") {
        return name.createBoundName(NsBinding.XML_BINDING);
    }

    /* Step 4: a new binding is needed. No caching for such entries,
     * yet, but it is noted as a miss
     */
    ++_nsBindMisses;
    final NsBinding created = new NsBinding(prefix);
    if (_nsBindingCount == 0) {
        _nsBindings = new NsBinding[16];
    } else if (_nsBindingCount >= _nsBindings.length) {
        _nsBindings = (NsBinding[]) DataUtil.growAnyArrayBy(_nsBindings, _nsBindings.length);
    }
    _nsBindings[_nsBindingCount++] = created;
    return name.createBoundName(created);
}
/**
 * Called when a namespace declaration needs to find the binding
 * object (essentially a per-prefix-per-document canonical container
 * object).
 *
 * @param prefix Prefix to find binding for; compared by identity, so
 *   needs to be the interned instance
 *
 * @return Existing binding for the prefix; one of the fixed bindings
 *   for "xml" and "xmlns"; or a newly created (and stored) binding
 */
protected final NsBinding findOrCreateBinding(String prefix)
    throws XMLStreamException
{
    // !!! TODO: switch to hash at size N?
    final int bindingCount = _nsBindingCount;
    for (int ix = 0; ix < bindingCount; ++ix) {
        final NsBinding binding = _nsBindings[ix];
        if (binding.mPrefix != prefix) { // prefixes are interned
            continue;
        }
        // Let's bubble it up a notch... can speed things up
        if (ix > 0) {
            _nsBindings[ix] = _nsBindings[ix - 1];
            _nsBindings[ix - 1] = binding;
        }
        return binding;
    }

    // Fixed bindings are not stored in the array
    if (prefix == "xml") {
        return NsBinding.XML_BINDING;
    }
    if (prefix == "xmlns") {
        return NsBinding.XMLNS_BINDING;
    }

    // Nope. Need to create a new binding
    final NsBinding created = new NsBinding(prefix);
    if (_nsBindingCount == 0) {
        _nsBindings = new NsBinding[16];
    } else if (_nsBindingCount >= _nsBindings.length) {
        _nsBindings = (NsBinding[]) DataUtil.growAnyArrayBy(_nsBindings, _nsBindings.length);
    }
    _nsBindings[_nsBindingCount++] = created;
    return created;
}
/**
 * Called when a declared namespace is ready to be bound: checks the
 * declaration for problems, then adds it as the new head of the list
 * of namespace declarations.
 *
 * @param name Name of the declaring attribute: no prefix means the
 *   default namespace is being declared; otherwise local name is
 *   the prefix being declared
 * @param uri Namespace URI to bind; compared by identity against the
 *   fixed "xml" and "xmlns" URIs
 */
protected final void bindNs(PName name, String uri)
    throws XMLStreamException
{
    String prefix = name.getPrefix();
    final NsBinding ns;

    if (prefix == null) { // default ns
        ns = _defaultNs;
    } else {
        // Prefix to bind is the local name of the attribute
        prefix = name.getLocalName();
        ns = findOrCreateBinding(prefix);
        if (ns.isImmutable()) { // xml, xmlns
            checkImmutableBinding(prefix, uri);
        }
    }

    /* 28-Oct-2006, tatus: Also need to ensure that neither
     *   xml nor xmlns-bound namespaces are bound to any
     *   other prefixes. Since we know that URIs are intern()ed,
     *   can just do identity comparison
     */
    if (!ns.isImmutable()) {
        if (uri == XMLConstants.XML_NS_URI) {
            reportIllegalNsDecl("xml", XMLConstants.XML_NS_URI);
        } else if (uri == XMLConstants.XMLNS_ATTRIBUTE_NS_URI) {
            reportIllegalNsDecl("xmlns", XMLConstants.XMLNS_ATTRIBUTE_NS_URI);
        }
    }

    // Already declared in current scope?
    final NsDeclaration head = _lastNsDecl;
    if (head != null && head.alreadyDeclared(prefix, _depth)) {
        reportDuplicateNsDecl(prefix);
    }
    _lastNsDecl = new NsDeclaration(ns, uri, _lastNsDecl, _depth);
}
/**
 * Method called when a declaration for one of the immutable namespace
 * prefixes (xml, xmlns) is encountered. The only acceptable declaration
 * is prefix "xml" bound to {@link XMLConstants#XML_NS_URI}; anything
 * else is reported via {@link #reportIllegalNsDecl(String)}.
 *
 * @param prefix Prefix being declared; compared by identity, so it is
 *   expected to be interned
 * @param uri Namespace URI the prefix is being bound to; a null value
 *   is treated as an illegal declaration
 */
protected final void checkImmutableBinding(String prefix, String uri)
    throws XMLStreamException
{
    // Constant-first equals(): a null URI gets reported as an illegal
    // declaration instead of failing with a NullPointerException
    final boolean legalXmlDecl = (prefix == "xml")
            && XMLConstants.XML_NS_URI.equals(uri);
    if (!legalXmlDecl) {
        reportIllegalNsDecl(prefix);
    }
}
/*
/**********************************************************************
/* Helper methods for sub-classes, input data
/**********************************************************************
*/
/**
 * Requests more input via {@code loadMore()}; if none is available,
 * reports an unexpected end-of-input problem that mentions the token
 * currently being parsed ({@code _currToken}).
 */
protected final void loadMoreGuaranteed()
    throws XMLStreamException
{
    if (loadMore()) {
        return;
    }
    final String problem = "Unexpected end-of-input when trying to parse "
            + ErrorConsts.tokenTypeDesc(_currToken);
    reportInputProblem(problem);
}
/**
 * Requests more input via {@code loadMore()}; if none is available,
 * reports an unexpected end-of-input problem described in terms of the
 * given token type rather than the current token.
 *
 * @param tt Token type to mention in the error message
 */
protected final void loadMoreGuaranteed(int tt)
    throws XMLStreamException
{
    if (loadMore()) {
        return;
    }
    final String problem = "Unexpected end-of-input when trying to parse "
            + ErrorConsts.tokenTypeDesc(tt);
    reportInputProblem(problem);
}
/*
/**********************************************************************
/* Helper methods for sub-classes, character validity checks
/**********************************************************************
*/
/**
 * Verifies that a character value is a valid XML content character,
 * reporting it via {@link #reportInvalidXmlChar} when it is not.
 * Rejected are surrogates (0xD800 - 0xDFFF), 0xFFFE and 0xFFFF, and
 * control characters below 32 other than tab, LF and CR; in XML 1.1
 * mode only the null character is rejected among the latter.
 *
 * @param value Character code to check; per the original note, overflow
 *   beyond the maximum Unicode value has been checked earlier by the caller
 */
protected final void verifyXmlChar(int value)
    throws XMLStreamException
{
    if (value >= 0xD800) {
        // no surrogates via entity expansion; and two non-characters
        final boolean surrogate = (value < 0xE000);
        final boolean nonChar = (value == 0xFFFE) || (value == 0xFFFF);
        if (surrogate || nonChar) {
            reportInvalidXmlChar(value);
        }
        return;
    }
    if (value >= 32) { // regular character, nothing to check
        return;
    }
    final boolean allowedControl =
            (value == INT_TAB) || (value == INT_LF) || (value == INT_CR);
    if (allowedControl) {
        return;
    }
    // XML 1.1 allows most other control chars (but never null); 1.0 does not
    if (value == 0 || !_xml11) {
        reportInvalidXmlChar(value);
    }
}
/*
/**********************************************************************
/* Helper methods for sub-classes, error reporting
/**********************************************************************
*/
/**
 * Central reporting point for input problems: throws a
 * {@link WFCException} carrying the message and the current location.
 *
 * @param msg Description of the problem
 */
protected void reportInputProblem(String msg)
    throws XMLStreamException
{
    /* 29-Mar-2008, tatus: Not sure whether every problem reported here
     *   really is a Well-Formedness Constraint (WFC) violation; they
     *   should be... ?
     */
    final WFCException problem = new WFCException(msg, getCurrentLocation());
    throw problem;
}
/**
 * Called when an entity reference inside an attribute value could not
 * be expanded. The message identifies the entity through the current
 * token name ({@code _tokenName}); the {@code name} argument is not used.
 *
 * @param name Name of the attribute (unused by this implementation)
 * @param isNsDecl True if the attribute is a namespace declaration,
 *   false for a regular attribute
 */
protected void reportUnexpandedEntityInAttr(PName name, boolean isNsDecl)
    throws XMLStreamException
{
    final String where;
    if (isNsDecl) {
        where = "namespace declaration";
    } else {
        where = "attribute value";
    }
    reportInputProblem("Unexpanded ENTITY_REFERENCE (" + _tokenName + ") in " + where);
}
/**
 * Reports an element-like construct found where none is allowed in the
 * prolog or epilog: an end tag, an illegal control character, or
 * (otherwise) a second root element.
 *
 * @param isProlog True when in the prolog, false when in the epilog
 * @param ch Character that follows the opening angle bracket
 */
protected void reportPrologUnexpElement(boolean isProlog, int ch)
    throws XMLStreamException
{
    // Just to be safe, in case caller passed a signed byte.
    // NOTE(review): the mask is 0x7FFFF, which does not reduce the value to
    // a byte range -- confirm whether 0xFF was intended
    final int c = (ch < 0) ? (ch & 0x7FFFF) : ch;

    if (c == '/') { // end element
        if (isProlog) {
            reportInputProblem("Unexpected end element in prolog: malformed XML document, expected root element");
        }
        reportInputProblem("Unexpected end element in epilog: malformed XML document (unbalanced start/end tags?)");
    }

    // Otherwise likely a start element; but check for illegal control chars first
    if (c < 32) {
        final String where;
        if (isProlog) {
            where = ErrorConsts.SUFFIX_IN_PROLOG;
        } else {
            where = ErrorConsts.SUFFIX_IN_EPILOG;
        }
        throwUnexpectedChar(c, "Unrecognized directive " + where);
    }
    reportInputProblem("Second root element in content: malformed XML document, only one allowed");
}
/**
 * Reports an unexpected character in the prolog or epilog. The message
 * suffix names the section, followed by the caller's detail message if
 * one is given; without a detail message an ampersand gets an extra
 * note that entities are not allowed.
 *
 * @param isProlog True when in the prolog, false when in the epilog
 * @param ch Offending character
 * @param msg Optional detail to append to the section suffix; may be null
 */
protected void reportPrologUnexpChar(boolean isProlog, int ch, String msg)
    throws XMLStreamException
{
    final String where;
    if (isProlog) {
        where = ErrorConsts.SUFFIX_IN_PROLOG;
    } else {
        where = ErrorConsts.SUFFIX_IN_EPILOG;
    }

    if (msg != null) {
        throwUnexpectedChar(ch, where + msg);
        return;
    }
    if (ch == '&') {
        throwUnexpectedChar(ch, where + "; no entities allowed");
    }
    throwUnexpectedChar(ch, where);
}
/**
 * Reports a generic problem in the prolog or epilog; the message is the
 * section suffix from {@code ErrorConsts}, a colon separator and the
 * given description.
 *
 * @param isProlog True when in the prolog, false when in the epilog
 * @param msg Description of the problem
 */
protected void reportPrologProblem(boolean isProlog, String msg)
    throws XMLStreamException
{
    final String section;
    if (isProlog) {
        section = ErrorConsts.SUFFIX_IN_PROLOG;
    } else {
        section = ErrorConsts.SUFFIX_IN_EPILOG;
    }
    reportInputProblem(section + ": " + msg);
}
/**
 * Reports an unexpected character within the element tree (as opposed
 * to the prolog or epilog).
 *
 * @param ch Offending character
 * @param msg Optional detail appended to the "in tree" suffix; may be null
 */
protected void reportTreeUnexpChar(int ch, String msg)
    throws XMLStreamException
{
    final String suffix = ErrorConsts.SUFFIX_IN_TREE;
    final String detail = (msg == null) ? suffix : (suffix + msg);
    throwUnexpectedChar(ch, detail);
}
/**
 * Reports a character that is not legal at the given position of a
 * name: a misplaced colon, an invalid first character, or an invalid
 * subsequent character.
 *
 * @param ch Offending character
 * @param index Position of the character within the name; 0 means it
 *   is the name start character
 */
protected void reportInvalidNameChar(int ch, int index)
    throws XMLStreamException
{
    if (ch == INT_COLON) {
        reportInputProblem("Invalid colon in name: at most one colon allowed in element/attribute names, and none in PI target or entity names");
    }
    final String hexCode = "(0x" + Integer.toHexString(ch) + ")";
    if (index == 0) {
        reportInputProblem("Invalid name start character " + hexCode);
    }
    reportInputProblem("Invalid name character " + hexCode);
}
/**
 * Reports a character that is not valid XML content, with a message
 * that distinguishes the null character, other control characters
 * (below 32) and everything else.
 *
 * @param ch Offending character code
 */
protected void reportInvalidXmlChar(int ch)
    throws XMLStreamException
{
    if (ch == 0) {
        reportInputProblem("Invalid null character");
    }
    final String hexCode = "(0x" + Integer.toHexString(ch) + ")";
    if (ch < 32) {
        reportInputProblem("Invalid white space character " + hexCode);
    }
    reportInputProblem("Invalid xml content character " + hexCode);
}
/**
 * Reports that input ended in the middle of a name; the message
 * mentions the token currently being parsed. The buffer arguments are
 * not used by this implementation.
 *
 * @param cbuf Buffer holding the partial name (unused)
 * @param clen Number of characters in the buffer (unused)
 */
protected void reportEofInName(char[] cbuf, int clen)
    throws XMLStreamException
{
    final String tokenDesc = "(parsing " + ErrorConsts.tokenTypeDesc(_currToken) + ")";
    reportInputProblem("Unexpected end-of-input in name " + tokenDesc);
}
/**
 * Called when the character following a processing instruction target
 * is neither white space nor part of the closing '?>' marker.
 *
 * @param ch Offending character
 */
protected void reportMissingPISpace(int ch)
    throws XMLStreamException
{
    final String expectation = ": expected either white space, or closing '?>'";
    throwUnexpectedChar(ch, expectation);
}
/**
 * Reports a '--' sequence found inside comment content.
 */
protected void reportDoubleHyphenInComments()
    throws XMLStreamException
{
    final String problem = "String '--' not allowed in comment (missing '>'?)";
    reportInputProblem(problem);
}
/**
 * Reports a name that contains more than one colon.
 */
protected void reportMultipleColonsInName()
    throws XMLStreamException
{
    final String problem = "Multiple colons not allowed in names";
    reportInputProblem(problem);
}
/**
 * Reports a character entity whose value exceeds the highest allowed
 * Unicode character ({@code MAX_UNICODE_CHAR}).
 */
protected void reportEntityOverflow()
    throws XMLStreamException
{
    final String limit = "(0x" + Integer.toHexString(MAX_UNICODE_CHAR) + ")";
    reportInputProblem("Illegal character entity: value higher than max allowed " + limit);
}
/**
 * Signals that a namespace declaration index is out of range for the
 * current element.
 *
 * @param index The offending index
 *
 * @throws IndexOutOfBoundsException Always
 */
protected void reportInvalidNsIndex(int index)
{
    /* 24-Jun-2006, tatus: Stax API doesn't specify what (if anything)
     *   should be thrown. Ref. Impl. throws IndexOutOfBounds, which
     *   makes sense; IllegalArgumentException would be another option.
     */
    final String problem = "Illegal namespace declaration index, " + index
            + ", current START_ELEMENT/END_ELEMENT has " + getNsCount() + " declarations";
    throw new IndexOutOfBoundsException(problem);
}
/**
 * Reports an element or attribute name whose namespace prefix has no
 * binding.
 *
 * @param name Name carrying the unbound prefix
 * @param isAttr True if the name belongs to an attribute, false for
 *   an element
 */
protected void reportUnboundPrefix(PName name, boolean isAttr)
    throws XMLStreamException
{
    final String kind;
    if (isAttr) {
        kind = "attribute";
    } else {
        kind = "element";
    }
    reportInputProblem("Unbound namespace prefix '" + name.getPrefix()
            + "' (for " + kind + " name '" + name.getPrefixedName() + "')");
}
/**
 * Reports a namespace declaration that repeats one already made in the
 * same scope.
 *
 * @param prefix Prefix declared twice; null denotes the default namespace
 */
protected void reportDuplicateNsDecl(String prefix)
    throws XMLStreamException
{
    final String problem = (prefix == null)
            ? "Duplicate namespace declaration for the default namespace"
            : "Duplicate namespace declaration for prefix '" + prefix + "'";
    reportInputProblem(problem);
}
/**
 * Reports an attempt to re-bind a prefix that may not be re-bound.
 *
 * @param prefix Prefix of the offending declaration
 */
protected void reportIllegalNsDecl(String prefix)
    throws XMLStreamException
{
    final String problem =
            "Illegal namespace declaration: can not re-bind prefix '" + prefix + "'";
    reportInputProblem(problem);
}
/**
 * Reports an attempt to bind a reserved namespace URI to a prefix
 * other than the one it is reserved for.
 *
 * @param prefix The only prefix the URI may be bound to
 * @param uri The reserved namespace URI
 */
protected void reportIllegalNsDecl(String prefix, String uri)
    throws XMLStreamException
{
    final String problem = "Illegal namespace declaration: can not bind URI '" + uri
            + "' to prefix other than '" + prefix + "'";
    reportInputProblem(problem);
}
/**
 * Reports an end tag that does not match the element expected to be
 * closed.
 *
 * @param expName Name of the element whose end tag was expected
 */
protected void reportUnexpectedEndTag(String expName)
    throws XMLStreamException
{
    final String problem = "Unexpected end tag: expected </" + expName + ">";
    reportInputProblem(problem);
}
/**
 * Reports the CDATA end marker ']]>' found in regular text content.
 */
protected void reportIllegalCDataEnd()
    throws XMLStreamException
{
    final String problem =
            "String ']]>' not allowed in textual content, except as the end marker of CDATA section";
    reportInputProblem(problem);
}
/**
 * Reports an unexpected character. Values below 32 other than tab, LF
 * and CR are first handed to {@link #throwInvalidSpace}; everything
 * else is reported with a description of the character followed by the
 * given message.
 *
 * @param i Offending character; narrowed to {@code char} for the
 *   description
 * @param msg Text appended directly after the character description
 */
protected void throwUnexpectedChar(int i, String msg)
    throws XMLStreamException
{
    // But first, let's check illegals
    final boolean legalWhitespace = (i == '\t') || (i == '\n') || (i == '\r');
    if (i < 32 && !legalWhitespace) {
        throwInvalidSpace(i);
    }
    reportInputProblem("Unexpected character " + XmlChars.getCharDesc((char) i) + msg);
}
/**
 * Reports an occurrence of the null character (Unicode 0).
 */
protected void throwNullChar()
    throws XMLStreamException
{
    final String problem =
            "Illegal character (NULL, unicode 0) encountered: not valid in any content";
    reportInputProblem(problem);
}
/**
 * Handles a character that is not legal XML content. If the
 * configuration supplies an {@link IllegalCharHandler}, its replacement
 * character is returned; otherwise the problem is reported.
 *
 * @param i Offending character code
 *
 * @return Replacement character from the configured handler; without a
 *   handler the value narrowed to {@code char} is returned, but only if
 *   reporting did not throw
 */
protected char handleInvalidXmlChar(int i)
    throws XMLStreamException
{
    final IllegalCharHandler handler = _config.getIllegalCharHandler();
    if (handler != null) {
        return handler.convertIllegalChar(i);
    }

    // NOTE(review): the null check is done on the value narrowed to char,
    // so it also matches values whose low 16 bits are all zero
    final char narrowed = (char) i;
    if (narrowed == CHAR_NULL) {
        throwNullChar();
    }
    final String base = "Illegal XML character (" + XmlChars.getCharDesc(narrowed) + ")";
    final String note = (_xml11 && i < INT_SPACE)
            ? " [note: in XML 1.1, it could be included via entity expansion]"
            : "";
    reportInputProblem(base + note);
    // not reached when reportInputProblem throws, as it does in this class
    return (char) i;
}
/**
 * Reports an illegal (control) character. The null character gets its
 * own message via {@link #throwNullChar}; in XML 1.1 mode the message
 * for characters below space notes that they could be included via
 * entity expansion.
 *
 * @param i Offending character code; narrowed to {@code char} for the
 *   null check and the description
 */
protected void throwInvalidSpace(int i)
    throws XMLStreamException
{
    final char narrowed = (char) i;
    if (narrowed == CHAR_NULL) {
        throwNullChar();
    }
    final String base = "Illegal character (" + XmlChars.getCharDesc(narrowed) + ")";
    final String note = (_xml11 && i < INT_SPACE)
            ? " [note: in XML 1.1, it could be included via entity expansion]"
            : "";
    reportInputProblem(base + note);
}
}