package com.fasterxml.aalto.in;
import java.lang.ref.
SoftReference;
import java.util.*;
import javax.xml.stream.*;
import org.codehaus.stax2.
XMLInputFactory2;
import com.fasterxml.aalto.impl.
CommonConfig;
import com.fasterxml.aalto.util.*;
/**
 * This is the shared configuration object passed by the factory to reader,
 * and by reader to whoever needs it (scanners at least).
 *<p>
 * Besides the configurable settings (flags, reporter, resolver) it carries
 * per-document information (public/system id, encodings, xml declaration
 * details) and gives access to recyclable buffers and shared symbol tables.
 */
public final class ReaderConfig
    extends CommonConfig
{
    public final static int DEFAULT_SMALL_BUFFER_LEN = 60;

    public final static int DEFAULT_CHAR_BUFFER_LEN = 4000;

    public final static int STANDALONE_UNKNOWN = 0;

    public final static int STANDALONE_YES = 1;

    public final static int STANDALONE_NO = 2;

    // Standard Stax flags:

    final static int F_NS_AWARE = 0x0001;
    final static int F_COALESCING = 0x0002;
    final static int F_DTD_AWARE = 0x0004;
    final static int F_DTD_VALIDATING = 0x0008;
    final static int F_EXPAND_ENTITIES = 0x0010;

    // Standard Stax2 flags:

    final static int F_LAZY_PARSING = 0x0100;
    final static int F_INTERN_NAMES = 0x0200;
    final static int F_INTERN_NS_URIS = 0x0400;
    final static int F_REPORT_CDATA = 0x0800;
    final static int F_PRESERVE_LOCATION = 0x1000;
    final static int F_AUTO_CLOSE_INPUT = 0x2000;

    // Custom flags:

    /**
     * These are the default settings for XMLInputFactory.
     */
    final static int DEFAULT_FLAGS =
        F_NS_AWARE
        | F_DTD_AWARE
        | F_EXPAND_ENTITIES
        | F_LAZY_PARSING
        // by default we do intern names, ns uris...
        | F_INTERN_NAMES
        | F_INTERN_NS_URIS
        // and will report CDATA as such (and not as CHARACTERS)
        | F_REPORT_CDATA
        | F_PRESERVE_LOCATION
        ;

    /**
     * Registry of the property names this class handles itself. The type
     * of the mapped value determines how a property is treated by
     * {@link #getProperty} and {@link #setProperty}:
     *<ul>
     * <li>{@link Integer}: bit mask of the flag that backs the property;
     *   value can be read and changed</li>
     * <li>{@link Boolean}: fixed value; can be read but not changed</li>
     * <li><code>null</code>: name is recognized, but there is no value
     *   to report and it can not be set through this map</li>
     *</ul>
     * A {@link HashMap} is used since null values need to be stored.
     */
    private final static Map<String,Object> sProperties;
    static {
        sProperties = new HashMap<String,Object>();

        /* 28-Oct-2006, tatus: Let's recognize it, but not allow to be
         *  disabled. Can/needs to be changed if we'll support it.
         */
        sProperties.put(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.TRUE);
        sProperties.put(XMLInputFactory.IS_VALIDATING,
                Integer.valueOf(F_DTD_VALIDATING));
        sProperties.put(XMLInputFactory.IS_COALESCING,
                Integer.valueOf(F_COALESCING));
        sProperties.put(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES,
                Integer.valueOf(F_EXPAND_ENTITIES));
        sProperties.put(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES,
                Boolean.FALSE);
        sProperties.put(XMLInputFactory.SUPPORT_DTD,
                Integer.valueOf(F_DTD_AWARE));
        sProperties.put(XMLInputFactory.REPORTER, null);
        sProperties.put(XMLInputFactory.RESOLVER, null);
        sProperties.put(XMLInputFactory.ALLOCATOR, null);

        // // // Stax2:

        sProperties.put(XMLInputFactory2.P_LAZY_PARSING,
                Integer.valueOf(F_LAZY_PARSING));
        sProperties.put(XMLInputFactory2.P_INTERN_NAMES,
                Integer.valueOf(F_INTERN_NAMES));
        sProperties.put(XMLInputFactory2.P_INTERN_NS_URIS,
                Integer.valueOf(F_INTERN_NS_URIS));
        sProperties.put(XMLInputFactory2.P_AUTO_CLOSE_INPUT,
                Integer.valueOf(F_AUTO_CLOSE_INPUT));
        /* Registered exactly once, and as a flag: an additional registration
         * with a fixed Boolean value would replace this mapping and leave
         * the property out of sync with doPreserveLocation() and
         * willPreserveLocation().
         */
        sProperties.put(XMLInputFactory2.P_PRESERVE_LOCATION,
                Integer.valueOf(F_PRESERVE_LOCATION));
        sProperties.put(XMLInputFactory2.P_REPORT_CDATA,
                Integer.valueOf(F_REPORT_CDATA));

        // (ones with fixed defaults)

        /* Should we ever support this property? For now, we really shouldn't
         * report white space in prolog/epilog, as it's not really part
         * of document content.
         */
        sProperties.put(XMLInputFactory2.P_REPORT_PROLOG_WHITESPACE,
                Boolean.FALSE);

        // !!! Not really implemented, but let's recognize it
        sProperties.put(XMLInputFactory2.P_DTD_OVERRIDE, null);

        // Custom ones?
    }

    /**
     * A single encoding context instance is shared between all ReaderConfig
     * instances created for readers by an input factory. It is used
     * for sharing symbol tables.
     */
    private final EncodingContext mEncCtxt;

    /**
     * For efficient access by qualified name, as well as uniqueness
     * checks, namespace URIs need to be canonicalized.
     */
    private final UriCanonicalizer mCanonicalizer;

    private final String mPublicId;

    private final String mSystemId;

    /**
     * Encoding passed in as external information, possibly from source
     * from which xml content was gained from (for example, as an HTTP
     * header, or file metadata).
     */
    private final String mExtEncoding;

    /**
     * Name of the actual encoding that input was found to be in (if any
     * -- can't be determined if a Reader was passed in).
     */
    private String mActualEncoding = null;

    private String mXmlDeclVersion = null;

    private String mXmlDeclEncoding = null;

    /**
     * One of {@link #STANDALONE_UNKNOWN}, {@link #STANDALONE_YES} and
     * {@link #STANDALONE_NO}.
     */
    private int mXmlDeclStandalone = STANDALONE_UNKNOWN;

    private XMLReporter mReporter;

    private XMLResolver mResolver;

    private IllegalCharHandler illegalCharHandler;

    /*
    /**********************************************************************
    /* Buffer recycling:
    /**********************************************************************
     */

    /**
     * This <code>ThreadLocal</code> contains a {@link SoftReference}
     * to a {@link BufferRecycler} used to provide a low-cost
     * buffer recycling between Reader instances.
     */
    final static ThreadLocal<SoftReference<BufferRecycler>> _recyclerRef
        = new ThreadLocal<SoftReference<BufferRecycler>>();

    /**
     * This is the actual container of the recyclable buffers. It
     * is obtained via ThreadLocal/SoftReference combination, if one
     * exists, when Config instance is created. If one does not
     * exist, it will be created the first time a buffer is returned.
     */
    protected BufferRecycler _currRecycler = null;

    /*
    /**********************************************************************
    /* Life-cycle
    /**********************************************************************
     */

    /**
     * Constructor that takes all shared and per-document settings
     * explicitly; used by the public no-argument constructor and by
     * {@link #createNonShared}.
     */
    private ReaderConfig(String publicId, String systemId, String extEnc,
            EncodingContext encCtxt, int flags, int flagMods,
            XMLReporter rep, XMLResolver res,
            UriCanonicalizer canonicalizer)
    {
        super(flags, flagMods);
        mPublicId = publicId;
        mSystemId = systemId;
        mExtEncoding = extEnc;

        /* Ok, let's then see if we can find a buffer recycler. Since they
         * are lazily constructed, and since GC may just flush them out
         * on its whims, it's possible we might not find one. That's ok;
         * we can reconstruct one if and when we are to return one or more
         * buffers.
         */
        SoftReference<BufferRecycler> ref = _recyclerRef.get();
        if (ref != null) {
            _currRecycler = ref.get();
        }
        mEncCtxt = encCtxt;
        // NOTE(review): same values were already passed to super(); these
        // assignments look redundant -- confirm against CommonConfig
        _flags = flags;
        _flagMods = flagMods;
        mReporter = rep;
        mResolver = res;
        mCanonicalizer = canonicalizer;
    }

    /**
     * Creates a configuration with {@link #DEFAULT_FLAGS}, no explicit
     * flag modifications, no reporter or resolver, and with a fresh
     * encoding context and URI canonicalizer.
     */
    public ReaderConfig()
    {
        this(null, null, null, new EncodingContext(),
                DEFAULT_FLAGS, 0,
                null, null,
                new UriCanonicalizer());
    }

    public void setActualEncoding(String actualEnc)
    {
        mActualEncoding = actualEnc;
    }

    /**
     * Stores information found from the xml declaration.
     *
     * @param version Either <code>XmlConsts.XML_V_10</code> or
     *   <code>XmlConsts.XML_V_11</code>; any other value results in
     *   version being recorded as null
     * @param xmlDeclEnc Encoding declared, stored as is
     * @param standalone Either <code>XmlConsts.XML_SA_YES</code> or
     *   <code>XmlConsts.XML_SA_NO</code>; any other value (including null)
     *   results in {@link #STANDALONE_UNKNOWN}
     */
    public void setXmlDeclInfo(int version, String xmlDeclEnc, String standalone)
    {
        if (version == XmlConsts.XML_V_10) {
            mXmlDeclVersion = XmlConsts.XML_V_10_STR;
        } else if (version == XmlConsts.XML_V_11) {
            mXmlDeclVersion = XmlConsts.XML_V_11_STR;
        } else {
            mXmlDeclVersion = null;
        }
        mXmlDeclEncoding = xmlDeclEnc;

        // Compared by value (constant first, so null is fine): caller is
        // not required to pass the very same String instance
        if (XmlConsts.XML_SA_YES.equals(standalone)) {
            mXmlDeclStandalone = STANDALONE_YES;
        } else if (XmlConsts.XML_SA_NO.equals(standalone)) {
            mXmlDeclStandalone = STANDALONE_NO;
        } else {
            mXmlDeclStandalone = STANDALONE_UNKNOWN;
        }
    }

    public final void setXmlVersion(String version) {
        mXmlDeclVersion = version;
    }

    public final void setXmlEncoding(String enc) {
        mXmlDeclEncoding = enc;
    }

    /**
     * @param b Standalone setting to record; null means "unknown"
     */
    public final void setXmlStandalone(Boolean b) {
        if (b == null) {
            mXmlDeclStandalone = STANDALONE_UNKNOWN;
        } else {
            mXmlDeclStandalone = b.booleanValue() ? STANDALONE_YES : STANDALONE_NO;
        }
    }

    // // // Explicit property setters

    // // Stax:

    public void setXMLReporter(XMLReporter r) {
        mReporter = r;
    }

    public void setXMLResolver(XMLResolver r) {
        mResolver = r;
    }

    public void doCoalesceText(boolean state) {
        setFlag(F_COALESCING, state);
    }

    // // Stax2:

    public void doAutoCloseInput(boolean state) {
        setFlag(F_AUTO_CLOSE_INPUT, state);
    }

    public void doPreserveLocation(boolean state) {
        setFlag(F_PRESERVE_LOCATION, state);
    }

    public void doParseLazily(boolean state) {
        setFlag(F_LAZY_PARSING, state);
    }

    public void doReportCData(boolean state) {
        setFlag(F_REPORT_CDATA, state);
    }

    /*
    /**********************************************************************
    /* Common accessors from CommonConfig
    /**********************************************************************
     */

    /**
     * Creates a new configuration instance for a single document. The new
     * instance gets the given identifiers and external encoding, copies of
     * current flag settings, and references to the reporter, resolver,
     * illegal character handler, encoding context and URI canonicalizer
     * of this instance.
     */
    public ReaderConfig createNonShared(String publicId, String systemId,
            String extEnc)
    {
        ReaderConfig child = new ReaderConfig(publicId, systemId, extEnc,
                mEncCtxt, _flags, _flagMods,
                mReporter, mResolver, mCanonicalizer);
        // Handler is a configured setting just like reporter and resolver,
        // so it has to be carried over as well
        child.illegalCharHandler = illegalCharHandler;
        return child;
    }

    @Override
    public String getExternalEncoding() { return mExtEncoding; }

    @Override
    public String getActualEncoding() { return mActualEncoding; }

    @Override
    public boolean isXml11() {
        return false;
    }

    /*
    /**********************************************************************
    /* Implementation of abstract methods
    /**********************************************************************
     */

    /**
     * @return Bit mask of the flag that backs the given property; -1 if
     *   the name is not known here, or if the property is not backed by
     *   a flag (fixed-value and value-less properties)
     */
    protected int findPropertyId(String propName)
    {
        Object ob = sProperties.get(propName);
        // Only Integer entries denote flags; entries may also be Booleans
        // (or nulls), which must not be cast
        return (ob instanceof Integer) ? ((Integer) ob).intValue() : -1;
    }

    /*
    /**********************************************************************
    /* Standard accessors, configurable properties
    /**********************************************************************
     */

    /**
     * Returns value of the given property. Names that are not registered
     * by this class are passed on to the super class.
     *
     * @return For flag-backed properties, current state of the flag
     *   as a Boolean; for fixed-value properties the fixed Boolean; null
     *   for recognized properties that have no value registered
     */
    @Override
    public final Object getProperty(String name, boolean isMandatory)
    {
        Object ob = sProperties.get(name);
        if (ob == null) {
            // Might still have it though
            if (sProperties.containsKey(name)) {
                return null;
            }
            return super.getProperty(name, isMandatory);
        }
        if (ob instanceof Boolean) {
            return ob;
        }
        if (!(ob instanceof Integer)) {
            throw new RuntimeException("Internal error: unrecognized property value type: "
                    +ob.getClass().getName());
        }
        return Boolean.valueOf(hasFlag(((Integer) ob).intValue()));
    }

    /**
     * Changes value of the given property. Names that are not registered
     * by this class are passed on to the super class.
     *
     * @return True if the backing flag was set; false if the property is
     *   recognized but can not be changed through this method
     *
     * @throws IllegalArgumentException If the property is backed by a flag
     *   and the value is not a {@link Boolean}
     */
    @Override
    public boolean setProperty(String name, Object value)
    {
        Object ob = sProperties.get(name);
        if (ob == null) {
            // Might still have it though
            if (sProperties.containsKey(name)) {
                return false;
            }
            return super.setProperty(name, value);
        }
        if (ob instanceof Boolean) { // immutable
            return false;
        }
        if (!(ob instanceof Integer)) {
            throw new RuntimeException("Internal error");
        }
        // Report bad input explicitly, instead of failing on a blind cast
        if (!(value instanceof Boolean)) {
            throw new IllegalArgumentException("Invalid value for property '"+name
                    +"': expected a Boolean, got "
                    +((value == null) ? "null" : value.getClass().getName()));
        }
        setFlag(((Integer) ob).intValue(), ((Boolean) value).booleanValue());
        return true;
    }

    @Override
    public boolean isPropertySupported(String propName)
    {
        return sProperties.containsKey(propName)
            || super.isPropertySupported(propName);
    }

    public XMLReporter getXMLReporter() { return mReporter; }

    public XMLResolver getXMLResolver() { return mResolver; }

    // // // Stax standard properties

    public boolean willExpandEntities() {
        return hasFlag(F_EXPAND_ENTITIES);
    }

    public boolean willCoalesceText() {
        return hasFlag(F_COALESCING);
    }

    public boolean willSupportNamespaces() {
        return true;
    }

    // // // Stax2 standard properties

    public boolean willParseLazily() {
        return hasFlag(F_LAZY_PARSING);
    }

    public boolean willInternNames() { return hasFlag(F_INTERN_NAMES); }

    public boolean willInternNsURIs() { return hasFlag(F_INTERN_NS_URIS); }

    public boolean willReportCData() { return hasFlag(F_REPORT_CDATA); }

    public boolean willPreserveLocation() { return hasFlag(F_PRESERVE_LOCATION); }

    public boolean willAutoCloseInput() { return hasFlag(F_AUTO_CLOSE_INPUT); }

    // // // Support for things that must be explicitly enabled

    public boolean hasInternNamesBeenEnabled() { return hasExplicitFlag(F_INTERN_NAMES); }

    public boolean hasInternNsURIsBeenEnabled() { return hasExplicitFlag(F_INTERN_NS_URIS); }

    /*
    /**********************************************************************
    /* Accessors, detected properties
    /**********************************************************************
     */

    // // // Input source information

    public String getPublicId() { return mPublicId; }

    public String getSystemId() { return mSystemId; }

    // // // XML declaration info

    public String getXmlDeclVersion() { return mXmlDeclVersion; }

    public String getXmlDeclEncoding() { return mXmlDeclEncoding; }

    public int getXmlDeclStandalone() { return mXmlDeclStandalone; }

    /*
    /**********************************************************************
    /* Stax2 additions
    /**********************************************************************
     */

    // // // Profile mutators:

    /**
     * Method to call to make Reader created conform as closely to XML
     * standard as possible, doing all checks and transformations mandated
     * (linefeed conversions, attr value normalizations).
     * See {@link XMLInputFactory2#configureForXmlConformance} for
     * required settings for standard StAX/StAX2 properties.
     *<p>
     * Notes: Does NOT change 'performance' settings (buffer sizes,
     * DTD caching, coalescing, interning, accurate location info).
     * Currently this method does not change any settings.
     */
    public void configureForXmlConformance()
    {
        // // StAX 1.0 settings
        //doSupportNamespaces(true);
        //doSupportDTDs(true);
        //doSupportExternalEntities(true);
        //doReplaceEntityRefs(true);

        // // Stax2 additional settings
    }

    /**
     * Method to call to make Reader created be as "convenient" to use
     * as possible; ie try to avoid having to deal with some of things
     * like segmented text chunks. This may incur some slight performance
     * penalties, but should not affect XML conformance.
     * See {@link XMLInputFactory2#configureForConvenience} for
     * required settings for standard StAX/StAX2 properties.
     */
    public void configureForConvenience()
    {
        // StAX (1.0) settings:
        doCoalesceText(true);
        //doReplaceEntityRefs(true);

        // StAX2:
        //doReportCData(false);
        //doReportPrologWhitespace(false);

        /* Also, knowing exact locations is nice esp. for error
         * reporting purposes
         */
        doPreserveLocation(true);
    }

    /**
     * Method to call to make the Reader created be as fast as possible reading
     * documents, especially for long-running processes where caching is
     * likely to help.
     *<p>
     * See {@link XMLInputFactory2#configureForSpeed} for
     * required settings for standard StAX/StAX2 properties.
     */
    public void configureForSpeed()
    {
        // StAX (1.0):
        doCoalesceText(false);

        // StAX2:
        doPreserveLocation(false);
        //doReportPrologWhitespace(false);
        //doInternNames(true); // this is a NOP
        //doInternNsURIs(true);
    }

    /**
     * Method to call to minimize the memory usage of the stream/event reader;
     * both regarding Objects created, and the temporary memory usage during
     * parsing.
     * This generally incurs some performance penalties, due to using
     * smaller input buffers.
     *<p>
     * See {@link XMLInputFactory2#configureForLowMemUsage} for
     * required settings for standard StAX/StAX2 properties.
     */
    public void configureForLowMemUsage()
    {
        // StAX (1.0)
        doCoalesceText(false);

        // StAX2:
        doPreserveLocation(false); // can reduce temporary mem usage
    }

    /**
     * Method to call to make Reader try to preserve as much of input
     * formatting as possible, so that round-tripping would be as lossless
     * as possible.
     *<p>
     * See {@link XMLInputFactory2#configureForRoundTripping} for
     * required settings for standard StAX/StAX2 properties.
     */
    public void configureForRoundTripping()
    {
        // StAX (1.0)
        doCoalesceText(false);
        //doReplaceEntityRefs(false);

        // StAX2:
        //doReportCData(true);
        //doReportPrologWhitespace(true);
    }

    /*
    /**********************************************************************
    /* Canonicalization support
    /**********************************************************************
     */

    /**
     * Delegates to the shared {@link UriCanonicalizer}, passing the
     * arguments through as is.
     */
    public String canonicalizeURI(char[] buf, int uriLen)
    {
        return mCanonicalizer.canonicalizeURI(buf, uriLen);
    }

    /*
    /**********************************************************************
    /* Buffer recycling:
    /**********************************************************************
     */

    /**
     * @return Buffer offered by the current recycler for the requested
     *   size, if there is a recycler and it returns one; otherwise
     *   a newly allocated array of exactly <code>minSize</code> chars
     */
    public char[] allocSmallCBuffer(int minSize)
    {
        if (_currRecycler != null) {
            char[] result = _currRecycler.getSmallCBuffer(minSize);
            if (result != null) {
                return result;
            }
        }
        // Nope; no recycler, or it has no suitable buffers, let's create:
        return new char[minSize];
    }

    /**
     * Hands the buffer over to the recycler, creating (and registering)
     * the recycler first if there is none yet.
     */
    public void freeSmallCBuffer(char[] buffer)
    {
        // Need to create (and assign) the recycler?
        if (_currRecycler == null) {
            _currRecycler = createRecycler();
        }
        _currRecycler.returnSmallCBuffer(buffer);
    }

    /**
     * @return Buffer offered by the current recycler for the requested
     *   size, if there is a recycler and it returns one; otherwise
     *   a newly allocated array of exactly <code>minSize</code> chars
     */
    public char[] allocMediumCBuffer(int minSize)
    {
        if (_currRecycler != null) {
            char[] result = _currRecycler.getMediumCBuffer(minSize);
            if (result != null) {
                return result;
            }
        }
        return new char[minSize];
    }

    /**
     * Hands the buffer over to the recycler, creating (and registering)
     * the recycler first if there is none yet.
     */
    public void freeMediumCBuffer(char[] buffer)
    {
        if (_currRecycler == null) {
            _currRecycler = createRecycler();
        }
        _currRecycler.returnMediumCBuffer(buffer);
    }

    /**
     * @return Buffer offered by the current recycler for the requested
     *   size, if there is a recycler and it returns one; otherwise
     *   a newly allocated array of exactly <code>minSize</code> chars
     */
    public char[] allocFullCBuffer(int minSize)
    {
        if (_currRecycler != null) {
            char[] result = _currRecycler.getFullCBuffer(minSize);
            if (result != null) {
                return result;
            }
        }
        return new char[minSize];
    }

    /**
     * Hands the buffer over to the recycler, creating (and registering)
     * the recycler first if there is none yet.
     */
    public void freeFullCBuffer(char[] buffer)
    {
        // Need to create (and assign) the recycler?
        if (_currRecycler == null) {
            _currRecycler = createRecycler();
        }
        _currRecycler.returnFullCBuffer(buffer);
    }

    /**
     * @return Buffer offered by the current recycler for the requested
     *   size, if there is a recycler and it returns one; otherwise
     *   a newly allocated array of exactly <code>minSize</code> bytes
     */
    public byte[] allocFullBBuffer(int minSize)
    {
        if (_currRecycler != null) {
            byte[] result = _currRecycler.getFullBBuffer(minSize);
            if (result != null) {
                return result;
            }
        }
        return new byte[minSize];
    }

    /**
     * Hands the buffer over to the recycler, creating (and registering)
     * the recycler first if there is none yet.
     */
    public void freeFullBBuffer(byte[] buffer)
    {
        // Need to create (and assign) the recycler?
        if (_currRecycler == null) {
            _currRecycler = createRecycler();
        }
        _currRecycler.returnFullBBuffer(buffer);
    }

    /**
     * Creates a new recycler and registers it for the current thread,
     * wrapped in a {@link SoftReference}, via {@link #_recyclerRef}.
     */
    private BufferRecycler createRecycler()
    {
        BufferRecycler recycler = new BufferRecycler();
        // No way to reuse/reset SoftReference, have to create new always:
        _recyclerRef.set(new SoftReference<BufferRecycler>(recycler));
        return recycler;
    }

    /*
    /**********************************************************************
    /* Symbol table reusing, character types
    /**********************************************************************
     */

    /**
     * @return Byte-based symbol table for the actual encoding, as handed
     *   out by the shared encoding context
     *
     * @throws Error If the actual encoding is not one of UTF-8,
     *   ISO-Latin1 and US-ASCII constants of <code>CharsetNames</code>
     */
    public ByteBasedPNameTable getBBSymbols()
    {
        // Encoding names are compared by value: constants come first so
        // that an encoding that was never set (null) is handled as well
        if (CharsetNames.CS_UTF8.equals(mActualEncoding)) {
            return mEncCtxt.getUtf8Symbols();
        }
        if (CharsetNames.CS_ISO_LATIN1.equals(mActualEncoding)) {
            return mEncCtxt.getLatin1Symbols();
        }
        if (CharsetNames.CS_US_ASCII.equals(mActualEncoding)) {
            return mEncCtxt.getAsciiSymbols();
        }
        throw unknownEncodingError();
    }

    public CharBasedPNameTable getCBSymbols()
    {
        return mEncCtxt.getSymbols();
    }

    /**
     * Passes the given table to the shared encoding context, to be merged
     * into the shared table of the actual encoding.
     *
     * @throws Error If the actual encoding is not one of UTF-8,
     *   ISO-Latin1 and US-ASCII constants of <code>CharsetNames</code>
     */
    public void updateBBSymbols(ByteBasedPNameTable sym)
    {
        if (CharsetNames.CS_UTF8.equals(mActualEncoding)) {
            mEncCtxt.updateUtf8Symbols(sym);
        } else if (CharsetNames.CS_ISO_LATIN1.equals(mActualEncoding)) {
            mEncCtxt.updateLatin1Symbols(sym);
        } else if (CharsetNames.CS_US_ASCII.equals(mActualEncoding)) {
            mEncCtxt.updateAsciiSymbols(sym);
        } else {
            throw unknownEncodingError();
        }
    }

    public void updateCBSymbols(CharBasedPNameTable sym)
    {
        mEncCtxt.updateSymbols(sym);
    }

    /**
     * @return Character type tables for the actual encoding
     *
     * @throws Error If the actual encoding is not one of UTF-8,
     *   ISO-Latin1 and US-ASCII constants of <code>CharsetNames</code>
     */
    public XmlCharTypes getCharTypes()
    {
        if (CharsetNames.CS_UTF8.equals(mActualEncoding)) {
            return InputCharTypes.getUtf8CharTypes();
        }
        if (CharsetNames.CS_ISO_LATIN1.equals(mActualEncoding)) {
            return InputCharTypes.getLatin1CharTypes();
        }
        if (CharsetNames.CS_US_ASCII.equals(mActualEncoding)) {
            return InputCharTypes.getAsciiCharTypes();
        }
        throw unknownEncodingError();
    }

    /**
     * Constructs (but does not throw) the error used to signal that the
     * actual encoding is not one of the ones handled by this class.
     */
    private Error unknownEncodingError()
    {
        return new Error("Internal error, unknown encoding '"+mActualEncoding+"'");
    }

    /*
    /**********************************************************************
    /* Helper classes
    /**********************************************************************
     */

    /**
     * This is a simple container class that is used to encapsulate
     * per-factory encoding-dependant information like symbol tables.
     *<p>
     * All accessors are synchronized on the context instance. Shared
     * tables are created lazily by the <code>getXxx</code> methods; these
     * return a new table instance constructed from the shared one, not
     * the shared table itself. The <code>updateXxx</code> methods call
     * <code>mergeFromChild</code> on the shared table, and so expect the
     * matching <code>getXxx</code> method to have been called earlier.
     */
    final static class EncodingContext
    {
        ByteBasedPNameTable mUtf8Table;
        ByteBasedPNameTable mLatin1Table;
        ByteBasedPNameTable mAsciiTable;

        /**
         * If there is no encoding to worry about, we only need a single
         * symbol table.
         */
        CharBasedPNameTable mGeneralTable;

        EncodingContext() { }

        public synchronized ByteBasedPNameTable getUtf8Symbols()
        {
            if (mUtf8Table == null) {
                mUtf8Table = new ByteBasedPNameTable(64);
            }
            return new ByteBasedPNameTable(mUtf8Table);
        }

        public synchronized void updateUtf8Symbols(ByteBasedPNameTable sym)
        {
            mUtf8Table.mergeFromChild(sym);
        }

        public synchronized ByteBasedPNameTable getLatin1Symbols()
        {
            if (mLatin1Table == null) {
                mLatin1Table = new ByteBasedPNameTable(64);
            }
            return new ByteBasedPNameTable(mLatin1Table);
        }

        public synchronized void updateLatin1Symbols(ByteBasedPNameTable sym)
        {
            mLatin1Table.mergeFromChild(sym);
        }

        public synchronized ByteBasedPNameTable getAsciiSymbols()
        {
            if (mAsciiTable == null) {
                mAsciiTable = new ByteBasedPNameTable(64);
            }
            return new ByteBasedPNameTable(mAsciiTable);
        }

        public synchronized void updateAsciiSymbols(ByteBasedPNameTable sym)
        {
            mAsciiTable.mergeFromChild(sym);
        }

        public synchronized CharBasedPNameTable getSymbols()
        {
            if (mGeneralTable == null) {
                mGeneralTable = new CharBasedPNameTable(64);
            }
            return new CharBasedPNameTable(mGeneralTable);
        }

        public synchronized void updateSymbols(CharBasedPNameTable sym)
        {
            mGeneralTable.mergeFromChild(sym);
        }
    }

    public void setIllegalCharHandler(IllegalCharHandler illegalCharHandler) {
        this.illegalCharHandler = illegalCharHandler;
    }

    public IllegalCharHandler getIllegalCharHandler() {
        return this.illegalCharHandler;
    }
}