/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.lang;
import java.io.
Serializable;
import java.util.
HashMap;
import java.util.
HashSet;
import java.util.
Iterator;
import java.util.
Map;
import java.util.
Set;
/**
 * <p>A set of characters, stored internally as a collection of
 * <code>CharRange</code> objects.</p>
 *
 * <p>Instances are immutable, but instances of subclasses may not be.</p>
 *
 * @author Stephen Colebourne
 * @author Phil Steitz
 * @author Pete Gieser
 * @author Gary Gregory
 * @since 1.0
 * @version $Id: CharSet.java 618884 2008-02-06 04:37:17Z bayard $
 */
public class CharSet implements Serializable {

    /**
     * Required for serialization support. Lang version 2.0.
     *
     * @see java.io.Serializable
     */
    private static final long serialVersionUID = 5947847346149275958L;

    /**
     * A CharSet defining no characters.
     * @since 2.0
     */
    public static final CharSet EMPTY = new CharSet((String) null);

    /**
     * A CharSet defining ASCII alphabetic characters "a-zA-Z".
     * @since 2.0
     */
    public static final CharSet ASCII_ALPHA = new CharSet("a-zA-Z");

    /**
     * A CharSet defining ASCII lower case alphabetic characters "a-z".
     * @since 2.0
     */
    public static final CharSet ASCII_ALPHA_LOWER = new CharSet("a-z");

    /**
     * A CharSet defining ASCII upper case alphabetic characters "A-Z".
     * @since 2.0
     */
    public static final CharSet ASCII_ALPHA_UPPER = new CharSet("A-Z");

    /**
     * A CharSet defining ASCII numeric characters "0-9".
     * @since 2.0
     */
    public static final CharSet ASCII_NUMERIC = new CharSet("0-9");

    /**
     * A Map of the common cases used in the factory, keyed by the set
     * definition string (including <code>null</code>) with the shared
     * <code>CharSet</code> instance as the value.
     * Subclasses can add more common patterns if desired.
     * @since 2.0
     */
    protected static final Map COMMON = new HashMap();

    // Must stay below the constants above: static initializers run in
    // textual order and the constants have to exist before being registered.
    static {
        COMMON.put(null, EMPTY);
        COMMON.put("", EMPTY);
        COMMON.put("a-zA-Z", ASCII_ALPHA);
        COMMON.put("A-Za-z", ASCII_ALPHA);
        COMMON.put("a-z", ASCII_ALPHA_LOWER);
        COMMON.put("A-Z", ASCII_ALPHA_UPPER);
        COMMON.put("0-9", ASCII_NUMERIC);
    }

    /**
     * The set of CharRange objects. The reference is final so that it can
     * never be re-pointed after construction; the contents are only added to
     * through {@link #add(String)}.
     */
    private final Set set = new HashSet();

    //-----------------------------------------------------------------------
    /**
     * <p>Factory method to create a new CharSet using a special syntax.</p>
     *
     * <ul>
     *  <li><code>null</code> or empty string ("")
     *   - set containing no characters</li>
     *  <li>Single character, such as "a"
     *   - set containing just that character</li>
     *  <li>Multi character, such as "a-e"
     *   - set containing characters from one character to the other</li>
     *  <li>Negated, such as "^a" or "^a-e"
     *   - set containing all characters except those defined</li>
     *  <li>Combinations, such as "abe-g"
     *   - set containing all the characters from the individual sets</li>
     * </ul>
     *
     * <p>The matching order is:</p>
     * <ol>
     *  <li>Negated multi character range, such as "^a-e"</li>
     *  <li>Ordinary multi character range, such as "a-e"</li>
     *  <li>Negated single character, such as "^a"</li>
     *  <li>Ordinary single character, such as "a"</li>
     * </ol>
     * <p>Matching works left to right. Once a match is found the
     * search starts again from the next character.</p>
     *
     * <p>If the same range is defined twice using the same syntax, only
     * one range will be kept.
     * Thus, "a-ca-c" creates only one range of "a-c".</p>
     *
     * <p>If the start and end of a range are in the wrong order,
     * they are reversed. Thus "a-e" is the same as "e-a".
     * As a result, "a-ee-a" would create only one range,
     * as the "a-e" and "e-a" are the same.</p>
     *
     * <p>The set of characters represented is the union of the specified ranges.</p>
     *
     * <p>Definitions registered in {@link #COMMON} are answered with the
     * shared instance held there; any other definition produces a new
     * instance on every call.</p>
     *
     * <p>All CharSet objects returned by this method will be immutable.</p>
     *
     * @param setStr  the String describing the set, may be null
     * @return a CharSet instance
     * @since 2.0
     */
    public static CharSet getInstance(String setStr) {
        Object cached = COMMON.get(setStr);
        if (cached != null) {
            return (CharSet) cached;
        }
        return new CharSet(setStr);
    }

    /**
     * <p>Constructs a new CharSet using the set syntax.
     * Each string is merged in with the set.</p>
     *
     * <p>Unlike {@link #getInstance(String)}, this method does not consult
     * {@link #COMMON}; a non-null array always yields a new instance.</p>
     *
     * @param setStrs  Strings to merge into the initial set, may be null
     * @return a CharSet instance, or <code>null</code> if <code>setStrs</code>
     *  is <code>null</code>
     * @since 2.4
     */
    public static CharSet getInstance(String[] setStrs) {
        if (setStrs == null) {
            return null;
        }
        return new CharSet(setStrs);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Constructs a new CharSet using the set syntax.</p>
     *
     * <p>Note: this constructor invokes the overridable {@link #add(String)}.
     * A subclass that overrides <code>add</code> will have its override called
     * before the subclass's own constructor body has run.</p>
     *
     * @param setStr  the String describing the set, may be null
     * @since 2.0
     */
    protected CharSet(String setStr) {
        super();
        add(setStr);
    }

    /**
     * <p>Constructs a new CharSet using the set syntax.
     * Each string is merged in with the set.</p>
     *
     * <p>Note: this constructor invokes the overridable {@link #add(String)}
     * once per array element. A subclass that overrides <code>add</code> will
     * have its override called before the subclass's own constructor body
     * has run.</p>
     *
     * @param set  Strings to merge into the initial set
     * @throws NullPointerException if set is <code>null</code>
     */
    protected CharSet(String[] set) {
        super();
        // The parameter shadows the field of the same name; add() still
        // writes to the field.
        final int count = set.length;
        for (int i = 0; i < count; i++) {
            add(set[i]);
        }
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Add a set definition string to the <code>CharSet</code>.</p>
     *
     * <p>The string is consumed left to right. At each position the longest
     * applicable form is tried first: negated range ("^a-e", 4 chars),
     * range ("a-e", 3 chars), negated char ("^a", 2 chars) and finally a
     * single literal char. A <code>null</code> string is ignored.</p>
     *
     * @param str  set definition string
     */
    protected void add(String str) {
        if (str == null) {
            return;
        }

        final int len = str.length();
        int pos = 0;
        while (pos < len) {
            final int remainder = len - pos;
            if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') {
                // negated range
                set.add(new CharRange(str.charAt(pos + 1), str.charAt(pos + 3), true));
                pos += 4;
            } else if (remainder >= 3 && str.charAt(pos + 1) == '-') {
                // range
                set.add(new CharRange(str.charAt(pos), str.charAt(pos + 2)));
                pos += 3;
            } else if (remainder >= 2 && str.charAt(pos) == '^') {
                // negated char
                set.add(new CharRange(str.charAt(pos + 1), true));
                pos += 2;
            } else {
                // char (also reached for a trailing '^' or '-' that cannot
                // start one of the longer forms)
                set.add(new CharRange(str.charAt(pos)));
                pos += 1;
            }
        }
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Gets the internal set as an array of CharRange objects.</p>
     *
     * <p>A new array is created on each call, so modifying the returned
     * array does not affect this set.</p>
     *
     * @return an array of immutable CharRange objects
     * @since 2.0
     */
    public CharRange[] getCharRanges() {
        return (CharRange[]) set.toArray(new CharRange[set.size()]);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Does the <code>CharSet</code> contain the specified
     * character <code>ch</code>.</p>
     *
     * <p>The character is contained if any one of the stored ranges
     * reports that it contains it.</p>
     *
     * @param ch  the character to check for
     * @return <code>true</code> if the set contains the character
     */
    public boolean contains(char ch) {
        Iterator it = set.iterator();
        while (it.hasNext()) {
            CharRange range = (CharRange) it.next();
            if (range.contains(ch)) {
                return true;
            }
        }
        return false;
    }

    // Basics
    //-----------------------------------------------------------------------
    /**
     * <p>Compares two CharSet objects, returning true if they represent
     * exactly the same set of characters defined in the same way.</p>
     *
     * <p>The two sets <code>abc</code> and <code>a-c</code> are <i>not</i>
     * equal according to this method.</p>
     *
     * @param obj  the object to compare to
     * @return true if equal
     * @since 2.0
     */
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (!(obj instanceof CharSet)) {
            return false;
        }
        CharSet other = (CharSet) obj;
        return set.equals(other.set);
    }

    /**
     * <p>Gets a hashCode compatible with the equals method.</p>
     *
     * @return a suitable hashCode
     * @since 2.0
     */
    public int hashCode() {
        return 89 + set.hashCode();
    }

    /**
     * <p>Gets a string representation of the set.</p>
     *
     * <p>This is the string form of the underlying collection of ranges.</p>
     *
     * @return string representation of the set
     */
    public String toString() {
        return set.toString();
    }

}