You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2010/09/02 15:43:51 UTC
svn commit: r991931 [1/2] - in /uima/uimaj/trunk/uimaj-core/src:
main/java/org/apache/uima/analysis_engine/impl/
main/java/org/apache/uima/analysis_engine/impl/compatibility/
main/java/org/apache/uima/cas/impl/
test/java/org/apache/uima/analysis_engine...
Author: schor
Date: Thu Sep 2 13:43:50 2010
New Revision: 991931
URL: http://svn.apache.org/viewvc?rev=991931&view=rev
Log:
[UIMA-1860] [UIMA-1840] Re-impl of result spec, supporting correct semantics in detail. Test case updates to test additional corner cases.
Added:
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeat.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeats.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFullFeatNames.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLang.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLangs.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsType.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsTypesMap.java
Modified:
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/PrimitiveAnalysisEngine_impl.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/ResultSpecification_impl.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/TypeOrFeature_impl.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/compatibility/AnnotatorAdapter.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/analysis_engine/impl/AnalysisEngine_implTest.java
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/analysis_engine/impl/ResultSpecTest.java
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/analysis_engine/impl/ResultSpecWithTypeSystemTest.java
Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/PrimitiveAnalysisEngine_impl.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/PrimitiveAnalysisEngine_impl.java?rev=991931&r1=991930&r2=991931&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/PrimitiveAnalysisEngine_impl.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/PrimitiveAnalysisEngine_impl.java Thu Sep 2 13:43:50 2010
@@ -359,14 +359,14 @@ public class PrimitiveAnalysisEngine_imp
if (mLastTypeSystem != view.getTypeSystem()) {
mLastTypeSystem = view.getTypeSystem();
mCurrentResultSpecification.setTypeSystem(mLastTypeSystem);
- rsFromOutputCapabilities = new ResultSpecification_impl();
+ rsFromOutputCapabilities = new ResultSpecification_impl(mLastTypeSystem);
rsFromOutputCapabilities.addCapabilities(this.getAnalysisEngineMetaData().getCapabilities());
}
// the actual ResultSpec we send to the component is formed by
- // looking at this primitive AE's declared output types and eliminiating
+ // looking at this primitive AE's declared output types and eliminating
// any that are not in mCurrentResultSpecification.
ResultSpecification analysisComponentResultSpec =
- ResultSpecification_impl.intersect(mCurrentResultSpecification, (ResultSpecification_impl) rsFromOutputCapabilities);
+ ((ResultSpecification_impl)mCurrentResultSpecification).intersect((ResultSpecification_impl)rsFromOutputCapabilities);
mAnalysisComponent.setResultSpecification(analysisComponentResultSpec);
mResultSpecChanged = false;
}
Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/ResultSpecification_impl.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/ResultSpecification_impl.java?rev=991931&r1=991930&r2=991931&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/ResultSpecification_impl.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/ResultSpecification_impl.java Thu Sep 2 13:43:50 2010
@@ -20,15 +20,18 @@
package org.apache.uima.analysis_engine.impl;
import java.util.ArrayList;
-import java.util.BitSet;
import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import org.apache.uima.analysis_engine.ResultSpecification;
import org.apache.uima.analysis_engine.TypeOrFeature;
+import org.apache.uima.cas.Feature;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.impl.TypeSystemImpl;
import org.apache.uima.cas.text.Language;
import org.apache.uima.resource.metadata.Capability;
import org.apache.uima.resource.metadata.impl.MetaDataObject_impl;
@@ -36,37 +39,88 @@ import org.apache.uima.resource.metadata
import org.apache.uima.resource.metadata.impl.XmlizationInfo;
/**
- * Reference implementaion of {@link ResultSpecification}.
+ * Reference implementation of {@link ResultSpecification}.
*
* Notes on the implementation
*
- * There are two ways this data is used: with and without "compiling"
- * Compiling means: adding subtypes of types and adding all features of a type
- * Uncompiled form is called ORIGINAL.
- *
- * Compiling is deferred - until the first reference to containsType or Feature.
+ * Result Specifications (result specs, rs) are closely tied to capability specifications.
+ *
+ * They consist of instances of
+ * TypeOrFeatures and associated languages for which they are set.
*
- * Many instances of this class are made, sometimes via cloning.
+ * This impl supports removing previously added types and features
+ * for particular languages.
*
- * Sometimes types and features are deleted - the intent is to do this operation on the
- * uncompiled form, and then "recompile" it.
+ * There are two forms of the data kept:
+ * The data as it was provided to set the items in the result spec
+ * This form is used when removing previously added things
+ *
+ * The data after a type system has been provided, expanded to cover
+ * the various implied settings, due to either
+ * all Features flag on a type or
+ * the type/subtype hierarchy in the type system
+ *
+ * TypesOrFeatures are:
+ * typeXXX:FeatureYYY - specifying a particular feature of a type
+ * (Corner case: typeXXX:FeatureYYY doesn't imply there's a
+ * typeXXX allFeat nor a
+ * typeXXX w/o allFeat.)
+ *
+ * typeXXX with allFeatures - a shorthand for specifying
+ * typeXXX and
+ * typeXXX:FeatureYYY for all features YYY defined for typeXXX
+ * (Corner case: excludes features ZZZ defined only in subtype of typeXXX)
+ * typeXXX without allFeatures (w/o allFeat) - specifies a type, but says nothing about the features
+ * This is specifiable in the XML. It means:
+ * The type is produced/needed but there's no information about the features that
+ * are to be produced or used
*
- * Types and Features are kept on a per-language basis. Language can include a special value,
- * x-unspecified, which "matches" any other language.
+ * containsType typeXXX
+ * returns true if typeXXX is in the RS, with or without the allFeats flag
+ * returns false if only features involving typeXXX are specified
+ *
+ * Intersection is done on fully expanded representations.
*
- * Language specifications are simplified to eliminate the country part. All refs to
- * test if a type or feature is in the result spec for a language uses the simplified language.
+ * There are two kinds of inheritance used
+ * Assuming there's a type system (which must be present when intersection is used), there's type/subtype
+ * This means that if a resultSpec is set for typeXXX, then the containsType(typeYYY)
+ * returns true if typeYYY is a subtype of typeXXX.
+ * This also needs to work for typeXXX:featZZZ; containsFeature(typeYYY:featZZZ)
+ * returns true if type YYY is a subtype of typeXXX.
+ *
+ * Languages have a 3 level hierarchy:
+ * x-unspecified - the same as no language being specified.
+ * If the resultSpec contains typeXXX for language x-unspecified,
+ * containsType(typeXXX, languageLLL) returns true, for any languageLLL
+ * a "base" language, without a '-', e.g. "en"
+ * a sub-language, with one or more '-', e.g., "en-us"
+ *
+ * The rules for matching languages only handle these three levels of inheritance.
+ * (Corner case: language hierarchies of 3 or more levels are treated as 3-level hierarchies,
+ * e.g. zh-Hant-HK (Traditional Chinese as used in Hong Kong).
+ * See http://www.w3.org/International/articles/language-tags/Overview.en.php )
*
- * Set operations are done to combine, for a particular type or feature, the languages for which it is valid.
- * This is a Union operation
- * Set operations are done to union the input types/features with the output types/features when computing the default
- * result-spec for an aggregate.
- * Set operations are done to intersect the result spec with the output capabilities of a component.
+ * Design considerations and assumptions
+ * Many instances of this class are made, sometimes via cloning.
+ * Most uses only use types, not type:features
+ * Most don't use languages
+ * A small subset of the possible types and type:features is specified explicitly
+ * Sometimes types and/or features are deleted. (language capability flow deletes types and/or features)
*
- * Languages are represented as integers; there is a hash table from the string to the integer, and
- * an array to go from integer to lang string.
+ * Types and Features are kept on a per-language basis. Language can include a special value,
+ * x-unspecified, which "matches" any other language.
*
- * A result set of ORIGINALs consists of types/features with associated language sets.
+ * Set operations among different result specs:
+ * Union: done in aggregates over result-specs derived from input capabilities of delegates
+ * Intersection: done for primitive components, over result-spec derived from output capability of the primitive
+ * remove: one type or feature (used by language capability flow)
+ * (Corner cases:
+ * removing typeXXX doesn't remove typeXXX:featureYYY
+ * removing typeXXX allFeat doesn't remove typeXXX w/o allFeat (may have different languages)
+ * removing typeXXX w/o allFeat doesn't remove typeXXX allFeat)
+ *
+ * The compiled version is used in containsType, containsFeature testing, and is used when
+ * computing intersection.
*/
public final class ResultSpecification_impl extends MetaDataObject_impl implements
@@ -74,131 +128,41 @@ public final class ResultSpecification_i
private static final long serialVersionUID = 8516517600467270594L;
- private static final int UNSPECIFIED_LANGUAGE_INDEX = 0;
-
/**
* main language separator e.g 'en' and 'en-US'
- */
- private static final char LANGUAGE_SEPARATOR = '-';
-
- private class ToF_Languages implements Cloneable {
- public TypeOrFeature tof;
- public BitSet languages;
-
- ToF_Languages(TypeOrFeature aTof, String[] aLanguages) {
- tof = aTof;
- languages = new BitSet();
- for (String lang : aLanguages) {
- languages.set(getLanguageIndex(lang));
- }
- }
-
- ToF_Languages(TypeOrFeature aTof, BitSet aLanguages) {
- tof = aTof;
- languages = aLanguages;
- }
-
- public Object clone() {
- return new ToF_Languages((TypeOrFeature) tof.clone(), (BitSet)languages.clone());
- }
-
- @Override
- public int hashCode() {
- final int prime = 31;
- int result = 1;
- result = prime * result + ((languages == null) ? 0 : languages.hashCode());
- result = prime * result + ((tof == null) ? 0 : tof.hashCode());
- return result;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (this == obj)
- return true;
- if (obj == null)
- return false;
- if (getClass() != obj.getClass())
- return false;
- final ToF_Languages other = (ToF_Languages) obj;
- if (languages == null) {
- if (other.languages != null)
- return false;
- } else if (!languages.equals(other.languages))
- return false;
- if (tof == null) {
- if (other.tof != null)
- return false;
- } else if (!tof.equals(other.tof))
- return false;
- return true;
- }
-
+ */
- }
-
- private boolean needsCompilation = true;
-
- private final Map<String, Integer> lang2int;
-
+ private static final String[] ARRAY_X_UNSPEC = new String[]{Language.UNSPECIFIED_LANGUAGE};
/**
- * hash map used to map fully qualified type and feature names to associated
- * ToF_Languages instances. This used for ORIGINAL types and features.
- *
- * Another hash map is used for compiled types and features - these include
- * the subtypes of the ORIGINAL types. We keep the originals because the
- * operations of adding and removing types and features are done with respect
- * to the originals, only, and then the other map for compiled types is recomputed.
- *
- * A case in particular: we need to be able to distinguish which types were
- * originally marked allAnnotatorFeatures, versus those types which were
- * added because they were subtypes. The corner case happens when a type is both
- * an original and is also an added-via-subtype, where the allAnnotatorFeatures
- * flag of the original is not set but the subtype version is set.
- *
+ * form used in hash table of compiled version to represent x-unspecified
+ * (can't use null - that means entry not in table)
*/
- private final Map<String, ToF_Languages> name2tof_langs;
-
+ private static final RsLangs compiledXunspecified = RsLangs.createSharableEmpty(); // a distinct object
+
/**
- * hash map used to map fully qualified type and feature names to associated
- * ToF_Languages instances. This used for COMPILED types and features.
+ * used for empty type subsumption lists in subtype iterator
*/
-
- private final Map<String, ToF_Languages> withSubtypesName2tof_langs;
-
-// /**
-// * Map from TypeOrFeature objects to HashSets that include the language codes (Strings) for which
-// * that type or feature should be produced.
-// */
-// private Map<TypeOrFeature, Set<String>> mTypesAndFeatures = new HashMap<TypeOrFeature, Set<String>>();
-//
-// /**
-// * Map from String type or feature names to HashSets that include the language codes (Strings) for
-// * which that type or feature should be produced. This is populated by the compile() method, and
-// * includes subtypes as well as the individual feature names for types that have
-// * allAnnotatorFeatures=true.
-// */
-// private final Map<String, Set<String>> mCompiledNameToLanguageMap =
-// new HashMap<String, Set<String>>();
+ public static final List<Type> EMPTY_TYPE_LIST = new ArrayList<Type>(0);
/**
- * Default language set to use if nothing else is specified
+ * For this Result-specification, the collection of language-sets
+ * Uncompiled format
*/
- private static final String[] UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1 = new String[] {Language.UNSPECIFIED_LANGUAGE};
-
+ private final RsTypesMap rsTypesMap;
+
/**
* The type system used to compute the subtypes and allAnnotatorFeatures of types
*/
- private TypeSystem mTypeSystem;
+ private TypeSystem mTypeSystem = null;
+
+ // compiled forms
+ private boolean needsCompilation = true;
+ private final Map<String, RsLangs> rsCompiled;
- /**
- * constructor: init the default languge set with the language x-unspecified
- */
public ResultSpecification_impl() {
- name2tof_langs = new HashMap<String, ToF_Languages>();
- withSubtypesName2tof_langs = new HashMap<String, ToF_Languages>();
- lang2int = new HashMap<String, Integer>();
- lang2int.put(Language.UNSPECIFIED_LANGUAGE, 0);
+ rsTypesMap = new RsTypesMap();
+ rsCompiled = new HashMap<String, RsLangs>();
}
/**
@@ -213,294 +177,109 @@ public final class ResultSpecification_i
this();
mTypeSystem = aTypeSystem;
}
-
- private ResultSpecification_impl(ResultSpecification_impl original) {
- name2tof_langs = new HashMap<String, ToF_Languages>(original.name2tof_langs.size());
- withSubtypesName2tof_langs = new HashMap<String, ToF_Languages>(original.withSubtypesName2tof_langs.size());
-
- // don't share this - unless prove there are no multi-tasking interlocks possible
- lang2int = new HashMap<String, Integer>(original.lang2int);
-
- for (Map.Entry<String, ToF_Languages> entry : original.name2tof_langs.entrySet()) {
- ToF_Languages tof_langs = entry.getValue();
-
- // note: TypeOrFeature instances are not cloned, but shared
- // If they are modified, things may break
- name2tof_langs.put(entry.getKey(),
- new ToF_Languages(tof_langs.tof, (BitSet)(tof_langs.languages.clone())));
- }
- mTypeSystem = original.mTypeSystem;
- }
-
- private int getBaseLanguageIndex(String language) {
- return getLanguageIndex(getBaseLanguage(language));
- }
-
- private int getLanguageIndex(String language) {
- Integer r = lang2int.get(language);
- if (null == r) {
- int i = lang2int.size();
- lang2int.put(language, Integer.valueOf(i));
- return i;
- }
- return r.intValue();
- }
- private void compileIfNeeded() {
- if (needsCompilation) {
- compile();
- }
- }
-
- private static String getBaseLanguage(String language) {
- String baseLanguage = language;
- int index = language.indexOf(LANGUAGE_SEPARATOR);
- if (index > -1) {
- baseLanguage = language.substring(0, index);
- }
- return baseLanguage;
- }
-
/**
- * @see org.apache.uima.analysis_engine.ResultSpecification#getResultTypesAndFeatures()
+ * copies the result spec passed in so that updates to it
+ * don't affect the original
+ * @param original
*/
- public TypeOrFeature[] getResultTypesAndFeatures() {
- TypeOrFeature[] arr = new TypeOrFeature[name2tof_langs.size()];
- int i = 0;
- for (ToF_Languages tof_langs : name2tof_langs.values()) {
- arr[i++] = tof_langs.tof;
- }
- return arr;
- }
-
- private Map<String, ToF_Languages> availName2tof_langs() {
- if (needsCompilation) {
- return name2tof_langs;
+ private ResultSpecification_impl(ResultSpecification_impl original) {
+ mTypeSystem = original.mTypeSystem; // not cloned
+ rsTypesMap = new RsTypesMap(original.rsTypesMap);
+ needsCompilation = original.needsCompilation;
+ rsCompiled = new HashMap<String, RsLangs>(original.rsCompiled);
+ for (Map.Entry<String, RsLangs> e : rsCompiled.entrySet()) {
+ e.getValue().setShared();
}
- return withSubtypesName2tof_langs;
}
-
+
/**
- * return the set of languages for this type or feature, or null if no such type/feature
+ * @see org.apache.uima.analysis_engine.ResultSpecification#getResultTypesAndFeatures()
*/
- private ToF_Languages getLanguagesForTypeOrFeature(String typeOrFeature) {
- boolean isType = typeOrFeature.indexOf(TypeSystem.FEATURE_SEPARATOR) == -1;
- Map<String, ToF_Languages> tofMap = (isType) ? availName2tof_langs() : name2tof_langs;
- return tofMap.get(typeOrFeature);
- }
+ public TypeOrFeature[] getResultTypesAndFeatures() {
+ return getResultTypesAndFeatures(true, null);
+ }
/**
* @see org.apache.uima.analysis_engine.ResultSpecification#getResultTypesAndFeatures(java.lang.String)
+ * May contain near-duplicates - same type, but with different settings of allAnnotatorFeatures
+ * (only if they have different languages)
*/
public TypeOrFeature[] getResultTypesAndFeatures(String language) {
-
- int languageIndex = getLanguageIndex(language);
- int baseLanguageIndex = getBaseLanguageIndex(language);
-
- // holds the found ToFs for the specified language
- List<TypeOrFeature> foundToF = new ArrayList<TypeOrFeature>();
-
- for (Map.Entry<String, ToF_Languages> entry : name2tof_langs.entrySet()) {
- if (languageMatches(entry.getValue(), languageIndex, baseLanguageIndex)) {
- foundToF.add(entry.getValue().tof);
- }
- }
- return foundToF.toArray(new TypeOrFeature[foundToF.size()]);
+ return getResultTypesAndFeatures(false, language);
}
-
- // private helper functions
-// private boolean sameLanguages(String [] s, BitSet b) {
-// if (s.length != b.cardinality()) {
-// return false;
-// }
-// for (String lang : s) {
-// if ( ! b.get(getLanguageIndex(lang))) {
-// return false;
-// }
-// }
-// return true;
-// }
-
- /**
- * change null languages to the unspecified language
- * change a set of languages that includes the unspecified language to
- * just the unspecified language.
- * This is OK when storing things into a result spec, since
- * the unspecified language will match any query.
- * This doesn't apply for querying because the queries only
- * specify one language, not a set
- */
- private String [] normalizeLanguages(String [] languages) {
- if (null == languages) {
- return UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1;
- } else {
- for (String lang : languages) {
- if (lang.equals(Language.UNSPECIFIED_LANGUAGE)) {
- return UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1;
+ private TypeOrFeature[] getResultTypesAndFeatures(boolean skipLanguageFilter, String language) {
+ List<TypeOrFeature> r = new ArrayList<TypeOrFeature>();
+ if ((rsTypesMap == null || rsTypesMap.nbrOfTypes() == 0) && needsCompilation == false) {
+ // being called on results of intersection
+ // probably by a test case, not a normal call
+ // attempt to construct a plausible representation
+ reconstructRsTypesFromCompiled();
+ }
+ for (RsType t : rsTypesMap) {
+ if (t.isAllFeatures && (skipLanguageFilter || RsLangs.subsumes(t.languagesAllFeat, language))) {
+ r.add(createTypeOrFeature(t.typeName, true, true));
+ }
+ if (t.isSpecified && (skipLanguageFilter || RsLangs.subsumes(t.languagesNotAllFeat, language))) {
+ if (!(t.isAllFeatures && t.languagesAllFeat.equals(t.languagesNotAllFeat))) // don't make a duplicate
+ r.add(createTypeOrFeature(t.typeName, true, false));
+ }
+ if (t.features != null) {
+ for (RsFeat f : t.features) {
+ if (skipLanguageFilter || f.subsumes(language))
+ r.add(createTypeOrFeature(t.typeName, f.shortFeatName));
}
}
}
- // normalization is expensive - so do this once as part of parsing capabilities
-// int i = 0;
-// for (String language : languages) {
-// languages[i++] = normalizeLanguage(language);
-// }
- return languages;
+ return r.toArray(new TypeOrFeature[r.size()]);
}
-
-// private String normalizeLanguage(String language) {
-// String result = language.toLowerCase(Locale.ENGLISH); // language specs are in English locale
-// return result.replace('_', '-');
-// }
-
- private void setNeedsCompilation() {
- needsCompilation = true;
- if (0 != withSubtypesName2tof_langs.size()) {
- withSubtypesName2tof_langs.clear();
- }
- }
-
- private void addTypeOrFeatureInternal(TypeOrFeature tof, String[] languages) {
- languages = normalizeLanguages(languages);
-
- ToF_Languages tof_langs = name2tof_langs.get(tof.getName());
- if (null == tof_langs) {
- name2tof_langs.put(tof.getName(), new ToF_Languages(tof, languages));
- setNeedsCompilation();
- return;
- }
- tof_langs.tof.setAllAnnotatorFeatures(tof.isAllAnnotatorFeatures());
- BitSet langBitSet = tof_langs.languages;
- langBitSet.clear();
- for (String lang : languages) {
- langBitSet.set(getLanguageIndex(lang));
- }
- setNeedsCompilation();
- }
-
- /**
- * Create an entry in this result spec from the type or feature and its languages
- * @param tofLangs
- */
- private void addClonedToF_Languages(ToF_Languages tofLangs, ResultSpecification_impl rs) {
- List<String> languages = new ArrayList<String>();
- BitSet bs = tofLangs.languages;
- for (Map.Entry<String, Integer> si : rs.lang2int.entrySet()) {
- if (bs.get(si.getValue())) {
- languages.add(si.getKey());
- }
- }
-
- ToF_Languages n = new ToF_Languages(
- tofLangs.tof,
- languages.toArray(new String[languages.size()]));
- name2tof_langs.put(n.tof.getName(), n);
- setNeedsCompilation();
- }
-
- private TypeOrFeature createTypeOrFeature(String name, boolean isType, boolean aAllAnnotatorFeatures) {
- TypeOrFeature r = new TypeOrFeature_impl();
- r.setType(isType);
- r.setName(name);
- if (isType) {
- r.setAllAnnotatorFeatures(aAllAnnotatorFeatures);
- }
- return r;
- }
-
- private void addResultTypeOrFeatureAddLanguage(String name, boolean isType, boolean allAnnotatorFeatures, String[] languages) {
-
- ToF_Languages tof_langs = name2tof_langs.get(name);
-
- if (null == tof_langs) {
- addTypeOrFeatureInternal(createTypeOrFeature(name, isType, allAnnotatorFeatures), languages);
- setNeedsCompilation();
- return;
- }
-
- // tof_langs entry for this name exists, so update it
- addResultTypeOrFeatureAddLanguageCommon(tof_langs, allAnnotatorFeatures, languages);
- }
-
- private void addResultTypeOrFeatureAddLanguage(TypeOrFeature tof, String[] languages) {
-
- ToF_Languages tof_langs = name2tof_langs.get(tof.getName());
-
- if (null == tof_langs) {
- addTypeOrFeatureInternal(tof, languages);
- setNeedsCompilation();
- return;
- }
-
- addResultTypeOrFeatureAddLanguageCommon(tof_langs, tof.isAllAnnotatorFeatures(), languages);
- }
-
- private void addResultTypeOrFeatureAddLanguageCommon(
- ToF_Languages tof_langs, boolean allAnnotatorFeatures, String [] languages) {
- // tof_langs entry for this name exists, so update it
- if (allAnnotatorFeatures) {
- if (!tof_langs.tof.isAllAnnotatorFeatures()) {
- tof_langs.tof.setAllAnnotatorFeatures(true);
- setNeedsCompilation();
+ private void reconstructRsTypesFromCompiled() {
+ // First, recompute basic rsTypes and rsFeatures hooked to types
+ for (Entry<String, RsLangs> e : rsCompiled.entrySet()) {
+ String tofName = e.getKey();
+ int b = tofName.indexOf(TypeSystem.FEATURE_SEPARATOR);
+ if (b == -1) {
+ rsTypesMap.add(tofName, false, e.getValue(), false);
+ } else {
+ String typeName = tofName.substring(0, b);
+ String featName = tofName.substring(b+1);
+ rsTypesMap.add(typeName, featName, e.getValue(), false);
}
}
-
- // update the languages by adding the new languages passed in
- languages = normalizeLanguages(languages);
- BitSet langBitSet = tof_langs.languages;
- // "==" ok here due to normalizeLanguages call above
- if (languages == UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1) {
- if ( ! langBitSet.get(UNSPECIFIED_LANGUAGE_INDEX)) {
- langBitSet.clear();
- langBitSet.set(UNSPECIFIED_LANGUAGE_INDEX);
- setNeedsCompilation();
+ // Second merge
+ // if the types features all have the same lang and are all the features,
+ // set the allFeats flag, and merge in the langs
+ for (RsType t : rsTypesMap) {
+ if (t.hasAllFeaturesExplicitly(mTypeSystem) && t.allFeaturesHaveSameLangs()) {
+ t.isAllFeatures = true;
+ RsLangs l = t.features.features.get(0).languages;
+ if (l != null && RsLangs.isEmpty(l)) {
+ l = null;
+ }
+ if (l != null) {
+ if (t.languagesAllFeat == null) {
+ t.languagesAllFeat = RsLangs.createOrNull(l);
+ } else { // merge in langs l
+ t.languagesAllFeat = RsLangs.addAll(t.languagesAllFeat, l);
+ }
+ }
+ t.features = null;
}
- return;
- }
-
- // languages set already exists; add new ones to existing set
- for (String lang : languages) {
- langBitSet.set(getLanguageIndex(lang));
- }
- setNeedsCompilation();
- }
-
- /**
- * version used by compile to add subtypes
- * @param aTypeName
- * @param aAllAnnotatorFeatures
- * @param languages
- */
- private void addResultType(String name, boolean allAnnotatorFeatures, BitSet languages) {
- ToF_Languages tof_langs = withSubtypesName2tof_langs.get(name);
-
- if (null == tof_langs) {
- withSubtypesName2tof_langs.put(
- name,
- new ToF_Languages(createTypeOrFeature(name, true, allAnnotatorFeatures), (BitSet)languages.clone()));
- return;
- }
-
- // tof_langs entry for this name exists, so update it
- if (allAnnotatorFeatures) {
- if (!tof_langs.tof.isAllAnnotatorFeatures()) {
- tof_langs.tof.setAllAnnotatorFeatures(true);
+ if (t.isSpecified && t.isAllFeatures && equalsOrBothNull(t.languagesAllFeat, t.languagesNotAllFeat)) {
+ t.isSpecified = false;
+ t.languagesNotAllFeat = null;
}
- }
-
- // update the languages by adding the new languages passed in
- tof_langs.languages.or(languages);
+ }
}
-
-
+
/**
* @see org.apache.uima.analysis_engine.ResultSpecification#setResultTypesAndFeatures(org.apache.uima.analysis_engine.TypeOrFeature[])
*/
public void setResultTypesAndFeatures(TypeOrFeature[] aTypesAndFeatures) {
- setResultTypesAndFeatures(aTypesAndFeatures, UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1);
+ setResultTypesAndFeatures(aTypesAndFeatures, ARRAY_X_UNSPEC);
}
/**
@@ -508,34 +287,51 @@ public final class ResultSpecification_i
* java.lang.String[])
*/
public void setResultTypesAndFeatures(TypeOrFeature[] aTypesAndFeatures, String[] aLanguageIDs) {
- name2tof_langs.clear();
+
for (TypeOrFeature tof : aTypesAndFeatures) {
- name2tof_langs.put(tof.getName(), new ToF_Languages(tof, normalizeLanguages(aLanguageIDs)));
- }
- setNeedsCompilation();
+ addResultTof(tof, aLanguageIDs, true);
+ }
}
-
+
+ private void addResultTof(TypeOrFeature tof, String[] langs, boolean replace) {
+ String name = tof.getName();
+ String typeName = null;
+ String shortFeatName = null;
+ int i = name.indexOf(TypeSystem.FEATURE_SEPARATOR);
+ if (i < 0) {
+ typeName = name;
+ rsTypesMap.add(typeName, tof.isAllAnnotatorFeatures(), langs, replace);
+ } else {
+ typeName = name.substring(0, i);
+ shortFeatName = name.substring(i+1);
+ rsTypesMap.add(typeName, shortFeatName, langs, replace);
+ }
+ setCompileNeeded();
+ }
+
/**
* @see org.apache.uima.analysis_engine.ResultSpecification#addResultTypeOrFeature(org.apache.uima.analysis_engine.TypeOrFeature)
*/
public void addResultTypeOrFeature(TypeOrFeature aTypeOrFeature) {
- addTypeOrFeatureInternal(aTypeOrFeature, UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1);
+ addResultTypeOrFeature(aTypeOrFeature, ARRAY_X_UNSPEC);
}
/**
* @see org.apache.uima.analysis_engine.ResultSpecification#addResultTypeOrFeature(org.apache.uima.analysis_engine.TypeOrFeature,
* java.lang.String[])
+ *
+ * Note: the Javadoc assumes there's one tof per type, but this design allows 2 (one with allAnnotatorFeatures set and one without).
*/
- public void addResultTypeOrFeature(TypeOrFeature aTypeOrFeature, String[] aLanguageIDs) {
- addTypeOrFeatureInternal(aTypeOrFeature, aLanguageIDs);
- }
+ public void addResultTypeOrFeature(TypeOrFeature tof, String[] languages) {
+ addResultTof(tof, languages, true);
+ }
/**
* @see org.apache.uima.analysis_engine.ResultSpecification#addResultType(java.lang.String,
* boolean)
*/
public void addResultType(String aTypeName, boolean aAllAnnotatorFeatures) {
- addTypeOrFeatureInternal(createTypeOrFeature(aTypeName, true, aAllAnnotatorFeatures), UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1);
+ addResultType(aTypeName, aAllAnnotatorFeatures, ARRAY_X_UNSPEC);
}
/**
@@ -543,14 +339,15 @@ public final class ResultSpecification_i
* boolean, java.lang.String[])
*/
public void addResultType(String aTypeName, boolean aAllAnnotatorFeatures, String[] aLanguageIDs) {
- addResultTypeOrFeatureAddLanguage(aTypeName, true, aAllAnnotatorFeatures, aLanguageIDs);
+ rsTypesMap.add(aTypeName, aAllAnnotatorFeatures, aLanguageIDs, false);
+ setCompileNeeded();
}
/**
* @see org.apache.uima.analysis_engine.ResultSpecification#addResultFeature(java.lang.String)
*/
public void addResultFeature(String aFullFeatureName) {
- addResultFeature(aFullFeatureName, UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1);
+ addResultFeature(aFullFeatureName, ARRAY_X_UNSPEC);
}
/**
@@ -558,112 +355,39 @@ public final class ResultSpecification_i
* java.lang.String[])
*/
public void addResultFeature(String aFullFeatureName, String[] aLanguageIDs) {
- addResultTypeOrFeatureAddLanguage(aFullFeatureName, false, false, aLanguageIDs);
+ String typeName = null;
+ String shortFeatName = null;
+ int i = aFullFeatureName.indexOf(TypeSystem.FEATURE_SEPARATOR);
+ typeName = aFullFeatureName.substring(0, i);
+ shortFeatName = aFullFeatureName.substring(i+1);
+ rsTypesMap.add(typeName, shortFeatName, aLanguageIDs, false);
+ setCompileNeeded();
}
/**
* @see org.apache.uima.analysis_engine.ResultSpecification#compile(org.apache.uima.cas.TypeSystem)
+ * @deprecated no longer needed, remove call to this
*/
+ @Deprecated
public void compile(TypeSystem aTypeSystem) {
setTypeSystem(aTypeSystem);
- compileIfNeeded();
+ compileIfNeeded(); // setTypeSystem() has just marked the spec stale, so this compiles exactly once and clears needsCompilation; a bare compile() left the flag set, making the next query compile again into the already filled map
}
-
-// private static class TypeToCompile {
-// String name;
-// boolean allFeatures;
-// String[] languages;
-// TypeToCompile(String aName, boolean aAllFeatures, String[] aLanguages) {
-// name = aName;
-// allFeatures = aAllFeatures;
-// languages = aLanguages;
-// }
-// }
-
- private void compile() {
- if (null == mTypeSystem) {
- return;
- }
-
- needsCompilation = false;
- // get set of current type names
- // for each name, get set of implied additional names (allAnnotatorFeatures and subtypes), recursively
- // add with languages
- // issue: can a result spec hold for language 1 types a b c, for language 2 types a b? yes
- // can it hold for lang 1 type a(allfeats) and for lang 2 type a(not all feat)? no
-
-// Map<String, TypeToCompile> typesToCompile = new HashMap<String, TypeToCompile>(mNameToTofLang.size());
-// for (ToF_Languages tof_langs : mNameToTofLang.values()) {
-// TypeOrFeature tof = tof_langs.tof;
-// if (tof.isType()) {
-// String typeName = tof.getName();
-// typesToCompile.put(typeName, new TypeToCompile(typeName, tof.isAllAnnotatorFeatures(), tof_langs.languages));
-// }
-// }
-
- for (ToF_Languages tof_langs : name2tof_langs.values()) {
- TypeOrFeature tof = tof_langs.tof;
-
- addResultType(tof.getName(), tof.isAllAnnotatorFeatures(), tof_langs.languages);
-
- if (tof.isType()) {
- compileTypeRecursively(mTypeSystem.getType(tof.getName()), tof.isAllAnnotatorFeatures(), tof_langs.languages);
- }
+ private TypeOrFeature createTypeOrFeature(String name, boolean isType, boolean aAllAnnotatorFeatures) {
+ TypeOrFeature r = new TypeOrFeature_impl();
+ r.setType(isType);
+ r.setName(name);
+ if (isType) {
+ r.setAllAnnotatorFeatures(aAllAnnotatorFeatures);
}
+ return r;
}
-
-// mCompiledNameToLanguageMap.clear();
-// for (Map.Entry<TypeOrFeature, Set<String>> elem : mTypesAndFeatures.entrySet()) {
-// TypeOrFeature tof = elem.getKey();
-// if (tof.isType()) {
-// Type t = aTypeSystem.getType(tof.getName());
-// if (t != null) {
-// addTypeRecursive(t, aTypeSystem, elem.getValue(), tof.isAllAnnotatorFeatures());
-// }
-// } else { // feature
-// mCompiledNameToLanguageMap.put(tof.getName(), elem.getValue());
-// }
-// }
-// // TODO: process the set of intersections
-// }
-
- private void compileTypeRecursively(Type type, boolean allFeatures, BitSet languages) {
-
- if (null != type) {
-// if (allFeatures) {
-// for (Feature f : (List<Feature>) type.getFeatures()) {
-// addResultFeature(f.getName(), languages); // this add "merges"
-// // langauges with existing
-// // ones
-// }
-// }
-
- for (Type subType : (List<Type>) mTypeSystem.getDirectSubtypes(type)) {
- String subTypeName = subType.getName();
- addResultType(subTypeName, allFeatures, languages);
- compileTypeRecursively(subType, allFeatures, languages);
- }
- }
+
+ private TypeOrFeature createTypeOrFeature(String typeName, String featureName) {
+ return createTypeOrFeature(typeName + TypeSystem.FEATURE_SEPARATOR + featureName, false, false);
}
-// /**
-// * @param t
-// */
-// private void addTypeRecursive(Type type, TypeSystem typeSystem, Set<String> languages,
-// boolean allFeatures) {
-// mCompiledNameToLanguageMap.put(type.getName(), languages);
-// if (allFeatures) {
-// for (Feature f : (List<Feature>)type.getFeatures()) {
-// mCompiledNameToLanguageMap.put(f.getName(), languages);
-// }
-// }
-// // recurse on subtypes
-// for (Type subtype : (List<Type>)typeSystem.getDirectSubtypes(type)) {
-// addTypeRecursive(subtype, typeSystem, languages, allFeatures);
-// }
-// }
-
/**
* @see org.apache.uima.analysis_engine.ResultSpecification#containsType(java.lang.String)
*/
@@ -673,16 +397,27 @@ public final class ResultSpecification_i
/**
* @see org.apache.uima.analysis_engine.ResultSpecification#containsType(java.lang.String,java.lang.String)
+ * method:
+ *
+ * for each type (with all-feat, without all-feat):
+ * for each type, and supertypes
+ * check if one of the resultSpec languages subsumes the given language.
+ * if so, return true
+ * return false;
+ *
+ * But: cache this: key = int[2]: type#, langi#, value = true/false
+ *
*/
- public boolean containsType(String aTypeName, String language) {
- language = Language.normalize(language);
-
- if (aTypeName.indexOf(TypeSystem.FEATURE_SEPARATOR) != -1)
+
+ // TODO check cache, normalize language
+ public boolean containsType(String aTypeName, String aLanguage) {
+ if (aTypeName.indexOf(TypeSystem.FEATURE_SEPARATOR) != -1) {
return false; // check against someone passing a feature name here
-
+ }
compileIfNeeded();
- return languageMatches(availName2tof_langs().get(aTypeName), language);
+ return hasLanguage(rsCompiled.get(aTypeName), aLanguage);
}
+
/**
* @see org.apache.uima.analysis_engine.ResultSpecification#containsFeature(java.lang.String)
@@ -690,95 +425,44 @@ public final class ResultSpecification_i
public boolean containsFeature(String aFullFeatureName) {
return containsFeature(aFullFeatureName, Language.UNSPECIFIED_LANGUAGE);
}
-
-
-// int typeEndPosition = aFullFeatureName.indexOf(TypeSystem.FEATURE_SEPARATOR);
-// if (typeEndPosition == -1)
-// return false; // check against someone passing a type name here
-//
-// compileIfNeeded();
-// if (availName2tof_langs().containsKey(aFullFeatureName)) {
-// return true;
-// }
-
- // special code here to return true if the allAnnotatorFeatures flag is set for the type
-// String typeName = aFullFeatureName.substring(0, typeEndPosition);
-// ToF_Languages tof_langs = availName2tof_langs().get(typeName);
-// if (null != tof_langs && tof_langs.tof.isAllAnnotatorFeatures()) {
-// if (null != mTypeSystem) {
-// return null != mTypeSystem.getFeatureByFullName(aFullFeatureName); // verify feature is there
-// }
-// return true;
-// }
-// return false;
-// }
/**
* @see org.apache.uima.analysis_engine.ResultSpecification#containsFeature(java.lang.String,java.lang.String)
*/
- public boolean containsFeature(String aFullFeatureName, String language) {
- language = Language.normalize(language);
- int typeEndPosition = aFullFeatureName.indexOf(TypeSystem.FEATURE_SEPARATOR);
- if (typeEndPosition == -1)
- return false; // check against someone passing a type name here
+ public boolean containsFeature(String aFullFeatureName, String aLanguage) {
+ int i = aFullFeatureName.indexOf(TypeSystem.FEATURE_SEPARATOR);
+ if (i == -1)
+ return false; // check against someone passing a type name here
compileIfNeeded();
- ToF_Languages tof_langs = name2tof_langs.get(aFullFeatureName);
- if (languageMatches(tof_langs, language)) {
+ boolean found = hasLanguage(rsCompiled.get(aFullFeatureName), aLanguage);
+ if (found) {
return true;
}
-
- // special code for allAnnotatorFeatures: return true if type name is found and
- // has all annotator features set
- tof_langs = availName2tof_langs().get(aFullFeatureName.substring(0, typeEndPosition));
- if (null != tof_langs && tof_langs.tof.isAllAnnotatorFeatures() && languageMatches(tof_langs, language)) {
- if (null != mTypeSystem) {
- return null != mTypeSystem.getFeatureByFullName(aFullFeatureName); // verify feature is there
- }
+ // Fallback that keeps the earlier behavior when no type system is specified: the feature counts
+ // as contained if its type was added with all-features for languages that subsume aLanguage.
+ RsType t = rsTypesMap.getRsType(aFullFeatureName.substring(0, i)); // look for just the type name
+ if (null != t && t.isAllFeatures && RsLangs.subsumes(t.languagesAllFeat, Language.normalize(aLanguage))) { // normalized here too, as hasLanguage() does for the lookup above
return true;
}
return false;
}
/**
- * Languages matches if the query language is xxx-yyy and
- * result spec languages contains:
- * x-unspecified
- * xxx-yyy
- * xxx
- *
- * @param tof_langs
+ *
+ * @param rsLangs
* @param language
- * @return
+ * @return
*/
- private boolean languageMatches(ToF_Languages tof_langs, String language) {
- if (null == tof_langs) {
- return false;
- }
- BitSet languages = tof_langs.languages;
- if (languages.get(UNSPECIFIED_LANGUAGE_INDEX) ||
- languages.get(getLanguageIndex(language))) {
- return true;
- }
- String baseLanguage = getBaseLanguage(language);
- return baseLanguage != language && // the != means the base language is different from the language
- // != is OK here
- languages.get(getLanguageIndex(baseLanguage));
+  private static boolean hasLanguage(RsLangs rsLangs, String language) {
+    final String normalized = Language.normalize(language);
+    if (rsLangs == null) {
+      return false; // no rsCompiled entry for this name -- that is NOT the same as x-unspecified
+    }
+    return RsLangs.subsumes(rsLangs, normalized);
+  }
- private boolean languageMatches(ToF_Languages tof_langs, int languageIndex, int baseLanguageIndex) {
- if (null == tof_langs) {
- return false;
- }
- BitSet languages = tof_langs.languages;
- if (languages.get(UNSPECIFIED_LANGUAGE_INDEX) ||
- languages.get(languageIndex)) {
- return true;
- }
- return baseLanguageIndex != languageIndex &&
- languages.get(baseLanguageIndex);
- }
-
+
/**
* @see org.apache.uima.resource.impl.MetaDataObject_impl#getXmlizationInfo()
*/
@@ -805,25 +489,36 @@ public final class ResultSpecification_i
}
for (Capability capability : capabilities) {
TypeOrFeature[] tofs = outputs ? capability.getOutputs() : capability.getInputs();
- String[] supportedLanguages = capability.getLanguagesSupported();
- if (null == supportedLanguages ||
- supportedLanguages.length == 0) {
- supportedLanguages = UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1;
- }
+
for (TypeOrFeature tof : tofs) {
- addResultTypeOrFeatureAddLanguage(tof, supportedLanguages);
+ String typeName = tof.getName();
+ if (!tof.isType()) {
+ int i = typeName.indexOf(TypeSystem.FEATURE_SEPARATOR);
+ String shortFeatName = typeName.substring(i+1);
+ typeName = typeName.substring(0, i);
+ rsTypesMap.add(typeName, shortFeatName, capability.getLanguagesSupported(), false);
+ } else {
+ rsTypesMap.add(typeName, tof.isAllAnnotatorFeatures(), capability.getLanguagesSupported(), false);
+ }
}
}
- setNeedsCompilation();
+ setCompileNeeded();
}
/**
* @see org.apache.uima.analysis_engine.ResultSpecification#removeTypeOrFeature(org.apache.uima.analysis_engine.TypeOrFeature)
+ * This removes the type or feature for all languages.
+ * Beware: a type can be present in two forms, one with allFeatures set and one without (if the two have different languages).
*/
- public void removeTypeOrFeature(TypeOrFeature aTypeOrFeature) {
- // remove Type or Feature from the
- name2tof_langs.remove(aTypeOrFeature.getName());
- setNeedsCompilation(); // may have removed something which had subtypes
+ public void removeTypeOrFeature(TypeOrFeature tof) {
+ String name = tof.getName();
+ if (tof.isType()) {
+ rsTypesMap.remove(name);
+ } else {
+ int i = name.indexOf(TypeSystem.FEATURE_SEPARATOR);
+ rsTypesMap.remove(name.substring(0, i), name.substring(i+1));
+ }
+ setCompileNeeded();
}
/**
@@ -842,11 +537,8 @@ public final class ResultSpecification_i
}
public void setTypeSystem(TypeSystem ts) {
- if (mTypeSystem == ts) {
- return;
- }
mTypeSystem = ts;
- setNeedsCompilation();
+ setCompileNeeded();
}
public TypeSystem getTypeSystem() {
@@ -856,13 +548,119 @@ public final class ResultSpecification_i
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("org.apache.uima.analysis_engine.impl.ResultSpecification_impl:\n);");
- sb.append("needsCompilation = ").append(needsCompilation).append("\n");
- sb.append("lang2int = ").append(lang2int).append("\n");
- sb.append("name2tof_langs = ").append(name2tof_langs).append("\n");
- sb.append("withSubtypesName2tof_langs = ").append(withSubtypesName2tof_langs).append("\n");
+ sb.append(" needsCompilation = ").append(needsCompilation).append("\n");
+// sb.append("lang2int = ").append(lang2int).append("\n");
+// sb.append("name2tof_langs = ").append(name2tof_langs).append("\n");
+// sb.append("withSubtypesName2tof_langs = ").append(withSubtypesName2tof_langs).append("\n");
+ sb.append("rsTofLangs = ").append(rsTypesMap);
sb.append("mTypeSystem = ").append(mTypeSystem).append("\n");
return sb.toString();
}
+
+  private void compileIfNeeded() {
+    if (!needsCompilation) {
+      return; // the compiled form is still current
+    }
+    needsCompilation = false;
+    compile();
+  }
+  private void setCompileNeeded() {
+    rsCompiled.clear();      // drop the stale expansion right away
+    needsCompilation = true; // compileIfNeeded() rebuilds it on the next call
+  }
+
+  /**
+   * Fills rsCompiled, the fully expanded form of this result spec: each type and each
+   * type:feature name in rsTypesMap is entered for the type itself and for all of its subtypes.
+   */
+  private void compile() {
+    for (RsType rsType : rsTypesMap) {
+      if (rsType.isSpecified) {
+        addCompiledFormForTypeAndItsSubtypes(rsType, rsType.languagesNotAllFeat);
+      }
+      if (rsType.isAllFeatures) {
+        final RsLangs allFeatLangs = rsType.languagesAllFeat;
+        addCompiledFormForTypeAndItsSubtypes(rsType, allFeatLangs);
+        for (Feature feat : rsType.getAllAppropriateFeatures(mTypeSystem)) {
+          addCompiledFormForFeatureAndItsSubtypes(rsType, feat.getShortName(), allFeatLangs);
+        }
+      }
+      if (rsType.features != null) { // features that were listed explicitly
+        for (RsFeat rsFeat : rsType.features) {
+          addCompiledFormForFeatureAndItsSubtypes(rsType, rsFeat.shortFeatName, rsFeat.languages);
+        }
+      }
+    }
+  }
+
+  private void addCompiledFormForTypeAndItsSubtypes(RsType rsType, RsLangs languages) {
+    addCompiledFormEntry(rsType.typeName, languages); // the type itself
+    for (String nameOfSubtype : subtypeNames(rsType.typeName)) {
+      addCompiledFormEntry(nameOfSubtype, languages); // every name subtypeNames() reports for it
+    }
+  }
+
+  /**
+   * Enters type:feature for the given type and each subtype. That string may not be in the type
+   * system: if Foo introduces featYYY, FooSubtype:featYYY may exist only in the result spec.
+   * @param rst the type whose name, and whose subtype names, prefix the feature
+   * @param shortFeatName the feature name without its type prefix
+   * @param langs the languages recorded for every generated name
+   */
+  private void addCompiledFormForFeatureAndItsSubtypes(RsType rst, String shortFeatName, RsLangs langs) {
+    final String ownTypeName = rst.typeName;
+    addCompiledFormEntry(RsFullFeatNames.getFullFeatName(ownTypeName, shortFeatName), langs);
+    for (String nameOfSubtype : subtypeNames(ownTypeName)) {
+      addCompiledFormEntry(RsFullFeatNames.getFullFeatName(nameOfSubtype, shortFeatName), langs);
+    }
+  }
+
+ /**
+ * Merges languages into the rsCompiled entry for a type or feature name, creating the entry if it is absent.
+ * @param tofName type name or full feature name used as the rsCompiled key
+ * @param languagesToAdd languages to merge in; null is replaced by compiledXunspecified
+ */
+ private void addCompiledFormEntry(String tofName, RsLangs languagesToAdd) {
+ if (languagesToAdd == null) {
+ languagesToAdd = compiledXunspecified;
+ }
+ RsLangs rsLangs = rsCompiled.get(tofName);
+ if (null == rsLangs) {
+ if (languagesToAdd != compiledXunspecified) { // every instance other than compiledXunspecified is flagged via setShared() before it is stored
+ languagesToAdd.setShared();
+ }
+ rsCompiled.put(tofName, languagesToAdd); // first entry for this name: the instance passed in is stored as is, not copied
+ return;
+ }
+ RsLangs.addAll(rsLangs, languagesToAdd); // NOTE(review): the return value is ignored -- confirm addAll updates rsLangs in place even when it has been marked shared
+ }
+
+  /**
+   * Names of all proper subtypes of typeName (per TypeSystemImpl.getProperlySubsumedTypes), looked
+   * up each time iterator() is called; nothing if there is no type system or no such type.
+   */
+  private Iterable<String> subtypeNames(final String typeName) {
+    final TypeSystemImpl ts = (TypeSystemImpl) mTypeSystem;
+    return new Iterable<String>() {
+      public Iterator<String> iterator() {
+        final Type t = (null == ts) ? null : ts.getType(typeName);
+        final List<Type> subtypes = (null == t) ? EMPTY_TYPE_LIST : ts.getProperlySubsumedTypes(t);
+        return new Iterator<String>() {
+          private int i = 0;
+          public boolean hasNext() {
+            return i < subtypes.size();
+          }
+          public String next() {
+            if (!hasNext()) {
+              throw new java.util.NoSuchElementException(); // Iterator contract, rather than failing inside List.get
+            }
+            return subtypes.get(i++).getName();
+          }
+          public void remove() {throw new UnsupportedOperationException();}
+        };
+      }
+    };
+  }
/**
* Compute the feature/type + language intersection of two result specs
@@ -873,182 +671,69 @@ public final class ResultSpecification_i
* Each is a set of languages, interpreted as a "Union".
* If the set contains x-unspecified - it is taken to mean all languages
* if the set contains XX - it is taken to mean the union of all sublanguages XX-yy
- *
- * package scope
*/
- static ResultSpecification_impl intersect(ResultSpecification rs1in, ResultSpecification_impl rs2in) {
- ResultSpecification_impl rs1 = (ResultSpecification_impl) rs1in;
- ResultSpecification_impl rs2 = (ResultSpecification_impl) rs2in;
- ResultSpecification_impl newRs = new ResultSpecification_impl(rs1.getTypeSystem());
+
+ ResultSpecification_impl intersect(ResultSpecification_impl rsOther) {
- rs1.compileIfNeeded(); // compile to make the next tests for type intersecting work
- rs2.compileIfNeeded();
+ ResultSpecification_impl r = new ResultSpecification_impl();
+ r.setTypeSystem(rsOther.mTypeSystem);
- // iterate over all types and features in this component's result set
- for (Map.Entry<String, ToF_Languages> item : rs2.availName2tof_langs().entrySet()) {
- String rs2tof = item.getKey();
- ToF_Languages rs2Langs = item.getValue();
- // see if in other resultSpec
- ToF_Languages rs1Langs = rs1.getLanguagesForTypeOrFeature(rs2tof);
- if (rs1Langs == null) {
- continue;
- }
-
- // Type or Feature is in both; intersect the languages
- // if either has language x-unspecified, use the other's language spec.
- if (rs1Langs.languages.get(ResultSpecification_impl.UNSPECIFIED_LANGUAGE_INDEX)) {
- newRs.addClonedToF_Languages(rs2Langs, rs2);
- continue;
- }
- if (rs2Langs.languages.get(ResultSpecification_impl.UNSPECIFIED_LANGUAGE_INDEX)) {
- newRs.addClonedToF_Languages(rs1Langs, rs1);
- continue;
+ r.compileIfNeeded();
+ rsOther.compileIfNeeded();
+ compileIfNeeded();
+
+ /**
+ * Iterate over other
+ */
+ for (Iterator<Entry<String, RsLangs>> it = rsOther.rsCompiled.entrySet().iterator(); it.hasNext();) {
+ Entry<String, RsLangs> e = it.next();
+ String tofName = e.getKey();
+ RsLangs otherRsLangs = e.getValue();
+
+ /**
+ * Get corresponding languages from this side
+ */
+ RsLangs thisRsLangs = rsCompiled.get(tofName);
+ if (null == thisRsLangs) {
+ continue; // null does NOT mean x-unspecified, it means tof is not present in compiled map at all
}
-
- // Intersect languages - neither has x-unspecified
-
- List<String> rsltLangs = computeResultLangIntersection(rs1, rs1Langs, rs2, rs2Langs);
-
- if (rsltLangs.size() > 0) {
- newRs.addResultTypeOrFeature(rs2Langs.tof, rsltLangs.toArray(new String[rsltLangs.size()]));
+
+ /**
+ * Intersect languages, with subsumption
+ */
+ RsLangs intersectRsLangs = thisRsLangs.intersect(otherRsLangs);
+ if (intersectRsLangs != null) {
+ r.addCompiledFormEntry(tofName, intersectRsLangs);
}
}
- return newRs;
+ return r;
}
- private static List<String> computeResultLangIntersection(
- ResultSpecification_impl rs1, ToF_Languages rs1Langs,
- ResultSpecification_impl rs2, ToF_Languages rs2Langs) {
-
- BitSet rs1bs = rs1Langs.languages;
- BitSet rs2bs = rs2Langs.languages;
- List<String> rsltLangs = new ArrayList<String>();
-
- // because we don't have a list of languages as "Strings",
- // iterate over all the languages, and skip those not in this
- // type-or-feature
- for (Map.Entry<String, Integer> langIndex2 : rs2.lang2int.entrySet()) {
- if (!rs2bs.get(langIndex2.getValue())) {
- continue;
- }
-
- // String intersectLang = intersectLanguages(langIndex.getKey(),
- // rs1Langs, rs2Langs);
-
- String thisLang = langIndex2.getKey();
- if (rs1bs.get(rs1.getLanguageIndex(thisLang))) {
- rsltLangs.add(thisLang);
- continue;
- }
-
- // thisLang is not in the set of rs1 languages, but it might still be
- // in the intersection, if thisLang is not a base form, and the base
- // form
- // *is* in the set of rs1 languages
- String baseLang = getBaseLanguage(thisLang);
- if (baseLang != thisLang) { // thisLang is not a base form
- if (rs1bs.get(rs1.getLanguageIndex(baseLang))) {
- rsltLangs.add(thisLang);
- continue;
- }
- }
- }
-
- // add in more specific langs in rs1 matching general lang in rs2
-
- // because we don't have a list of languages as "Strings",
- // iterate over all the languages, and skip those not in this
- // type-or-feature
- for (Map.Entry<String, Integer> langIndex1 : rs1.lang2int.entrySet()) {
- if (!rs1bs.get(langIndex1.getValue())) {
- continue;
- }
-
- String rsLang1 = langIndex1.getKey();
- if (rs2bs.get(rs2.getLanguageIndex(rsLang1))) {
- continue; // skip this if already would be in intersection
- }
- String baseLang1 = getBaseLanguage(rsLang1);
- if (rsLang1 != baseLang1) { // rsLang1 is not a base form
- if (rs2bs.get(rs2.getLanguageIndex(baseLang1))) {
- rsltLangs.add(rsLang1); // add specific lang to intersection
- }
- }
- }
- return rsltLangs;
+
+
+  private boolean compiledFormEquals(ResultSpecification_impl other) {
+    other.compileIfNeeded(); // both sides must be expanded before their maps are compared
+    this.compileIfNeeded();
+    return this.rsCompiled.equals(other.rsCompiled); // Map.equals: true if both maps have the same entries
+  }
@Override
- public boolean equals(Object obj) {
- if (this == obj) {
- return true;
- }
- if (!super.equals(obj)) {
- return false;
- }
- if (getClass() != obj.getClass()) {
+ public boolean equals(Object aObj) {
+ if (!(aObj instanceof ResultSpecification_impl)) {
return false;
}
- ResultSpecification_impl other = (ResultSpecification_impl) obj;
- if (lang2int == null) {
- if (other.lang2int != null) {
- return false;
- }
- }
- if (mTypeSystem == null) {
- if (other.mTypeSystem != null) {
- return false;
- }
- } else if (mTypeSystem != other.mTypeSystem) {
- return false;
- }
- if (name2tof_langs == null) {
- if (other.name2tof_langs != null) {
- return false;
- }
- }
- this.compileIfNeeded();
- other.compileIfNeeded();
-
- if (withSubtypesName2tof_langs == null) {
- if (other.withSubtypesName2tof_langs != null) {
- return false;
- }
- }
-
- if (availName2tof_langs().size() != other.availName2tof_langs().size()) {
- return false;
- }
-
- // iterate over all types and features in this
- for (Map.Entry<String, ToF_Languages> item : availName2tof_langs().entrySet()) {
- String tof = item.getKey();
- ToF_Languages toflangs = item.getValue();
- ToF_Languages otherToflangs = other.availName2tof_langs().get(tof);
- BitSet thisBs = toflangs.languages;
- BitSet otherBs = otherToflangs.languages;
- if (thisBs.cardinality() != otherBs.cardinality()) {
- return false;
- }
- for (Map.Entry<String, Integer>l2ie : lang2int.entrySet()) {
- if (thisBs.get(l2ie.getValue())) {
- if (!otherBs.get(other.lang2int.get(l2ie.getKey()))) {
- return false;
- }
- }
- }
- }
-
- return true;
+ return compiledFormEquals((ResultSpecification_impl)aObj);
}
- /**
- * Hash code not implemented
- * @return
- */
- @Override
- public int hashCode() {
- throw new UnsupportedOperationException();
+ static boolean equalsOrBothNull(Object x, Object y) {
+ if (null == x && null == y) {
+ return true;
+ }
+ if (null != x && x.equals(y)) {
+ return true;
+ }
+ return false;
}
+
}
Added: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeat.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeat.java?rev=991931&view=auto
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeat.java (added)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeat.java Thu Sep 2 13:43:50 2010
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.analysis_engine.impl;
+
+/**
+ * Holds the languages given for one explicitly listed type:feature of a result spec.
+ * An RsLangs instance adopted from elsewhere is flagged via setShared().
+ */
+public class RsFeat {
+  final String shortFeatName; // canonical string, so it can be compared with ==
+  RsLangs languages; // may be null
+
+  /** Builds the language set from an array of language names. */
+  RsFeat(String shortName, String[] languages) {
+    this.shortFeatName = shortName;
+    this.languages = RsLangs.createOrNull(languages);
+  }
+
+  /** Adopts an existing language set, flagging it as shared unless it is null. */
+  RsFeat(String shortName, RsLangs languages) {
+    if (languages != null) {
+      languages.setShared();
+    }
+    this.shortFeatName = shortName;
+    this.languages = languages;
+  }
+
+  /** Copy constructor: the copy refers to the same language set as the original. */
+  RsFeat(RsFeat original) {
+    this(original.shortFeatName, original.languages);
+  }
+
+  /** Asks RsLangs whether this feature's languages subsume the given language. */
+  boolean subsumes(String language) {
+    return RsLangs.subsumes(languages, language);
+  }
+}
Added: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeats.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeats.java?rev=991931&view=auto
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeats.java (added)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeats.java Thu Sep 2 13:43:50 2010
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.analysis_engine.impl;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Represents the updateable list of features, each with a particular language spec.
+ * A given feature is meant to appear only once, with the union of all its languages; add() does not check.
+ */
+public class RsFeats implements Iterable<RsFeat> {
+  List<RsFeat> features = null; // created by the first add(); null is handled as "no features"
+
+  RsFeats() {}
+
+  /**
+   * Copies into a new feature list; the copies share their languages with the originals.
+   * @param other the list to copy
+   */
+  RsFeats(RsFeats other) {
+    if (other.features == null) {
+      features = null;
+      return;
+    }
+    features = new ArrayList<RsFeat>(other.features.size());
+    for (RsFeat f : other.features) {
+      features.add(new RsFeat(f));
+    }
+  }
+
+  /** Number of features in the list; 0 while the list has not been created. */
+  int size() {
+    return (features == null) ? 0 : features.size();
+  }
+
+  /**
+   * Appends a feature. ASSUMES the feature does not exist in the list already.
+   * @param shortFeatName feature name without the type prefix
+   * @param languages a String[] of languages, or an RsLangs (flagged as shared), or null
+   */
+  void add(String shortFeatName, Object languages) {
+    final RsFeat feat;
+    if (languages instanceof String[]) {
+      feat = new RsFeat(shortFeatName, (String[]) languages);
+    } else {
+      final RsLangs rsLangs = (RsLangs) languages;
+      if (null != rsLangs) { // null is accepted by RsFeat; calling setShared() on it threw an NPE
+        rsLangs.setShared();
+      }
+      feat = new RsFeat(shortFeatName, rsLangs);
+    }
+    if (null == features) {
+      features = new ArrayList<RsFeat>(1);
+    }
+    features.add(feat);
+  }
+
+  /**
+   * Removes the first feature with this short name, regardless of its language(s).
+   * Does nothing if there is no such feature; features == null is tolerated.
+   * @param shortFeatName short name of the feature to remove (typeName is not used)
+   */
+  void remove(String typeName, String shortFeatName) {
+    for (Iterator<RsFeat> it = iterator(); it.hasNext();) { // iterator() copes with features == null
+      if (shortFeatName.equals(it.next().shortFeatName)) {
+        it.remove();
+        return;
+      }
+    }
+  }
+
+  /** Tests whether a feature with this short name is in the list; typeName is not used. */
+  boolean contains(String typeName, String shortFeatName) {
+    return null != get(shortFeatName);
+  }
+
+  /**
+   * Linear search in the list for a short feature name.
+   * @param shortFeatName short feature name to look for
+   * @return the matching entry, or null if there is none (also while the list does not exist)
+   */
+  RsFeat get(String shortFeatName) {
+    for (RsFeat r : this) { // goes through iterator(), so features == null is safe
+      if (r.shortFeatName.equals(shortFeatName)) {
+        return r;
+      }
+    }
+    return null;
+  }
+
+  public Iterator<RsFeat> iterator() {
+    return (null == features) ? nullIterator : features.iterator();
+  }
+
+  /** Shared iterator over nothing, handed out while features is null. */
+  final static Iterator<RsFeat> nullIterator = new Iterator<RsFeat>() {
+
+    public boolean hasNext() {
+      return false;
+    }
+
+    public RsFeat next() {
+      throw new java.util.NoSuchElementException(); // Iterator contract; this used to return null
+    }
+
+    public void remove() {
+    }
+  };
+}
Added: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFullFeatNames.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFullFeatNames.java?rev=991931&view=auto
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFullFeatNames.java (added)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFullFeatNames.java Thu Sep 2 13:43:50 2010
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.analysis_engine.impl;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.WeakHashMap;
+
+import org.apache.uima.cas.TypeSystem;
+
+/**
+ * A globally shared cache that maps types and features to the corresponding full feature name.
+ * Used to avoid creating new full feature names when compiling result feature specs.
+ * Lookup is a two-step index: type name (held in a WeakHashMap), then short feature name.
+ * All access is synchronized on the type-name map.
+ *
+ */
+public class RsFullFeatNames {
+
+  /** Second-level map for one type name; the map itself is null till used. */
+  private static class TypeFeats {
+    private Map<String, String> short2Full = null;
+  }
+
+  private static final Map<String, TypeFeats> typeName2TypeFeats = new WeakHashMap<String, TypeFeats>();
+
+  /** Returns the cached full name for a type and feature, creating and caching it on first request. */
+  public static String getFullFeatName(String typeName, String shortFeatName) {
+    synchronized (typeName2TypeFeats) {
+      TypeFeats perType = typeName2TypeFeats.get(typeName);
+      if (perType == null) {
+        perType = new TypeFeats();
+        typeName2TypeFeats.put(typeName, perType);
+      }
+      Map<String, String> byShortName = perType.short2Full;
+      if (byShortName == null) {
+        byShortName = new HashMap<String, String>(3);
+        perType.short2Full = byShortName;
+      }
+      String fullName = byShortName.get(shortFeatName);
+      if (fullName == null) {
+        fullName = makeFullFeatName(typeName, shortFeatName);
+        byShortName.put(shortFeatName, fullName);
+      }
+      return fullName;
+    }
+  }
+
+  private static String makeFullFeatName(String typeName, String shortFeatName) {
+    final int capacity = typeName.length() + 1 + shortFeatName.length();
+    return new StringBuilder(capacity)
+        .append(typeName).append(TypeSystem.FEATURE_SEPARATOR).append(shortFeatName).toString();
+  }
+}
Added: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLang.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLang.java?rev=991931&view=auto
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLang.java (added)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLang.java Thu Sep 2 13:43:50 2010
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.analysis_engine.impl;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.uima.cas.text.Language;
+
+/**
+ * Class used to canonicalize language string
+ */
+public class RsLang {
+
+  /**
+   * Global pool mapping every language string seen so far to its canonical instance.
+   * The map is also the monitor guarding access to the pool.
+   */
+  private static final Map<String, String> canonicalLanguageStrings = new HashMap<String, String>();
+
+  /**
+   * Maps a language string to its canonical instance, so that equal language
+   * strings obtained from this method are the same object.
+   *
+   * @param language the language string, may be null
+   * @return Language.UNSPECIFIED_LANGUAGE if language is null or equal to it,
+   *         otherwise the pooled instance that is equal to language
+   */
+  static String getCanonicalLanguageString(String language) {
+    if (language == null || language.equals(Language.UNSPECIFIED_LANGUAGE)) {
+      return Language.UNSPECIFIED_LANGUAGE;
+    }
+    synchronized (canonicalLanguageStrings) {
+      final String pooled = canonicalLanguageStrings.get(language);
+      if (pooled != null) {
+        return pooled;
+      }
+      // pool a fresh copy rather than the caller's instance, in case that one holds on to a big string
+      final String copy = new String(language);
+      canonicalLanguageStrings.put(copy, copy);
+      return copy;
+    }
+  }
+}
Added: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLangs.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLangs.java?rev=991931&view=auto
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLangs.java (added)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLangs.java Thu Sep 2 13:43:50 2010
@@ -0,0 +1,366 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.analysis_engine.impl;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+
+import org.apache.uima.cas.text.Language;
+
+/**
+ * A set of languages, each represented by a canonical string object
+ * The set is stored without any subsumed elements
+ *
+ * Instances of this class are shareable
+ * Duplicate-on-update strategy
+ * Requires that all update operations to it return the
+ * possibly new RsLangs object, and that calls are always of the form
+ * rsLangInstance = rsLangInstance.<some-update-operation>
+ * Requires that all copy operations set the shared bit
+ * (setShared() returns void, so use the sharing factory):
+ * copiedInstance = RsLangs.createOrNull(origInstance);
+ *
+ * An instance marked isShared == true is immutable
+ * Updates cause duplication.
+ *
+ * Users store x-unspecified as null for the rsLangs instance
+ * Because of this, users use static methods, passing in as the first argument,
+ * the value of rsLangs, and getting an updated value of rsLangs.
+ * This allows the passed-in value to be null.
+ *
+ * Languages kept in canonical form:
+ * duplicates removed
+ * subsumed languages removed
+ * language strings mapped to unique strings (allowing == comparisons)
+ * Languages kept in array list, to allow for expansion
+ * Languages not removed, only added (for a given tof)
+ */
+public class RsLangs {
+
+  /**
+   * The languages of this set, as canonical strings.
+   * null means x-unspecified; an empty list means "no languages" and is
+   * used as the starting point when building or replacing a set.
+   */
+  private ArrayList<String> languages;
+
+  /** When true, updates must copy this instance instead of modifying it. */
+  private boolean isShared = false;
+
+  private RsLangs() {}
+
+  /**
+   * Creates a shared instance with a null language list, for representing
+   * x-unspecified inside compiled forms, where null can't be used.
+   *
+   * @return a new instance, marked shared, whose language list is null
+   */
+  static RsLangs createSharableEmpty() {
+    RsLangs rsl = new RsLangs();
+    rsl.setShared();
+    return rsl;
+  }
+
+  /**
+   * Builds a set from an array of language strings.
+   *
+   * @param languages the languages; null means x-unspecified
+   * @return null (x-unspecified) if languages is null or contains a null or
+   *         x-unspecified element, otherwise a new instance
+   */
+  static RsLangs createOrNull(String[] languages) {
+    return replaceAll(null, languages);
+  }
+
+  /** Marks this instance as shared, so that later updates copy it first. */
+  void setShared() {
+    isShared = true;
+  }
+
+  /**
+   * Shares an existing instance rather than copying it.
+   *
+   * @param rsl the instance to share, may be null
+   * @return null if rsl is null or has a null language list (x-unspecified),
+   *         otherwise rsl itself, now marked shared
+   */
+  static RsLangs createOrNull(RsLangs rsl) {
+    if (null == rsl || rsl.languages == null) {
+      return null;
+    }
+    rsl.setShared();
+    return rsl;
+  }
+
+  /**
+   * Copy constructor used when a shared instance has to be updated.
+   * The copy gets its own language list and is not marked shared.
+   */
+  private RsLangs(RsLangs original) {
+    languages = (null == original.languages) ? null : new ArrayList<String>(original.languages);
+  }
+
+  /**
+   * @param rsl may be null
+   * @return true if rsl is null, or its language list is null or has no elements
+   */
+  static boolean isEmpty(RsLangs rsl) {
+    return rsl == null || rsl.languages == null || rsl.languages.size() == 0;
+  }
+
+  /**
+   * Canonicalizes lang and tests it against the set.
+   *
+   * @param rsl may be null (means x-unspec, subsumes all)
+   * @param lang the language to test, need not be canonical
+   * @return true if rsl subsumes lang
+   */
+  static boolean subsumes(RsLangs rsl, String lang) {
+    return subsumesCanonical(rsl, RsLang.getCanonicalLanguageString(lang));
+  }
+
+  /**
+   * Tests an already canonical language string against the set.
+   * Strings are compared with ==, so lang must come from
+   * RsLang.getCanonicalLanguageString.
+   *
+   * @param rsl may be null (means x-unspec, subsumes all)
+   * @param lang canonical language string; null is treated as x-unspecified
+   * @return true if any of the languages in rsl subsumes lang
+   */
+  static boolean subsumesCanonical(RsLangs rsl, String lang) {
+    // don't test for size() == 0 - that's used by replace to indicate empty, not x-unspec
+    if (null == rsl || null == rsl.languages) {
+      return true; // x-unspecified subsumes all
+    }
+    if (null == lang || lang == Language.UNSPECIFIED_LANGUAGE) {
+      return false; // x-unspec not subsumed by anything (other than x-unspec)
+    }
+    return subsumesCanonical(rsl, lang, getBaseLanguage(lang));
+  }
+
+  /**
+   * @param rsl assumed to be not null, not x-unspec
+   * @param lang assumed to be not null, not x-unspec, canonical
+   * @param baseLang the canonical base language of lang
+   * @return true if some language in rsl is lang itself or its base language
+   */
+  private static boolean subsumesCanonical(RsLangs rsl, String lang, String baseLang) {
+    for (String rsLang : rsl.languages) {
+      if (subsumesCanonical(rsLang, lang, baseLang)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * A language subsumes another if it is that language or that language's base.
+   * Identity comparison is intended: all arguments are canonical strings.
+   */
+  private static boolean subsumesCanonical(String containingLang, String langToTest, String langToTestBase) {
+    return containingLang == langToTest || containingLang == langToTestBase;
+  }
+
+  /**
+   * @param language (must not be null)
+   * @return the same == language if it has no separator, otherwise the
+   *         canonical string for the part before the first separator
+   */
+  private static String getBaseLanguage(String language) {
+    String baseLanguage = language;
+    int index = language.indexOf(Language.CANONICAL_LANG_SEPARATOR);
+    if (index > -1) {
+      baseLanguage = RsLang.getCanonicalLanguageString(language.substring(0, index));
+    }
+    return baseLanguage;
+  }
+
+  /**
+   * Sets the languages of a set that currently is x-unspecified.
+   *
+   * NOTE(review): when rsl already has a non-null language list, that list is
+   * not cleared; langs are added to it, and a null langs leaves rsl unchanged.
+   * Confirm that callers expect this from a "replace".
+   *
+   * @param rsl could be null meaning current is x-unspecified
+   * @param langs null means x-unspecified
+   * @return null (meaning x-unspecified), or an instance of RsLangs
+   */
+  static RsLangs replaceAll(RsLangs rsl, String[] langs) {
+    if (rsl == null || rsl.languages == null) {
+      if (langs == null) {
+        return null;
+      }
+      if (rsl == null || rsl.isShared) {
+        rsl = new RsLangs();
+      }
+      rsl.languages = new ArrayList<String>(1); // special form means empty, not x-unspec
+    }
+    return addAll(rsl, langs);
+  }
+
+  /**
+   * Adds each language of an array to the set.
+   *
+   * @param rsl the set to add to; null or a null language list means x-unspecified
+   * @param langs the languages to add; null adds nothing
+   * @return rsl unchanged if it is x-unspecified or langs is null; otherwise
+   *         the updated set, which is null if an added language was x-unspecified
+   */
+  static RsLangs addAll(RsLangs rsl, String[] langs) {
+    if (null == langs || null == rsl || null == rsl.languages) { // because x-unspec subsumes all
+      return rsl;
+    }
+    for (String lang : langs) {
+      rsl = add(rsl, lang);
+    }
+    return rsl;
+  }
+
+  /**
+   * Adds each language of another set to the set.
+   *
+   * @param rsl the set to add to; null or a null language list means x-unspecified
+   * @param rsLangs the set to add; null or a null language list adds nothing
+   * @return rsl unchanged if either argument is null or has a null language
+   *         list, otherwise the updated set
+   */
+  static RsLangs addAll(RsLangs rsl, RsLangs rsLangs) {
+    if (null == rsLangs || null == rsLangs.languages ||
+        null == rsl || null == rsl.languages) { // because x-unspec subsumes all
+      return rsl;
+    }
+    for (String lang : rsLangs.languages) {
+      rsl = add(rsl, lang);
+    }
+    return rsl;
+  }
+
+  /**
+   * add language unless it's subsumed by existing one
+   * remove any languages the newly added one subsumes
+   * store x-unspec as null
+   *
+   * @param rsl the set to add to; if it is null or has a null language list
+   *        (x-unspecified) it is returned unchanged, because x-unspecified
+   *        already subsumes every language
+   * @param lang may be null or x-unspec, need not be canonical
+   * @return null if lang is null or x-unspec; otherwise rsl, or a copy of it
+   *         if rsl was shared and had to be changed
+   */
+  static RsLangs add(RsLangs rsl, String lang) {
+    lang = RsLang.getCanonicalLanguageString(lang);
+    if (lang == Language.UNSPECIFIED_LANGUAGE) {
+      return null;
+    }
+    if (rsl == null || rsl.languages == null) {
+      // already x-unspecified, e.g. an earlier add in the same addAll returned null
+      return rsl;
+    }
+    String baseLang = getBaseLanguage(lang);
+    if (!subsumesCanonical(rsl, lang, baseLang)) {
+      if (rsl.isShared) {
+        rsl = new RsLangs(rsl);
+      }
+      rsl.removeSubsumedLanguages(lang, baseLang); // leaves an empty list if all are removed
+      rsl.languages.add(lang);
+    }
+    return rsl;
+  }
+
+  /**
+   * Remove languages that are subsumed by the argument
+   * If all removed, keep as empty list
+   *
+   * @param canonicalLang the language about to be added
+   * @param baseLang the base language of canonicalLang; not needed for the
+   *        test, kept so the signature is unchanged
+   */
+  private void removeSubsumedLanguages(String canonicalLang, String baseLang) {
+    for (Iterator<String> it = languages.iterator(); it.hasNext();) {
+      String existing = it.next();
+      // the new language is the containing one here: an existing entry goes
+      // if it is the new language or has the new language as its base
+      if (subsumesCanonical(canonicalLang, existing, getBaseLanguage(existing))) {
+        it.remove();
+      }
+    }
+  }
+
+  /**
+   * Computes the languages that both this set and the other set cover.
+   *
+   * @param other the set to intersect with, may be null
+   * @return null for empty intersection (null doesn't mean x-unspecified here);
+   *         other if this set is x-unspecified; this if other is x-unspecified;
+   *         otherwise a new instance
+   */
+  RsLangs intersect(RsLangs other) {
+    if (null == other) {
+      return null;
+    }
+
+    if (null == this.languages) { // means x-unspecified, so return the other
+      return other;
+    }
+    if (null == other.languages) { // means x-unspecified, so return the first
+      return this;
+    }
+
+    RsLangs r = new RsLangs();
+    r.languages = new ArrayList<String>(1); // creates an empty, not null arraylist
+
+    for (String lang : this.languages) {
+      if (subsumesCanonical(other, lang)) { // add langs in this that are subsumed by other
+        r = add(r, lang);
+      }
+    }
+    for (String lang : other.languages) {
+      if (subsumesCanonical(this, lang)) { // add langs in other that are subsumed by this
+        r = add(r, lang);
+      }
+    }
+    if (r.languages.size() == 0) {
+      return null;
+    }
+    return r;
+  }
+
+  /**
+   * @param rsl may be null
+   * @return null if rsl is empty (see isEmpty), otherwise its languages as a new array
+   */
+  static String[] toArray(RsLangs rsl) {
+    return (isEmpty(rsl)) ? null : rsl.languages.toArray(new String[rsl.languages.size()]);
+  }
+
+  /**
+   * Must return the same hashcode regardless of the value of isShared, and
+   * treating the values as a set. A null language list hashes like an empty one.
+   */
+  @Override
+  public int hashCode() {
+    int result = 31;
+    if (languages != null) {
+      for (String lang : languages) {
+        result += lang.hashCode(); // non-standard, gives same answer regardless of order
+      }
+    }
+    return result;
+  }
+
+  /**
+   * This must return true ignoring the value of isShared, and
+   * treating the lists as a set. An instance with a null language list
+   * (x-unspecified) equals only another instance with a null language list.
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (obj == null || getClass() != obj.getClass()) {
+      return false;
+    }
+    RsLangs other = (RsLangs) obj;
+    if (languages == null || other.languages == null) {
+      return languages == other.languages; // equal only when both are null
+    }
+    return languages.size() == other.languages.size()
+        && other.languages.containsAll(languages);
+  }
+
+}
Added: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsType.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsType.java?rev=991931&view=auto
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsType.java (added)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsType.java Thu Sep 2 13:43:50 2010
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.analysis_engine.impl;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+
+import static org.apache.uima.analysis_engine.impl.ResultSpecification_impl.equalsOrBothNull;
+
+/**
+ * Holds types and/or features with language specs
+ *
+ * These are expected to be sparse with respect to the complete type system
+ *
+ */
+
+public class RsType {
+
+  /**
+   * Returned by getAllAppropriateFeatures when no features can be looked up.
+   * NOTE(review): this shared list is mutable; callers should not modify it.
+   */
+  public final static List<Feature> EMPTY_FEATURE_LIST = new ArrayList<Feature>(0);
+
+  /** Name of the type this entry describes. */
+  final String typeName;
+  boolean isAllFeatures = false;
+  /** true if type is specified by itself, without a feature */
+  boolean isSpecified = false;
+  /** languages for this type w/ allFeat; null means x-unspec */
+  RsLangs languagesAllFeat = null;
+  /** languages for this type w/o allFeat; null means x-unspec */
+  RsLangs languagesNotAllFeat = null;
+  /** the individually specified features of this type; null if there are none */
+  RsFeats features = null;
+
+  /**
+   * Creates an entry for a type with all other fields at their defaults.
+   * @param name the type name
+   */
+  RsType(String name) {
+    typeName = name;
+  }
+
+  /**
+   * Copy constructor. The language sets are obtained through
+   * RsLangs.createOrNull, and the features, if any, through the RsFeats
+   * copy constructor.
+   * @param original the entry to copy
+   */
+  RsType(RsType original) {
+    typeName = original.typeName;
+    isAllFeatures = original.isAllFeatures;
+    isSpecified = original.isSpecified;
+    languagesAllFeat = RsLangs.createOrNull(original.languagesAllFeat);
+    languagesNotAllFeat = RsLangs.createOrNull(original.languagesNotAllFeat);
+    if (original.features == null) {
+      features = null;
+    } else {
+      features = new RsFeats(original.features);
+    }
+  }
+
+  /**
+   * Tests a language against the languages recorded for one feature.
+   *
+   * @param shortFeatName short name of the feature
+   * @param lang the language to test
+   * @return true if lang subsumed by langs of the feature
+   *         or of the type with all-feats specified
+   */
+  boolean subsumesLanguageInFeat(String shortFeatName, String lang) {
+    if (isAllFeatures && RsLangs.subsumes(languagesAllFeat, lang)) {
+      return true;
+    }
+    final RsFeat feat = getFeat(shortFeatName);
+    return feat != null && RsLangs.subsumes(feat.languages, lang);
+  }
+
+  /**
+   * @param shortFeatName short name of the feature
+   * @return the result of looking the name up in features, or null if this
+   *         type has no features recorded
+   */
+  RsFeat getFeat(String shortFeatName) {
+    return (features == null) ? null : features.get(shortFeatName);
+  }
+
+  /**
+   * returns the Features for a type in a result spec
+   *
+   * @param ts The type system, may be null
+   * @return the features the type system reports for this type, or
+   *         EMPTY_FEATURE_LIST if ts is null or does not know the type
+   */
+  List<Feature> getAllAppropriateFeatures(final TypeSystem ts) {
+    if (ts != null) {
+      final Type type = ts.getType(typeName);
+      if (type != null) {
+        return type.getFeatures();
+      }
+    }
+    return EMPTY_FEATURE_LIST;
+  }
+
+  /**
+   * Tests whether every feature the type system reports for this type is
+   * individually recorded here.
+   *
+   * @param ts The type system, may be null
+   * @return if the type system reports no features (or can't be consulted):
+   *         true when no features are recorded here or ts is null;
+   *         otherwise true when the recorded count equals the reported count
+   *         and each reported feature is contained in features
+   */
+  boolean hasAllFeaturesExplicitly(TypeSystem ts) {
+    final List<Feature> appropriate = getAllAppropriateFeatures(ts);
+    final int recordedCount =
+        (features == null || features.features == null) ? 0 : features.features.size();
+
+    if (appropriate.size() == 0) {
+      return recordedCount == 0 || ts == null;
+    }
+    if (recordedCount != appropriate.size()) {
+      return false;
+    }
+    // counts match and are non-zero, so features is non-null here
+    for (Feature f : appropriate) {
+      if (!features.contains(typeName, f.getShortName())) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * @return false if no features are recorded; true if exactly one is;
+   *         otherwise true only if every feature's languages match those
+   *         of the first feature, per equalsOrBothNull
+   */
+  boolean allFeaturesHaveSameLangs() {
+    final int count = (features == null) ? 0 : features.size();
+    if (count < 2) {
+      return count == 1;
+    }
+    final List<RsFeat> feats = features.features;
+    final RsLangs reference = feats.get(0).languages;
+    for (int i = 1; i < count; i++) {
+      if (!equalsOrBothNull(reference, feats.get(i).languages)) {
+        return false;
+      }
+    }
+    return true;
+  }
+}
+