You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by jo...@apache.org on 2011/08/12 12:32:54 UTC
svn commit: r1157037 [6/10] - in
/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler: ./ .settings/
META-INF/ icons/ schema/ src/ src/main/ src/main/java/ src/main/java/org/
src/main/java/org/apache/ src/main/java/org/apache/uima/ src/main/jav...
Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierGeneralizationHelper.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierGeneralizationHelper.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierGeneralizationHelper.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierGeneralizationHelper.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,692 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.textmarker.textruler.learner.rapier;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+
+import org.apache.uima.textmarker.textruler.core.TextRulerRuleItem;
+import org.apache.uima.textmarker.textruler.core.TextRulerRulePattern;
+import org.apache.uima.textmarker.textruler.core.TextRulerToolkit;
+import org.apache.uima.textmarker.textruler.core.TextRulerWordConstraint;
+
+public class RapierGeneralizationHelper {
+
+ // ----------------------------------------------------------------------------------------------------------------------------------------
+ // --- ITEM(s) GENERALIZATION
+ // -------------------------------------------------------------------------------------------------------------
+ // ----------------------------------------------------------------------------------------------------------------------------------------
+ // convenience variant for two single items: every non-null item is put into
+ // its own list (null gives an empty list), then the list variant does the work
+ private static ArrayList<TextRulerRuleItem> getGeneralizationsForRuleItems(
+ TextRulerRuleItem item1, TextRulerRuleItem item2) {
+ ArrayList<TextRulerRuleItem> firstAsList = new ArrayList<TextRulerRuleItem>();
+ ArrayList<TextRulerRuleItem> secondAsList = new ArrayList<TextRulerRuleItem>();
+ if (item1 != null) { firstAsList.add(item1); }
+ if (item2 != null) { secondAsList.add(item2); }
+ return getGeneralizationsForRuleItemLists(firstAsList, secondAsList);
+ }
+
+ // generalize two given pattern elements or pattern element lists and return
+ // all possible generalizations as
+ // TextRulerRuleItem objects
+ private static ArrayList<TextRulerRuleItem> getGeneralizationsForRuleItemLists(
+ ArrayList<TextRulerRuleItem> item1List, ArrayList<TextRulerRuleItem> item2List) {
+ ArrayList<RapierRuleItem> proposedWordConstraints = new ArrayList<RapierRuleItem>();
+ ArrayList<RapierRuleItem> proposedTagConstraints = new ArrayList<RapierRuleItem>();
+ ArrayList<RapierRuleItem> proposedClassConstraints = new ArrayList<RapierRuleItem>();
+ ArrayList<TextRulerRuleItem> result = new ArrayList<TextRulerRuleItem>();
+
+ int resultListLen1 = 0; // summed length of item1List (see the loops below)
+ int resultListLen2 = 0; // summed length of item2List
+ boolean oneListIsEmpty = false;
+
+ if (item1List.size() == 0 && item2List.size() == 0) {
+ TextRulerToolkit.log("ERROR !"); // both lists empty: only logged, processing continues
+ }
+ if (item1List.size() == 0 || item2List.size() == 0) {
+ // one side has no items; this later enables listBeginsAtZero on the result items
+ oneListIsEmpty = true;
+ }
+
+ boolean hasEmptyWordList = false; // true if any item of either list has no word constraint
+ int maxWordCount = 0; // biggest word constraint count of a single item
+ boolean hasEmptyTagList = false; // true if any item of either list has no tag constraint
+ int maxTagCount = 0; // biggest tag constraint count of a single item
+ for (TextRulerRuleItem rt : item2List) {
+ RapierRuleItem t = (RapierRuleItem) rt;
+ resultListLen2 += t.isListItem() ? t.listLen() : 1;
+ if (t.getWordConstraints().size() > maxWordCount)
+ maxWordCount = t.getWordConstraints().size();
+ if (t.getWordConstraints().size() == 0)
+ hasEmptyWordList = true;
+ if (t.getTagConstraints().size() > maxTagCount)
+ maxTagCount = t.getTagConstraints().size();
+ if (t.getTagConstraints().size() == 0)
+ hasEmptyTagList = true;
+ }
+ for (TextRulerRuleItem rt : item1List) {
+ RapierRuleItem t = (RapierRuleItem) rt;
+ resultListLen1 += t.isListItem() ? t.listLen() : 1;
+ if (t.getWordConstraints().size() > maxWordCount)
+ maxWordCount = t.getWordConstraints().size();
+ if (t.getWordConstraints().size() == 0)
+ hasEmptyWordList = true;
+ if (t.getTagConstraints().size() > maxTagCount)
+ maxTagCount = t.getTagConstraints().size();
+ if (t.getTagConstraints().size() == 0)
+ hasEmptyTagList = true;
+ }
+ int resultListLen = resultListLen1 > resultListLen2 ? resultListLen1 : resultListLen2; // take
+ // the bigger of both summed
+ // lengths; a list item counts
+ // with its listLen(), any
+ // other item counts as 1
+ if (resultListLen == 1 && !oneListIsEmpty)
+ resultListLen = 0; // a list length of 1 is only kept when one
+ // itemList is empty; two non-empty lists with a
+ // maximum length of 1 get list length 0 instead
+
+ // generalize word constraints:
+ if (hasEmptyWordList) // at least one item has no word constraint at all
+ {
+ // the only proposal is an item without any word constraint
+ proposedWordConstraints.add(new RapierRuleItem());
+ } else // create union of all items' constraints AND (if the union is
+ // bigger than the biggest single constraint set) drop constraint
+ {
+ RapierRuleItem proposed = new RapierRuleItem();
+ for (TextRulerRuleItem t : item1List)
+ proposed.addWordConstraints(((RapierRuleItem) t).getWordConstraints());
+ for (TextRulerRuleItem t : item2List)
+ proposed.addWordConstraints(((RapierRuleItem) t).getWordConstraints());
+
+ proposedWordConstraints.add(proposed);
+
+ // if the union of both constraints is a real union (one does not
+ // subsume the other completely),
+ // we have to add the DROPPING OF THE CONSTRAINT as a second
+ // proposed word constraint
+ if (maxWordCount != proposed.getWordConstraints().size()) // the
+ // union differs
+ // in size from
+ // the biggest
+ // constraint set
+ // of a single
+ // given item
+ {
+ proposedWordConstraints.add(new RapierRuleItem());
+ }
+ }
+
+ if (hasEmptyTagList) // at least one item has no tag constraint at all
+ {
+ // the only proposal is an item without any tag constraint
+ proposedTagConstraints.add(new RapierRuleItem());
+ } else // create union of all items' constraints AND (if the union is
+ // bigger than the biggest single constraint set) drop constraint
+ {
+ RapierRuleItem proposed = new RapierRuleItem();
+ for (TextRulerRuleItem t : item1List)
+ proposed.addTagConstraints(((RapierRuleItem) t).getTagConstraints());
+ for (TextRulerRuleItem t : item2List)
+ proposed.addTagConstraints(((RapierRuleItem) t).getTagConstraints());
+
+ proposedTagConstraints.add(proposed);
+
+ // if the union of both constraints is a real union (one does not
+ // subsume the other completely),
+ // we have to add the DROPPING OF THE CONSTRAINT as a second
+ // proposed tag constraint
+ if (maxTagCount != proposed.getTagConstraints().size()) // the union
+ // differs in size
+ // from the biggest
+ // single tag set
+ {
+ proposedTagConstraints.add(new RapierRuleItem());
+ }
+ }
+
+ // TODO semantic class generalization
+ proposedClassConstraints.add(new RapierRuleItem()); // add only NO
+ // class
+ // constraint
+ // version for
+ // now!
+
+ // finally, create all combinations of the above proposed items
+ for (RapierRuleItem wt : proposedWordConstraints) {
+ for (RapierRuleItem tt : proposedTagConstraints) {
+ for (RapierRuleItem ct : proposedClassConstraints) {
+ RapierRuleItem newItem = new RapierRuleItem();
+ for (TextRulerWordConstraint wi : wt.getWordConstraints())
+ newItem.addWordConstraint(wi.copy());
+ for (String ti : tt.getTagConstraints())
+ newItem.addTagConstraint(ti);
+ for (String tc : ct.getClassConstraints())
+ newItem.addClassConstraint(tc);
+ newItem.setListLen(resultListLen);
+ newItem.setListBeginsAtZero(oneListIsEmpty && resultListLen > 0);
+ result.add(newItem);
+ }
+ }
+ }
+ return result;
+ }
+
+ // ----------------------------------------------------------------------------------------------------------------------------------------
+ // --- EQUAL SIZE PATTERN GENERALIZATION
+ // --------------------------------------------------------------------------------------------------
+ // ----------------------------------------------------------------------------------------------------------------------------------------
+ // pairs the items of both patterns position by position (first with first,
+ // second with second, ...) and generalizes every pair.
+ // input: two patterns that are expected to have the same size. a size
+ // mismatch is logged, but the pairing loop below runs nevertheless
+ // result: one new pattern for every way of choosing exactly one
+ // generalization per item pair
+ private static ArrayList<TextRulerRulePattern> getGeneralizationsForRuleItemPatternsOfEqualSize(
+ TextRulerRulePattern pattern1, TextRulerRulePattern pattern2) {
+ if (pattern1.size() != pattern2.size()) {
+ TextRulerToolkit.log("ERROR!");
+ }
+
+ // column i of this table holds all possible generalizations (LGGs) of the
+ // i-th item pair
+ ArrayList<ArrayList<TextRulerRuleItem>> lggColumns = new ArrayList<ArrayList<TextRulerRuleItem>>();
+ Iterator<TextRulerRuleItem> firstItems = pattern1.iterator();
+ Iterator<TextRulerRuleItem> secondItems = pattern2.iterator();
+ while (firstItems.hasNext()) {
+ TextRulerRuleItem left = firstItems.next();
+ TextRulerRuleItem right = secondItems.next();
+ lggColumns.add(getGeneralizationsForRuleItems(left, right));
+ }
+
+ // now we have one column of candidates per original item pair of
+ // pattern1 and pattern2. every selection of exactly one candidate per
+ // column is a new pattern, so we have to build all of these
+ // selections:
+ ArrayList<TextRulerRulePattern> combinations = new ArrayList<TextRulerRulePattern>();
+ recursiveBuildAllRuleItemCombinations(lggColumns, 0, new TextRulerRulePattern(),
+ combinations);
+ return combinations;
+ }
+
+ private static void recursiveBuildAllRuleItemCombinations(
+ ArrayList<ArrayList<TextRulerRuleItem>> table, int curIndex,
+ TextRulerRulePattern currentPattern, ArrayList<TextRulerRulePattern> resultPatterns) {
+ if (curIndex < table.size()) {
+ // columns are left: try each candidate of this column, backtrack afterwards
+ for (TextRulerRuleItem candidate : table.get(curIndex)) {
+ currentPattern.add(candidate);
+ recursiveBuildAllRuleItemCombinations(table, curIndex + 1, currentPattern, resultPatterns);
+ currentPattern.remove(currentPattern.size() - 1);
+ }
+ return;
+ }
+ // one item of every column is chosen: store copies of the chosen items as a new pattern
+ TextRulerRulePattern snapshot = new TextRulerRulePattern();
+ for (TextRulerRuleItem chosen : currentPattern) { snapshot.add(chosen.copy()); }
+ resultPatterns.add(snapshot);
+ }
+
+ // ----------------------------------------------------------------------------------------------------------------------------------------
+ // --- DISTINCT SIZE PATTERN GENERALIZATION
+ // -----------------------------------------------------------------------------------------------
+ // ----------------------------------------------------------------------------------------------------------------------------------------
+
+ // this is the low level version for patterns of distinct sizes! it creates
+ // ALL possible combinations how to pair/map pattern items
+ // of the shorter with those of the longer pattern. the optimizing version
+ // (getOptimizedGeneralizationsForRuleItemPatternsOfDistinctSize)
+ // uses this method for its pattern segments! (see below)
+ private static ArrayList<TextRulerRulePattern> getGeneralizationsForRuleItemPatternsOfDistinctSize(
+ TextRulerRulePattern pattern1, TextRulerRulePattern pattern2) {
+ ArrayList<TextRulerRulePattern> resultList = new ArrayList<TextRulerRulePattern>();
+ if (pattern1.size() == pattern2.size()) {
+ TextRulerToolkit.log("ERROR! CALL getGeneralizationsForRuleItemPatternsOfEqualSize instead!");
+ if (TextRulerToolkit.DEBUG)
+ return null; // NOTE(review): null is returned in DEBUG mode only; otherwise execution continues below
+ }
+
+ TextRulerRulePattern longerPattern = pattern1;
+ TextRulerRulePattern shorterPattern = pattern2;
+ if (pattern2.size() > pattern1.size()) {
+ longerPattern = pattern2;
+ shorterPattern = pattern1;
+ }
+
+ if (longerPattern.size() <= 1 || shorterPattern.size() <= 1) {
+ // Special case 1: one of the two patterns is empty AND
+ // special case 2: one of the two patterns has only ONE element
+ if (longerPattern.size() + shorterPattern.size() == 0) {
+ TextRulerToolkit.log("ERROR !! BOTH PATTERNS ARE EMPTY!");
+ if (TextRulerToolkit.DEBUG)
+ return null; // NOTE(review): as above, null in DEBUG mode only
+ }
+
+ // get all possible generalizations of the two patterns. result of
+ // each generalization is ONE rule item, so we
+ // don't use TextRulerRulePattern here since this IS NOT a rule
+ // pattern! it's a list of possible generalizations:
+ ArrayList<TextRulerRuleItem> generalizations = getGeneralizationsForRuleItemLists(
+ longerPattern, shorterPattern);
+ // create a one element result pattern for each:
+ for (TextRulerRuleItem item : generalizations) {
+ TextRulerRulePattern p = new TextRulerRulePattern();
+ p.add(item);
+ resultList.add(p);
+ }
+ }
+ // else SPECIAL CASE 3 (size difference > 6 or longer pattern > 10 items): only ONE result pattern, made of a single list item without constraints // TODO make those values configurable ?
+ else if (((longerPattern.size() - shorterPattern.size()) > 6) || (longerPattern.size() > 10)) {
+ int resultListLen1 = 0; // summed length of the shorter pattern (list items count with listLen(), others as 1)
+ for (TextRulerRuleItem rt : shorterPattern)
+ resultListLen1 += ((RapierRuleItem) rt).isListItem() ? ((RapierRuleItem) rt).listLen() : 1;
+ int resultListLen2 = 0; // same sum for the longer pattern
+ for (TextRulerRuleItem rt : longerPattern)
+ resultListLen2 += ((RapierRuleItem) rt).isListItem() ? ((RapierRuleItem) rt).listLen() : 1;
+
+ RapierRuleItem singleItem = new RapierRuleItem();
+ singleItem.setListLen(resultListLen1 > resultListLen2 ? resultListLen1 : resultListLen2);
+ TextRulerRulePattern singleItemPattern = new TextRulerRulePattern();
+ singleItemPattern.add(singleItem);
+ resultList.add(singleItemPattern);
+ } else { // sizes are different and both > 1
+ // create all possible generalization combinations, that is: how can
+ // we map elements of the shorter pattern
+ // to the ones of the longer pattern and then generalize each
+ // mapping (each group) of items?
+ ArrayList<ArrayList<RapierPatternItemMapping>> combinationList = new ArrayList<ArrayList<RapierPatternItemMapping>>();
+
+ recursiveBuildAllPossiblePatternMappingSequences(longerPattern, shorterPattern,
+ new ArrayList<RapierPatternItemMapping>(), combinationList);
+
+ for (ArrayList<RapierPatternItemMapping> mappingSequence : combinationList) {
+ resultList.addAll(getGeneralizationsForPatternMappingSequence(mappingSequence));
+ }
+ }
+ return resultList;
+ }
+
+ // creates all possible combinations how to pair together items from the
+ // longer and the shorter source pattern, e.g.
+ // 1 2 3 4 5 vs. 1 2 3 = 1/1+2+3 2/4 3/5, ...
+ private static void recursiveBuildAllPossiblePatternMappingSequences(
+ TextRulerRulePattern longerPattern, TextRulerRulePattern shorterPattern,
+ ArrayList<RapierPatternItemMapping> currentMappingSequence,
+ ArrayList<ArrayList<RapierPatternItemMapping>> resultList) {
+ int windowSize = longerPattern.size() - shorterPattern.size() + 1; // count of prefixes of the longer pattern that get tried for the first shorter item
+
+ if (shorterPattern.size() > longerPattern.size()) {
+ TextRulerToolkit.log("ERROR: SHORTER > LONGER !!"); // only logged, processing continues
+ }
+ if (longerPattern.size() == 0 || shorterPattern.size() == 0) {
+ TextRulerToolkit.log("ERROR: SHORTER == LONGER == 0!"); // NOTE(review): also logged when only ONE of both is empty; nothing is added to resultList then
+ } else {
+ // if the remaining (sub-)patterns are of equal size or the shorter one
+ // has only one element left, create one last item mapping that takes all
+ // remaining items of both and store a final result mapping sequence:
+ if (shorterPattern.size() == 1 || (longerPattern.size() == shorterPattern.size())) {
+ RapierPatternItemMapping lastMapping = new RapierPatternItemMapping();
+ lastMapping.shorterPattern.addAll(shorterPattern);
+ lastMapping.longerPattern.addAll(longerPattern);
+ ArrayList<RapierPatternItemMapping> newMappingSequence = new ArrayList<RapierPatternItemMapping>();
+ newMappingSequence.addAll(currentMappingSequence);
+ newMappingSequence.add(lastMapping);
+ resultList.add(newMappingSequence);
+ } else { // otherwise we have to create all possible combinations of
+ // the longer and shorter remaining pattern:
+ TextRulerRuleItem firstItem = shorterPattern.get(0);
+ // combine it with the longer pattern items 0, 0/1, ... 0/1/2/.../windowSize-1
+ for (int maxi = 0; maxi < windowSize; maxi++) {
+ RapierPatternItemMapping newMapping = new RapierPatternItemMapping();
+ newMapping.shorterPattern.add(firstItem);
+ for (int li = 0; li <= maxi; li++)
+ newMapping.longerPattern.add(longerPattern.get(li));
+ currentMappingSequence.add(newMapping);
+ TextRulerRulePattern restLongerPattern = new TextRulerRulePattern();
+ TextRulerRulePattern restShorterPattern = new TextRulerRulePattern();
+ for (int i = 1; i < shorterPattern.size(); i++)
+ restShorterPattern.add(shorterPattern.get(i));
+ for (int i = maxi + 1; i < longerPattern.size(); i++)
+ restLongerPattern.add(longerPattern.get(i));
+
+ // recurse on the items that are not part of the new mapping:
+ recursiveBuildAllPossiblePatternMappingSequences(restLongerPattern, restShorterPattern,
+ currentMappingSequence, resultList);
+
+ // remove the mapping added above to get back to the same state as
+ // before the recursion:
+ currentMappingSequence.remove(currentMappingSequence.size() - 1);
+ }
+ }
+ }
+ }
+
+ // here the input is called a MAPPING instead of a pattern segmentation in
+ // order to distinguish between the two levels of
+ // dividing the problem: a pattern segmentation is a special mapping of
+ // equal items in the two to generalize source patterns;
+ // the segments that result through that segmentation still need to be
+ // generalized (see getGeneralizationsForPatternSegmentation)
+ // if such a segment has subpatterns of different size,
+ // getGeneralizationsForRuleItemPatternsOfDistinctSize is used to
+ // generalize it, which uses THIS METHOD HERE to get all generalizations for
+ // a special MAPPING. a mapping (in comparison to the
+ // segmentation!) is a mapping between the longer and shorter pattern items
+ // which then get directly generalized here!
+ // in order to show this difference, we use the (internally exactly the
+ // same!) class RapierPatternItemMapping instead of
+ // RapierPatternSegment!
+ private static ArrayList<TextRulerRulePattern> getGeneralizationsForPatternMappingSequence(
+ ArrayList<RapierPatternItemMapping> patternMappingSequence) {
+ // every mapping pairs a group of shorter pattern items with a group of
+ // longer pattern items and has several possible generalizations, each of
+ // them being one rule item. collect them column by column, one column
+ // per mapping:
+ ArrayList<ArrayList<TextRulerRuleItem>> lggColumns = new ArrayList<ArrayList<TextRulerRuleItem>>();
+ for (int m = 0; m < patternMappingSequence.size(); m++) {
+ RapierPatternItemMapping pairing = patternMappingSequence.get(m);
+ lggColumns.add(getGeneralizationsForRuleItemLists(pairing.shorterPattern,
+ pairing.longerPattern));
+ }
+
+ // each selection of one rule item per column is a new pattern. create all
+ // of these selections (like in the equalSizeGeneralization) and return
+ // them:
+ ArrayList<TextRulerRulePattern> combinations = new ArrayList<TextRulerRulePattern>();
+ recursiveBuildAllRuleItemCombinations(lggColumns, 0, new TextRulerRulePattern(),
+ combinations);
+ return combinations;
+ }
+
+ // ----------------------------------------------------------------------------------------------------------------------------------------
+ // --- FIND MATCHINGS BETWEEN PATTERNS FOR GENERALIZATION
+ // -----------------------------------------------------------------------------------------------
+ // ----------------------------------------------------------------------------------------------------------------------------------------
+
+ // find matches in two patterns by comparing the items of the patterns and
+ // return all possible segmentations of those two patterns.
+ private static void recursiveFindPatternSegmentsByMatchingPatternItems(
+ TextRulerRulePattern longerPattern, TextRulerRulePattern shorterPattern,
+ ArrayList<RapierPatternSegment> currentSegmentation,
+ ArrayList<ArrayList<RapierPatternSegment>> resultList) {
+ int cmpWindowSize = longerPattern.size() - shorterPattern.size() + 1; // count of longer pattern positions that each shorter pattern item gets compared with
+
+ // end of the recursion: is one
+ // of the two (rest-)patterns
+ // empty ?
+ if (longerPattern.size() == 0 || shorterPattern.size() == 0) {
+ // create result segmentation
+ ArrayList<RapierPatternSegment> newSegmentation = new ArrayList<RapierPatternSegment>();
+ newSegmentation.addAll(currentSegmentation); // add current
+ // recursive state,
+ // then add the
+ // rest as a last
+ // segment if
+ // anything is left in
+ // one of the patterns
+ if (longerPattern.size() + shorterPattern.size() > 0) {
+ RapierPatternSegment lastSegment = new RapierPatternSegment();
+ for (TextRulerRuleItem i : shorterPattern)
+ lastSegment.shorterPattern.add(i);
+ for (TextRulerRuleItem i : longerPattern)
+ lastSegment.longerPattern.add(i);
+ newSegmentation.add(lastSegment);
+ }
+ resultList.add(newSegmentation);
+ } else {
+ boolean matched = false; // becomes true as soon as any equal item pair is found on this level
+ for (int si = 0; si < shorterPattern.size(); si++) {
+ // compare element si with si, si+1, ... si+cmpWindowSize-1
+ for (int li = si; li < si + cmpWindowSize; li++) {
+ if (longerPattern.get(li).equals(shorterPattern.get(si))) {
+ // matched pair found! (every matched pair opens its own branch)
+ matched = true;
+ // --> calculate pattern segments, add them to the
+ // current one and pass the rest of the
+ // patterns to the next recursion level:
+ RapierPatternSegment newSegment = new RapierPatternSegment();
+ for (int i = 0; i < si; i++)
+ newSegment.shorterPattern.add(shorterPattern.get(i));
+ for (int i = 0; i < li; i++)
+ newSegment.longerPattern.add(longerPattern.get(i));
+
+ boolean addedLeftSegmentation = false;
+ if (newSegment.longerPattern.size() > 0 || newSegment.shorterPattern.size() > 0) {
+ // only add the left side segment if it is not empty!
+ currentSegmentation.add(newSegment);
+ addedLeftSegmentation = true;
+ }
+
+ RapierPatternSegment matchedSegment = new RapierPatternSegment();
+ matchedSegment.shorterPattern.add(shorterPattern.get(si));
+ matchedSegment.longerPattern.add(longerPattern.get(li));
+ currentSegmentation.add(matchedSegment);
+
+ // the rest is now the rest to the right of both (so
+ // li+1 and si+1 to the ends...)
+ TextRulerRulePattern restLongerPattern = new TextRulerRulePattern();
+ TextRulerRulePattern restShorterPattern = new TextRulerRulePattern();
+ for (int i = li + 1; i < longerPattern.size(); i++)
+ restLongerPattern.add(longerPattern.get(i));
+ for (int i = si + 1; i < shorterPattern.size(); i++)
+ restShorterPattern.add(shorterPattern.get(i));
+
+ // recurse with the bigger rest as first argument; the "longer" and "shorter" roles of the two source patterns may swap here
+ if (restLongerPattern.size() > restShorterPattern.size())
+ recursiveFindPatternSegmentsByMatchingPatternItems(restLongerPattern,
+ restShorterPattern, currentSegmentation, resultList);
+ else
+ recursiveFindPatternSegmentsByMatchingPatternItems(restShorterPattern,
+ restLongerPattern, currentSegmentation, resultList);
+
+ // remove added segments so that we are in the same
+ // state as before the recursion:
+ if (addedLeftSegmentation)
+ currentSegmentation.remove(currentSegmentation.size() - 1); // two segments were added, so one more
+ // removal from the end is needed
+ currentSegmentation.remove(currentSegmentation.size() - 1); // remove
+ // the
+ // remaining added
+ // segment
+ }
+ }
+ }
+ if (!matched) // no equal pair: add all items of both patterns as one
+ // segment and store that as a result segmentation
+ {
+ ArrayList<RapierPatternSegment> newSegmentation = new ArrayList<RapierPatternSegment>();
+ newSegmentation.addAll(currentSegmentation);
+
+ RapierPatternSegment lastSegment = new RapierPatternSegment();
+ for (TextRulerRuleItem i : shorterPattern)
+ lastSegment.shorterPattern.add(i);
+ for (TextRulerRuleItem i : longerPattern)
+ lastSegment.longerPattern.add(i);
+ newSegmentation.add(lastSegment);
+ resultList.add(newSegmentation);
+ }
+ }
+ }
+
+ // generalizes a whole segmentation, i.e. a sequence of pattern segments that
+ // each hold one part of the longer and one part of the shorter source
+ // pattern.
+ // input: the segments of one segmentation, in pattern order
+ // result: all new patterns that can be built by taking one generalization of
+ // every segment and concatenating them in segment order
+ private static ArrayList<TextRulerRulePattern> getGeneralizationsForPatternSegmentation(
+ ArrayList<RapierPatternSegment> patternSegmentation) {
+ // for creating those, we need a table:
+ // each segment of the patternSegmentation creates a bunch of possible
+ // new generalized sub patterns (that's one column of the table). since
+ // we have a whole sequence of pattern segments (a whole segmentation),
+ // the table gets one such column per segment:
+ ArrayList<ArrayList<TextRulerRulePattern>> columns = new ArrayList<ArrayList<TextRulerRulePattern>>();
+
+ // now, we create all generalizations of each pattern segment and
+ // collect them in that table:
+ for (int s = 0; s < patternSegmentation.size(); s++) {
+ RapierPatternSegment segment = patternSegmentation.get(s);
+ TextRulerRulePattern longerPart = segment.longerPattern;
+ TextRulerRulePattern shorterPart = segment.shorterPattern;
+
+ // a segment whose two parts have the same item count is generalized
+ // item pair by item pair, any other segment goes through the
+ // generalization for patterns of distinct size
+ boolean sameSize = longerPart.size() == shorterPart.size();
+ if (sameSize) {
+ columns.add(
+ getGeneralizationsForRuleItemPatternsOfEqualSize(longerPart, shorterPart));
+ } else {
+ columns.add(
+ getGeneralizationsForRuleItemPatternsOfDistinctSize(longerPart, shorterPart));
+ }
+ }
+
+ // finally, we have to build all combinations of them: the result is a
+ // list of new generalized patterns, each of them made of one sub pattern
+ // per column
+ ArrayList<TextRulerRulePattern> combinations = new ArrayList<TextRulerRulePattern>();
+ recursiveBuildAllRuleItemCombinationsFromPatterns(columns, 0, new TextRulerRulePattern(),
+ combinations);
+ return combinations;
+ }
+
+ private static void recursiveBuildAllRuleItemCombinationsFromPatterns(
+ ArrayList<ArrayList<TextRulerRulePattern>> table, int curIndex,
+ TextRulerRulePattern currentPattern, ArrayList<TextRulerRulePattern> resultPatterns) {
+ if (curIndex < table.size()) {
+ // columns are left: append each candidate sub pattern of this column in
+ // turn, recurse and cut the appended items off again afterwards
+ for (TextRulerRulePattern subPattern : table.get(curIndex)) {
+ currentPattern.addAll(subPattern);
+ recursiveBuildAllRuleItemCombinationsFromPatterns(table, curIndex + 1, currentPattern, resultPatterns);
+ for (int k = subPattern.size(); k > 0; k--)
+ currentPattern.remove(currentPattern.size() - 1);
+ }
+ return;
+ }
+ // one sub pattern of every column is appended: store copies of all items as a new pattern
+ TextRulerRulePattern snapshot = new TextRulerRulePattern();
+ for (TextRulerRuleItem chosen : currentPattern) { snapshot.add(chosen.copy()); }
+ resultPatterns.add(snapshot);
+ }
+
+ // ----------------------------------------------------------------------------------------------------------------------------------------
+ // --- WORKING ON PATTERNS OF DISTINCT LENGTH - OPTIMIZED
+ // ---------------------------------------------------------------------------------
+ // ----------------------------------------------------------------------------------------------------------------------------------------
+
+ // "optimized", because this method uses the pattern item matching
+ // optimization (search for equal items and make a segmentation, etc.)
+ // generalizes two patterns of different item counts. "optimized" means that
+ // the patterns are first cut into segments at items that are equal in both
+ // patterns; each segment is then generalized on its own.
+ // input: the two patterns in any order
+ // result: the generalized patterns of all found segmentations (duplicates
+ // are not filtered out)
+ private static ArrayList<TextRulerRulePattern> getOptimizedGeneralizationsForRuleItemPatternsOfDistinctSize(
+ TextRulerRulePattern pattern1, TextRulerRulePattern pattern2) {
+ // in order to reduce the amount of possible combinations how to combine
+ // the elements of the shorter pattern with the elements of the longer
+ // pattern, we first search for equal pattern items in both patterns.
+ // those equal items get hardwired in the combination and the left and
+ // right remaining subpatterns stay as a "divided smaller problem" that
+ // needs to be conquered...
+ // the result of the search is a list of possible segmentations of the
+ // patterns. the segments of a segmentation are those "smaller" left
+ // problems that we then need to generalize in the original manner.
+ // if no equal items are found, one segmentation with only one segment
+ // (the original longer and shorter pattern) is returned and has to be
+ // generalized.
+
+ // the search wants the longer pattern as its first argument; if
+ // pattern1 is not the longer one, pattern2 takes that place
+ TextRulerRulePattern longerOne = pattern2;
+ TextRulerRulePattern shorterOne = pattern1;
+ if (pattern1.size() > pattern2.size()) {
+ longerOne = pattern1;
+ shorterOne = pattern2;
+ }
+
+ // the search starts with an empty current segmentation and appends
+ // every segmentation it finds to the given result list
+ ArrayList<ArrayList<RapierPatternSegment>> segmentations = new ArrayList<ArrayList<RapierPatternSegment>>();
+ recursiveFindPatternSegmentsByMatchingPatternItems(longerOne, shorterOne,
+ new ArrayList<RapierPatternSegment>(), segmentations);
+
+ // each found segmentation yields its own generalized patterns. all of
+ // them are put into one list, in the order of the found segmentations
+ // TODO filter out possible duplicates ?
+ ArrayList<TextRulerRulePattern> generalizations = new ArrayList<TextRulerRulePattern>();
+ for (int s = 0; s < segmentations.size(); s++) {
+ ArrayList<RapierPatternSegment> segmentation = segmentations.get(s);
+ generalizations.addAll(getGeneralizationsForPatternSegmentation(segmentation));
+ }
+ return generalizations;
+ }
+
+ // ----------------------------------------------------------------------------------------------------------------------------------------
+ // --- THE FINAL RESULT: a generalization method
+ // -------------------------------------------------------------------------------------------
+ // ----------------------------------------------------------------------------------------------------------------------------------------
+
+ // input: two sequences of rule items (=patterns) that shall be
+ // generalized... matchings are searched for an optimized search
+ // and to get a not too big count of generalizations...
+ // result: a (probably very large!) list of possible generalizations, e.g.
+ // used for all slotfiller generalizations of two rules...
+ public static ArrayList<TextRulerRulePattern> getGeneralizationsForRuleItemPatterns(
+ TextRulerRulePattern pattern1, TextRulerRulePattern pattern2) {
+ ArrayList<TextRulerRulePattern> result = new ArrayList<TextRulerRulePattern>();
+
+ // if (TextRulerToolkit.DEBUG)
+ // {
+ // TextRulerToolkit.log("\tgetGeneralizationsForRuleItemPatterns:");
+ // TextRulerToolkit.log("\tPattern1:"+pattern1);
+ // TextRulerToolkit.log("\tPattern2:"+pattern2);
+ // }
+
+ if (pattern1.size() == 0 && pattern2.size() == 0) {
+ return result; // return empty list
+ } else if (pattern1.size() == pattern2.size()) // both have the same
+ // pattern item count
+ {
+ // generalizing is easy then: simply generalize each pair of items:
+ result = getGeneralizationsForRuleItemPatternsOfEqualSize(pattern1, pattern2);
+ } else {
+ // TextRulerToolkit.logIf(TextRulerToolkit.DEBUG && pattern1.size()
+ // == 0 || pattern2.size() == 0, "SpecialCaseWithZeroLength");
+ result = getOptimizedGeneralizationsForRuleItemPatternsOfDistinctSize(pattern1, pattern2);
+ }
+
+ // if (TextRulerToolkit.DEBUG)
+ // {
+ // TextRulerToolkit.log("\t\tGeneralizations: "+result.size());
+ // for (TextRulerRulePattern lggPattern : result)
+ // TextRulerToolkit.log("\t\t\t"+lggPattern);
+ // }
+
+ return result;
+ }
+
+}
Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierGeneralizationHelper.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierGeneralizationHelper.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternItemMapping.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternItemMapping.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternItemMapping.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternItemMapping.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.textmarker.textruler.learner.rapier;
+
+/**
+ * Subclass of {@link RapierPatternSegment} that declares no members of its own; it only
+ * inherits what the segment class provides.
+ */
+public class RapierPatternItemMapping extends RapierPatternSegment {
+
+}
Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternItemMapping.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternItemMapping.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternSegment.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternSegment.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternSegment.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternSegment.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.textmarker.textruler.learner.rapier;
+
+import org.apache.uima.textmarker.textruler.core.TextRulerRuleItem;
+import org.apache.uima.textmarker.textruler.core.TextRulerRulePattern;
+import org.apache.uima.textmarker.textruler.core.TextRulerToolkit;
+
+/**
+ * Pairs two rule patterns, named the shorter and the longer one, and can dump both of them
+ * for debugging purposes.
+ */
+public class RapierPatternSegment {
+
+  TextRulerRulePattern shorterPattern = new TextRulerRulePattern();
+
+  TextRulerRulePattern longerPattern = new TextRulerRulePattern();
+
+  /**
+   * Dumps both patterns. The separator with the "ShorterList: " label and the line
+   * terminators go through {@code TextRulerToolkit.log}; the "LongerList: " label and the
+   * individual items are printed directly to {@code System.out}.
+   */
+  public void debugOutput() {
+    TextRulerToolkit.log("\n-------------\nShorterList: ");
+    printItems(shorterPattern);
+    TextRulerToolkit.log("");
+
+    System.out.print("LongerList: ");
+    printItems(longerPattern);
+    TextRulerToolkit.log("");
+  }
+
+  /** Prints the rule string of every item in {@code pattern}, each followed by one space. */
+  private static void printItems(TextRulerRulePattern pattern) {
+    for (TextRulerRuleItem item : pattern) {
+      System.out.print(item.getStringForRuleString(null, null, 0, 1, 0, 1, 0) + " ");
+    }
+  }
+}
Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternSegment.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternSegment.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPreferencePage.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPreferencePage.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPreferencePage.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPreferencePage.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.textmarker.textruler.learner.rapier;
+
+import java.util.ArrayList;
+import java.util.Map;
+
+import org.apache.uima.textmarker.textruler.TextRulerPlugin;
+import org.apache.uima.textmarker.textruler.extension.TextRulerController;
+import org.apache.uima.textmarker.textruler.extension.TextRulerLearnerController;
+import org.apache.uima.textmarker.textruler.extension.TextRulerLearnerFactory;
+import org.apache.uima.textmarker.textruler.extension.TextRulerLearnerParameter;
+import org.eclipse.jface.preference.BooleanFieldEditor;
+import org.eclipse.jface.preference.FieldEditor;
+import org.eclipse.jface.preference.FieldEditorPreferencePage;
+import org.eclipse.jface.preference.IPreferenceStore;
+import org.eclipse.jface.preference.StringFieldEditor;
+import org.eclipse.ui.IWorkbench;
+import org.eclipse.ui.IWorkbenchPreferencePage;
+
+/**
+ * Preference page for the RAPIER learner. For every algorithm parameter reported by the
+ * learner's factory it creates one field editor whose preference key is
+ * {@code <controller id>.<parameter id>}, backed by the plug-in's preference store.
+ */
+public class RapierPreferencePage extends FieldEditorPreferencePage implements
+        IWorkbenchPreferencePage {
+
+  public static String ID = "org.apache.uima.textmarker.textruler.algorithmPages";
+
+  private TextRulerLearnerController algorithmController;
+
+  private IPreferenceStore store;
+
+  /** All editors created by {@link #createFieldEditors()}, in creation order. */
+  private ArrayList<FieldEditor> fields = new ArrayList<FieldEditor>();
+
+  /**
+   * Resolves the controller registered for the RAPIER learner and attaches the plug-in's
+   * preference store to this page.
+   */
+  public RapierPreferencePage() {
+    super(FieldEditorPreferencePage.GRID);
+    TextRulerLearnerController ctrl = TextRulerController
+            .getControllerForID("org.apache.uima.textmarker.textruler.rapier");
+    this.algorithmController = ctrl;
+    store = TextRulerPlugin.getDefault().getPreferenceStore();
+    setPreferenceStore(store);
+  }
+
+  /** Nothing to initialize for this page. */
+  @Override
+  public void init(IWorkbench workbench) {
+  }
+
+  /**
+   * Creates one editor per algorithm parameter: a check box for boolean parameters and a text
+   * field for float, int and string parameters. Select parameters get no editor.
+   * <p>
+   * The factory's standard value, when there is one, is registered as the preference default.
+   * A parameter without a standard value still gets its editor; only the default registration
+   * is skipped (previously this case raised a {@link NullPointerException}).
+   */
+  @Override
+  protected void createFieldEditors() {
+    TextRulerLearnerFactory f = algorithmController.getFactory();
+    TextRulerLearnerParameter[] params = f.getAlgorithmParameters();
+    Map<String, Object> values = f.getAlgorithmParameterStandardValues();
+    if (params == null) {
+      return;
+    }
+    for (int i = 0; i < params.length; i++) {
+      TextRulerLearnerParameter p = params[i];
+      String id = algorithmController.getID() + "." + p.id;
+      // May be null: either no value map at all or no entry for this parameter.
+      Object standardValue = values != null ? values.get(p.id) : null;
+      switch (p.type) {
+        case ML_BOOL_PARAM: {
+          FieldEditor l = new BooleanFieldEditor(id, p.name, getFieldEditorParent());
+          fields.add(l);
+          addField(l);
+          if (standardValue != null) {
+            store.setDefault(id, (Boolean) standardValue);
+          }
+          l.setPreferenceStore(store);
+          break;
+        }
+
+        case ML_FLOAT_PARAM:
+        case ML_INT_PARAM:
+        case ML_STRING_PARAM: {
+          FieldEditor l = new StringFieldEditor(id, p.name, getFieldEditorParent());
+          fields.add(l);
+          addField(l);
+          if (standardValue != null) {
+            store.setDefault(id, standardValue.toString());
+          }
+          l.setPreferenceStore(store);
+          break;
+        }
+        case ML_SELECT_PARAM:
+          break;
+      }
+    }
+  }
+
+  /** Resets every editor of this page to its default; the superclass handling is bypassed. */
+  @Override
+  protected void performDefaults() {
+    for (FieldEditor f : fields) {
+      f.loadDefault();
+    }
+  }
+
+  /**
+   * Stores the value of every editor of this page; the superclass handling is bypassed.
+   *
+   * @return always {@code true}
+   */
+  @Override
+  public boolean performOk() {
+    for (FieldEditor f : fields) {
+      f.store();
+    }
+    return true;
+  }
+}
Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPreferencePage.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPreferencePage.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRule.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRule.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRule.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRule.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,364 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.textmarker.textruler.learner.rapier;
+
+import java.util.ArrayList;
+
+import org.apache.uima.textmarker.textruler.core.TextRulerBasicLearner;
+import org.apache.uima.textmarker.textruler.core.TextRulerRuleItem;
+import org.apache.uima.textmarker.textruler.core.TextRulerRulePattern;
+import org.apache.uima.textmarker.textruler.core.TextRulerSingleSlotRule;
+import org.apache.uima.textmarker.textruler.core.TextRulerStatisticsCollector;
+import org.apache.uima.textmarker.textruler.core.TextRulerTarget;
+import org.apache.uima.textmarker.textruler.core.TextRulerToolkit;
+
+
+/**
+ * Single-slot rule of the RAPIER learner. On top of the inherited slot pattern it keeps a
+ * priority computed from its covering statistics, references to two parent rules (a rule
+ * missing either parent counts as an initial rule) and four per-parent pre-/post-filler
+ * counters.
+ */
+public class RapierRule extends TextRulerSingleSlotRule {
+
+  /** Rating set by {@link #setCoveringStatistics}; a larger value means a worse rule. */
+  protected double priority = 0;
+
+  protected RapierRule parent1 = null;
+
+  protected RapierRule parent2 = null;
+
+  protected int parent1PreFiller_n = 0;
+
+  protected int parent2PreFiller_n = 0;
+
+  protected int parent1PostFiller_n = 0;
+
+  protected int parent2PostFiller_n = 0;
+
+  /**
+   * Copy constructor. Parent references are shared with {@code copyFrom}, not cloned.
+   *
+   * @param copyFrom
+   *          the rule to copy
+   */
+  protected RapierRule(RapierRule copyFrom) {
+    super(copyFrom);
+    priority = copyFrom.priority;
+    parent1 = copyFrom.parent1;
+    parent2 = copyFrom.parent2;
+    parent1PreFiller_n = copyFrom.parent1PreFiller_n;
+    parent2PreFiller_n = copyFrom.parent2PreFiller_n;
+    parent1PostFiller_n = copyFrom.parent1PostFiller_n;
+    parent2PostFiller_n = copyFrom.parent2PostFiller_n;
+  }
+
+  @Override
+  public RapierRule copy() {
+    return new RapierRule(this);
+  }
+
+  public RapierRule(TextRulerBasicLearner parentAlgorithm, TextRulerTarget target) {
+    super(parentAlgorithm, target);
+  }
+
+  /**
+   * @return {@code true} if the rule covers at least one example and no negative example
+   */
+  public boolean producesOnlyValidFillers() {
+    return coveringStatistics.getTotalCoveredExamples() > 0
+            && coveringStatistics.getCoveredNegativesCount() == 0;
+  }
+
+  /**
+   * Computes {@code (p - n) / (p + n)} with p = covered positives (valid fillers) and
+   * n = covered negatives (spurious fillers).
+   *
+   * @return the ratio; {@code NaN} when the rule covers neither positives nor negatives
+   */
+  public double noiseValue() {
+    int p = coveringStatistics.getCoveredPositivesCount();
+    int n = coveringStatistics.getCoveredNegativesCount();
+    return ((double) (p - n)) / ((double) (p + n));
+  }
+
+  /**
+   * @param z
+   *          the argument
+   * @return the base-2 logarithm of {@code z}
+   */
+  public static double log2(double z) {
+    return Math.log(z) / Math.log(2.0);
+  }
+
+  /**
+   * Stores the statistics and recomputes the priority as
+   * {@code ruleSize / (p * 100) - log2((p + 1) / (p + n + 2))}, where p and n are the covered
+   * positive and negative counts. A rule without any covered positive is logged as an error
+   * and rated with {@link Double#MAX_VALUE}.
+   */
+  @Override
+  public void setCoveringStatistics(TextRulerStatisticsCollector c) {
+    super.setCoveringStatistics(c);
+    int p = c.getCoveredPositivesCount();
+    int n = c.getCoveredNegativesCount();
+    if (p < 1) {
+      TextRulerToolkit.log("ERROR, A RULE MAY NOT COVER ZERO POSITIVE EXAMPLES! WHAT'S WRONG ?");
+      TextRulerToolkit.log("\tRULE: " + getRuleString());
+      // make sure this rule is rated totally bad:
+      priority = Double.MAX_VALUE;
+    } else {
+      priority = ((this.calculateRuleSize()) / ((double) p * 100))
+              - log2(((double) (p + 1)) / ((double) (p + n + 2)));
+
+      // p > 0 is guaranteed in this branch, so only the magnitude needs checking.
+      if (priority > 10000) {
+        TextRulerToolkit.log("STRANGE RULE PRIORITY ! CHECK THIS!");
+      }
+    }
+  }
+
+  public double getPriority() {
+    return priority;
+  }
+
+  public RapierRule getParent1() {
+    return parent1;
+  }
+
+  public RapierRule getParent2() {
+    return parent2;
+  }
+
+  public void setParent1(RapierRule p) {
+    parent1 = p;
+  }
+
+  public void setParent2(RapierRule p) {
+    parent2 = p;
+  }
+
+  public int getParent1PreFiller_n() {
+    return this.parent1PreFiller_n;
+  }
+
+  public int getParent2PreFiller_n() {
+    return this.parent2PreFiller_n;
+  }
+
+  public void setParent1PreFiller_n(int n) {
+    this.parent1PreFiller_n = n;
+  }
+
+  public void setParent2PreFiller_n(int n) {
+    this.parent2PreFiller_n = n;
+  }
+
+  public int getParent1PostFiller_n() {
+    return this.parent1PostFiller_n;
+  }
+
+  public int getParent2PostFiller_n() {
+    return this.parent2PostFiller_n;
+  }
+
+  public void setParent1PostFiller_n(int n) {
+    this.parent1PostFiller_n = n;
+  }
+
+  public void setParent2PostFiller_n(int n) {
+    this.parent2PostFiller_n = n;
+  }
+
+  /**
+   * @return the sum of the size points of all pre-filler, filler and post-filler items,
+   *         divided by 100
+   */
+  public double calculateRuleSize() {
+    double result = 0;
+    for (TextRulerRuleItem ri : slotPattern.preFillerPattern) {
+      result += ((RapierRuleItem) ri).getRuleSizePoints();
+    }
+    for (TextRulerRuleItem ri : slotPattern.fillerPattern) {
+      result += ((RapierRuleItem) ri).getRuleSizePoints();
+    }
+    for (TextRulerRuleItem ri : slotPattern.postFillerPattern) {
+      result += ((RapierRuleItem) ri).getRuleSizePoints();
+    }
+    return result / 100.0;
+  }
+
+  /**
+   * @return {@code true} if the first or the last item of the whole rule is a list item
+   */
+  public boolean hasListItemAtBorder() {
+    int cnt = totalItemCount();
+    if (cnt == 0) {
+      return false;
+    }
+    RapierRuleItem ri = (RapierRuleItem) getRuleItemWithIndex(0);
+    if (ri.isListItem()) {
+      return true;
+    }
+    if (cnt <= 1) {
+      return false;
+    }
+    ri = (RapierRuleItem) getRuleItemWithIndex(cnt - 1);
+    return (ri.isListItem());
+  }
+
+  /**
+   * Expands list items at the left and right border of this rule into concrete repetitions.
+   * The border list items are removed from a copy of the rule; then, for every combination of
+   * a left and a right repetition count, a rule is created that has that many non-list copies
+   * of the respective border item put back in place.
+   *
+   * @return the expanded rules, or {@code null} if the rule has no list item at its border
+   */
+  public ArrayList<RapierRule> createAllTestRules() {
+    if (!hasListItemAtBorder()) {
+      return null;
+    }
+    ArrayList<RapierRule> result = new ArrayList<RapierRule>();
+
+    RapierRule strippedRule = copy();
+
+    // The left border item is the first item of the first non-empty pattern.
+    int leftType = 0; // 0=none; 1=prefiller; 2=filler; 3=postfiller
+    RapierRuleItem left = null;
+    if (slotPattern.preFillerPattern.size() > 0) {
+      leftType = 1;
+      left = (RapierRuleItem) slotPattern.preFillerPattern.get(0);
+      if (left.isListItem()) {
+        strippedRule.getPreFillerPattern().remove(0);
+      }
+    } else if (slotPattern.fillerPattern.size() > 0) {
+      leftType = 2;
+      left = (RapierRuleItem) slotPattern.fillerPattern.get(0);
+      if (left.isListItem()) {
+        strippedRule.getFillerPattern().remove(0);
+      }
+    } else if (slotPattern.postFillerPattern.size() > 0) {
+      leftType = 3;
+      left = (RapierRuleItem) slotPattern.postFillerPattern.get(0);
+      if (left.isListItem()) {
+        strippedRule.getPostFillerPattern().remove(0);
+      }
+    }
+
+    if (left != null && !left.isListItem()) {
+      left = null;
+      leftType = 0;
+    }
+
+    // The right border item is the last item of the last non-empty pattern. It is only
+    // looked up for rules with more than one item, so it never coincides with the left one.
+    int rightType = 0; // 0=none; 1=postfiller; 2=filler; 3=prefiller
+    RapierRuleItem right = null;
+    if (totalItemCount() > 1) {
+      if (slotPattern.postFillerPattern.size() > 0) {
+        rightType = 1;
+        right = (RapierRuleItem) slotPattern.postFillerPattern.get(slotPattern.postFillerPattern
+                .size() - 1);
+        if (right.isListItem()) {
+          strippedRule.getPostFillerPattern()
+                  .remove(strippedRule.getPostFillerPattern().size() - 1);
+        }
+      } else if (slotPattern.fillerPattern.size() > 0) {
+        rightType = 2;
+        right = (RapierRuleItem) slotPattern.fillerPattern
+                .get(slotPattern.fillerPattern.size() - 1);
+        if (right.isListItem()) {
+          strippedRule.getFillerPattern().remove(strippedRule.getFillerPattern().size() - 1);
+        }
+      } else if (slotPattern.preFillerPattern.size() > 0) {
+        // Fixed: this branch used to re-test the post-filler size and was unreachable, so a
+        // trailing list item of a pre-filler-only rule was never expanded.
+        rightType = 3;
+        right = (RapierRuleItem) slotPattern.preFillerPattern.get(slotPattern.preFillerPattern
+                .size() - 1);
+        if (right.isListItem()) {
+          strippedRule.getPreFillerPattern().remove(strippedRule.getPreFillerPattern().size() - 1);
+        }
+      }
+    }
+    if (right != null && !right.isListItem()) {
+      right = null;
+      rightType = 0;
+    }
+    if (left == null && right == null) {
+      TextRulerToolkit.logIfDebug("HOW CAN THIS BE ?");
+      return null;
+    }
+    int leftCount = left != null ? left.listLen() : 0;
+    int rightCount = right != null ? right.listLen() : 0;
+
+    // A list that does not begin at zero needs at least one occurrence.
+    int leftStart;
+    if (leftCount > 0 && !left.listBeginsAtZero()) {
+      leftStart = 1;
+    } else {
+      leftStart = 0;
+    }
+    int rightStart;
+    if (rightCount > 0 && !right.listBeginsAtZero()) {
+      rightStart = 1;
+    } else {
+      rightStart = 0;
+    }
+
+    for (int leftI = leftStart; leftI <= leftCount; leftI++) {
+      for (int rightI = rightStart; rightI <= rightCount; rightI++) {
+        RapierRule newRule = strippedRule.copy();
+        if (leftI > 0) {
+          TextRulerRulePattern thePattern = null;
+          if (leftType == 1) {
+            thePattern = newRule.getPreFillerPattern();
+          } else if (leftType == 2) {
+            thePattern = newRule.getFillerPattern();
+          } else if (leftType == 3) {
+            thePattern = newRule.getPostFillerPattern();
+          }
+          for (int i = 0; i < leftI; i++) {
+            RapierRuleItem theItem = left.copy();
+            // remove the list character but add leftI plain copies instead
+            theItem.setListLen(0);
+            thePattern.add(0, theItem);
+          }
+        }
+        if (rightI > 0) {
+          TextRulerRulePattern thePattern = null;
+          if (rightType == 1) {
+            thePattern = newRule.getPostFillerPattern();
+          } else if (rightType == 2) {
+            thePattern = newRule.getFillerPattern();
+          } else if (rightType == 3) {
+            thePattern = newRule.getPreFillerPattern();
+          }
+          for (int i = 0; i < rightI; i++) {
+            RapierRuleItem theItem = right.copy();
+            // remove the list character but add rightI plain copies instead
+            theItem.setListLen(0);
+            thePattern.add(theItem);
+          }
+        }
+        newRule.setNeedsCompile(true);
+        if (newRule.totalItemCount() > 0) {
+          result.add(newRule);
+        }
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Builds the rules file content: the learner's file header followed either by this rule's
+   * string or, when border list items could be expanded, by this rule's string as a comment
+   * and one line per expanded test rule.
+   */
+  @Override
+  protected String getRulesFileContent() {
+    StringBuilder body = new StringBuilder();
+    // null means there was nothing to expand; fall back to the plain rule string instead of
+    // failing on the missing list.
+    ArrayList<RapierRule> rules = createAllTestRules();
+    if (rules != null) {
+      body.append("// ").append(getRuleString()).append("\n\n");
+      for (RapierRule r : rules) {
+        body.append(r.getRuleString()).append("\n");
+      }
+    } else {
+      body.append(getRuleString()).append("\n");
+    }
+    return algorithm.getTMFileHeaderString() + body;
+  }
+
+  /**
+   * @return {@code true} if at least one of the two parent references is unset
+   */
+  public boolean isInitialRule() {
+    return parent1 == null || parent2 == null;
+  }
+
+  /**
+   * Merges the first pair of adjacent items in {@code p} that are equal apart from their list
+   * length and of which at least one is a list item. The merged item replaces the first one,
+   * gets the summed list length (a non-list item counts as 1) and begins at zero only if both
+   * items did; the second item is removed.
+   *
+   * @param p
+   *          the pattern to compress in place
+   * @return {@code true} if a pair was merged
+   */
+  protected boolean compressFirstOccurenceOfSubsequentEqualPatternLists(TextRulerRulePattern p) {
+    for (int i = 0; i < p.size() - 1; i++) {
+      RapierRuleItem it1 = (RapierRuleItem) p.get(i);
+      RapierRuleItem it2 = (RapierRuleItem) p.get(i + 1);
+      if (it1.toStringAsNonPatternList().equals(it2.toStringAsNonPatternList())) {
+        if (it1.isListItem() || it2.isListItem()) {
+          boolean fromZero = it1.listBeginsAtZero() && it2.listBeginsAtZero();
+          int listLen = (it1.isListItem() ? it1.listLen() : 1)
+                  + (it2.isListItem() ? it2.listLen() : 1);
+          it1.setListLen(listLen);
+          it1.setListBeginsAtZero(fromZero);
+          p.remove(i + 1); // remove it2
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Repeatedly merges adjacent equal list items in the pre-filler, filler and post-filler
+   * patterns until a full round changes nothing, and flags the rule for recompilation if
+   * anything was merged.
+   */
+  public void combineSenselessPatternListItems() {
+    boolean didCompress = false;
+
+    while (true) {
+      // Non-short-circuit on purpose: every pattern gets one attempt per round.
+      boolean thisRoundCompressed = compressFirstOccurenceOfSubsequentEqualPatternLists(slotPattern.preFillerPattern);
+      thisRoundCompressed |= compressFirstOccurenceOfSubsequentEqualPatternLists(slotPattern.fillerPattern);
+      thisRoundCompressed |= compressFirstOccurenceOfSubsequentEqualPatternLists(slotPattern.postFillerPattern);
+      didCompress |= thisRoundCompressed;
+      if (!thisRoundCompressed) {
+        break;
+      }
+    }
+
+    if (didCompress) {
+      setNeedsCompile(true);
+    }
+  }
+
+}
Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRule.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRule.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRuleItem.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRuleItem.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRuleItem.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRuleItem.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,271 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.textmarker.textruler.learner.rapier;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.uima.textmarker.textruler.core.TextRulerRule;
+import org.apache.uima.textmarker.textruler.core.TextRulerRuleItem;
+import org.apache.uima.textmarker.textruler.core.TextRulerSingleSlotRule;
+import org.apache.uima.textmarker.textruler.core.TextRulerWordConstraint;
+
+/**
+ * Rule item of the RAPIER learner. An item carries word, POS tag and semantic class
+ * constraints plus an optional list length, and renders itself as a rule string.
+ */
+public class RapierRuleItem implements TextRulerRuleItem {
+
+  // direct word match constraint(s)
+  protected Set<TextRulerWordConstraint> words = new HashSet<TextRulerWordConstraint>();
+
+  // pos tag constraint(s)
+  protected Set<String> tags = new HashSet<String>();
+
+  // semantic class constraint(s)
+  protected Set<String> classes = new HashSet<String>();
+
+  protected int listLen = 0;
+
+  // special case for the TM RAPIER interpretation
+  protected boolean listBeginsAtZero = false;
+
+  /**
+   * Two items are considered equal when they render to the same rule string.
+   */
+  public boolean equals(TextRulerRuleItem o) {
+    String mine = getStringForRuleString(null, null, 0, 1, 0, 1, 0);
+    String theirs = o.getStringForRuleString(null, null, 0, 1, 0, 1, 0);
+    return mine.equals(theirs);
+  }
+
+  /**
+   * @return the string form of this item as if its list length were 0; the real list length
+   *         is put back afterwards
+   */
+  public String toStringAsNonPatternList() {
+    int savedListLen = listLen;
+    listLen = 0;
+    String plain = toString();
+    listLen = savedListLen;
+    return plain;
+  }
+
+  /**
+   * Renders this item. The result is an anchor (a single type-like word, tag or class
+   * constraint, or {@code ALL}), a {@code [min,max]?} quantifier for list items, a
+   * brace-enclosed comma-separated constraint list if any constraints remain, and - for the
+   * first item of a filler pattern - a {@code MARKONCE} action.
+   */
+  public String getStringForRuleString(TextRulerRule rule, MLRuleItemType type,
+          int numberInPattern, int patternSize, int numberInRule, int ruleSize, int slotIndex) {
+    String anchor = null;
+    ArrayList<String> constraints = new ArrayList<String>();
+
+    if (!words.isEmpty()) {
+      ArrayList<TextRulerWordConstraint> regExpWords = new ArrayList<TextRulerWordConstraint>();
+      ArrayList<TextRulerWordConstraint> typeWords = new ArrayList<TextRulerWordConstraint>();
+      for (TextRulerWordConstraint word : words) {
+        if (word.isRegExpConstraint()) {
+          regExpWords.add(word);
+        } else {
+          typeWords.add(word);
+        }
+      }
+      int regExpCount = regExpWords.size();
+      int typeCount = typeWords.size();
+
+      // all regular expression words are merged into one alternation
+      String regExpConstraint = null;
+      if (regExpCount > 0) {
+        regExpConstraint = "REGEXP(\"" + join(regExpWords, "", "", "|") + "\")";
+      }
+
+      boolean needsDisjunction = typeCount > 1 || (regExpCount > 0 && typeCount > 0);
+      if (needsDisjunction) {
+        StringBuilder disjunction = new StringBuilder("OR(");
+        disjunction.append(join(typeWords, "IS(", ")", ","));
+        if (regExpCount > 0) {
+          disjunction.append(",").append(regExpConstraint);
+        }
+        disjunction.append(")");
+        constraints.add(disjunction.toString());
+      } else if (typeCount == 1) {
+        // exactly one type-like word and no regular expression: use it as the anchor
+        anchor = typeWords.get(0).toString();
+      } else if (regExpCount > 0) {
+        constraints.add(regExpConstraint);
+      }
+    }
+
+    if (!tags.isEmpty()) {
+      if (anchor == null && tags.size() == 1) {
+        anchor = tags.iterator().next().toString();
+      } else {
+        constraints.add("OR(" + join(tags, "IS(", ")", ",") + ")");
+      }
+    }
+
+    if (!classes.isEmpty()) {
+      if (anchor == null && classes.size() == 1) {
+        anchor = classes.iterator().next().toString();
+      } else {
+        constraints.add("OR(" + join(classes, "IS(", ")", ",") + ")");
+      }
+    }
+
+    if (anchor == null) {
+      anchor = "ALL";
+    }
+
+    StringBuilder out = new StringBuilder();
+    out.append(anchor);
+    if (isListItem()) {
+      out.append("[").append(listBeginsAtZero ? 0 : 1).append(",").append(listLen).append("]?");
+    }
+
+    boolean hasConstraints = !constraints.isEmpty();
+    if (hasConstraints) {
+      // TODO: is this correct?
+      out.append("{").append(join(constraints, "", "", ","));
+    }
+
+    boolean startsFiller = type == MLRuleItemType.FILLER && numberInPattern == 0;
+    if (startsFiller) {
+      if (!hasConstraints) {
+        out.append("{");
+      }
+      out.append("->MARKONCE(" + ((TextRulerSingleSlotRule) rule).getMarkName());
+      if (patternSize > 1) {
+        out.append(", ").append(numberInRule + 1).append(", ").append(numberInRule + patternSize);
+      }
+      out.append(")}");
+    } else if (hasConstraints) {
+      out.append("}");
+    }
+    return out.toString();
+  }
+
+  /**
+   * Concatenates the string forms of {@code items} in iteration order, wrapping each one in
+   * {@code prefix} and {@code suffix} and placing {@code separator} between neighbours.
+   */
+  private static String join(Collection<?> items, String prefix, String suffix,
+          String separator) {
+    StringBuilder joined = new StringBuilder();
+    boolean first = true;
+    for (Object item : items) {
+      if (!first) {
+        joined.append(separator);
+      }
+      joined.append(prefix).append(item.toString()).append(suffix);
+      first = false;
+    }
+    return joined.toString();
+  }
+
+  public Set<TextRulerWordConstraint> getWordConstraints() {
+    return words;
+  }
+
+  public Set<String> getTagConstraints() {
+    return tags;
+  }
+
+  public Set<String> getClassConstraints() {
+    return classes;
+  }
+
+  public void setListLen(int val) {
+    listLen = val;
+  }
+
+  public void setListBeginsAtZero(boolean flag) {
+    listBeginsAtZero = flag;
+  }
+
+  public boolean listBeginsAtZero() {
+    return listBeginsAtZero;
+  }
+
+  public int listLen() {
+    return listLen;
+  }
+
+  /**
+   * @return {@code true} if the list length exceeds the minimum repetition count, i.e. it is
+   *         above 0 for a list beginning at zero and above 1 otherwise
+   */
+  public boolean isListItem() {
+    int minimum = listBeginsAtZero ? 0 : 1;
+    return listLen > minimum;
+  }
+
+  /**
+   * @return a new item with copies of the three constraint sets and the same list settings
+   */
+  public RapierRuleItem copy() {
+    RapierRuleItem duplicate = new RapierRuleItem();
+    duplicate.listBeginsAtZero = listBeginsAtZero;
+    duplicate.listLen = listLen;
+    duplicate.words = new HashSet<TextRulerWordConstraint>(words);
+    duplicate.tags = new HashSet<String>(tags);
+    duplicate.classes = new HashSet<String>(classes);
+    return duplicate;
+  }
+
+  /**
+   * @return 3 for a list item or 2 for a normal item, plus 2 for every additional word
+   *         disjunct and 1 for every additional tag or class disjunct
+   */
+  public int getRuleSizePoints() {
+    int points = isListItem() ? 3 : 2;
+    points += 2 * Math.max(0, words.size() - 1);
+    points += Math.max(0, tags.size() - 1);
+    points += Math.max(0, classes.size() - 1);
+    return points;
+  }
+
+  public void addWordConstraints(Collection<TextRulerWordConstraint> constraints) {
+    words.addAll(constraints);
+  }
+
+  public void addTagConstraints(Collection<String> constraints) {
+    tags.addAll(constraints);
+  }
+
+  public void addClassConstraints(Collection<String> constraints) {
+    classes.addAll(constraints);
+  }
+
+  public void addWordConstraint(TextRulerWordConstraint constraint) {
+    words.add(constraint);
+  }
+
+  public void addTagConstraint(String constraint) {
+    tags.add(constraint);
+  }
+
+  public void addClassConstraint(String constraint) {
+    classes.add(constraint);
+  }
+
+  @Override
+  public String toString() {
+    return getStringForRuleString(null, null, 0, 0, 0, 0, 0);
+  }
+
+}
Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRuleItem.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRuleItem.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRulePriorityQueue.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRulePriorityQueue.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRulePriorityQueue.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRulePriorityQueue.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.textmarker.textruler.learner.rapier;
+
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.PriorityQueue;
+
+ /**
+  * Size-bounded collection of {@link RapierRule}s keyed by {@link RapierRule#getPriority()}.
+  *
+  * <p>
+  * Two heaps hold the same rules: {@code ruleList} is ordered by ascending priority value, so
+  * {@link #peek()} returns the rule with the smallest value; {@code reverseRuleList} is ordered
+  * by descending priority value, so its head is the rule with the largest value. Whenever an
+  * insertion makes the collection larger than {@code maxSize}, the rule with the largest
+  * priority value is evicted.
+  * </p>
+  *
+  * <p>
+  * Eviction and iterator removal look rules up with {@code equals}, exactly as
+  * {@link PriorityQueue#remove(Object)} does.
+  * </p>
+  */
+ public class RapierRulePriorityQueue implements Iterable<RapierRule> {
+
+   /** Rules ordered by ascending priority value; the head has the smallest value. */
+   private final PriorityQueue<RapierRule> ruleList;
+
+   /** The same rules ordered by descending priority value; the head is the next to be evicted. */
+   private final PriorityQueue<RapierRule> reverseRuleList;
+
+   /** Maximum number of rules kept after an {@link #add(RapierRule)} call returns. */
+   private final int maxSize;
+
+   /**
+    * Creates an empty queue that retains at most {@code maxSize} rules.
+    *
+    * @param maxSize
+    *          the capacity bound; it is also used as the initial capacity of both heaps
+    * @throws IllegalArgumentException
+    *           if {@code maxSize} is less than 1 (raised by the {@link PriorityQueue} constructor)
+    */
+   public RapierRulePriorityQueue(int maxSize) {
+     this.maxSize = maxSize;
+
+     final Comparator<RapierRule> ascending = new Comparator<RapierRule>() {
+       public int compare(RapierRule o1, RapierRule o2) {
+         if (o1.getPriority() > o2.getPriority()) {
+           return 1;
+         } else if (o1.getPriority() < o2.getPriority()) {
+           return -1;
+         } else {
+           return 0;
+         }
+       }
+     };
+     // Exact mirror of the ascending order: swapping the arguments flips the sign of the result.
+     final Comparator<RapierRule> descending = new Comparator<RapierRule>() {
+       public int compare(RapierRule o1, RapierRule o2) {
+         return ascending.compare(o2, o1);
+       }
+     };
+
+     this.ruleList = new PriorityQueue<RapierRule>(maxSize, ascending);
+     this.reverseRuleList = new PriorityQueue<RapierRule>(maxSize, descending);
+   }
+
+   /**
+    * Returns an iterator over the retained rules.
+    *
+    * <p>
+    * The traversal order is that of {@link PriorityQueue#iterator()}, which is not guaranteed to
+    * be sorted by priority. {@link Iterator#remove()} removes the rule from both internal heaps
+    * so that they keep holding the same rules.
+    * </p>
+    *
+    * @return an iterator over the rules currently held
+    */
+   public Iterator<RapierRule> iterator() {
+     final Iterator<RapierRule> delegate = ruleList.iterator();
+     return new Iterator<RapierRule>() {
+       /** Rule most recently returned by {@code next()}; the target of {@code remove()}. */
+       private RapierRule last;
+
+       public boolean hasNext() {
+         return delegate.hasNext();
+       }
+
+       public RapierRule next() {
+         last = delegate.next();
+         return last;
+       }
+
+       public void remove() {
+         // delegate.remove() throws IllegalStateException first if there is nothing to remove,
+         // in which case the mirror heap is left untouched.
+         delegate.remove();
+         reverseRuleList.remove(last);
+       }
+     };
+   }
+
+   /** Removes all rules from the queue. */
+   public void clear() {
+     ruleList.clear();
+     reverseRuleList.clear();
+   }
+
+   /**
+    * Adds each of the given rules in iteration order via {@link #add(RapierRule)}, so the size
+    * bound is enforced after every single insertion.
+    *
+    * @param rules
+    *          the rules to add
+    */
+   public void addAll(Collection<RapierRule> rules) {
+     for (RapierRule r : rules) {
+       add(r);
+     }
+   }
+
+   /**
+    * Adds a rule and then evicts rules with the largest priority value until at most
+    * {@code maxSize} rules remain. The rule just added may itself be the one evicted.
+    *
+    * @param rule
+    *          the rule to add
+    */
+   public void add(RapierRule rule) {
+     ruleList.add(rule);
+     reverseRuleList.add(rule);
+     while (ruleList.size() > maxSize) {
+       // The head of the descending heap is the rule with the largest priority value. poll()
+       // removes that head directly instead of peeking and then searching for it again.
+       RapierRule tail = reverseRuleList.poll();
+       ruleList.remove(tail);
+     }
+   }
+
+   /**
+    * Returns the rule with the smallest priority value without removing it.
+    *
+    * @return the head of the ascending heap, or {@code null} if the queue is empty
+    */
+   public RapierRule peek() {
+     return ruleList.peek();
+   }
+
+ }
Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRulePriorityQueue.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRulePriorityQueue.java
------------------------------------------------------------------------------
svn:mime-type = text/plain