You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by jo...@apache.org on 2011/08/12 12:32:54 UTC

svn commit: r1157037 [6/10] - in /uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler: ./ .settings/ META-INF/ icons/ schema/ src/ src/main/ src/main/java/ src/main/java/org/ src/main/java/org/apache/ src/main/java/org/apache/uima/ src/main/jav...

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierGeneralizationHelper.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierGeneralizationHelper.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierGeneralizationHelper.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierGeneralizationHelper.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,692 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.textmarker.textruler.learner.rapier;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+
+import org.apache.uima.textmarker.textruler.core.TextRulerRuleItem;
+import org.apache.uima.textmarker.textruler.core.TextRulerRulePattern;
+import org.apache.uima.textmarker.textruler.core.TextRulerToolkit;
+import org.apache.uima.textmarker.textruler.core.TextRulerWordConstraint;
+
+public class RapierGeneralizationHelper {
+
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+  // --- ITEM(s) GENERALIZATION
+  // -------------------------------------------------------------------------------------------------------------
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+  private static ArrayList<TextRulerRuleItem> getGeneralizationsForRuleItems(
+          TextRulerRuleItem item1, TextRulerRuleItem item2) {
+    ArrayList<TextRulerRuleItem> item1List = new ArrayList<TextRulerRuleItem>();
+    ArrayList<TextRulerRuleItem> item2List = new ArrayList<TextRulerRuleItem>();
+    if (item1 != null)
+      item1List.add(item1);
+    if (item2 != null)
+      item2List.add(item2);
+    return getGeneralizationsForRuleItemLists(item1List, item2List);
+  }
+
+  // generalize two given pattern elements or pattern element lists and return
+  // all possible generalizations as
+  // TextRulerRuleItem objects
+  private static ArrayList<TextRulerRuleItem> getGeneralizationsForRuleItemLists(
+          ArrayList<TextRulerRuleItem> item1List, ArrayList<TextRulerRuleItem> item2List) {
+    ArrayList<RapierRuleItem> proposedWordConstraints = new ArrayList<RapierRuleItem>();
+    ArrayList<RapierRuleItem> proposedTagConstraints = new ArrayList<RapierRuleItem>();
+    ArrayList<RapierRuleItem> proposedClassConstraints = new ArrayList<RapierRuleItem>();
+    ArrayList<TextRulerRuleItem> result = new ArrayList<TextRulerRuleItem>();
+
+    int resultListLen1 = 0;
+    int resultListLen2 = 0;
+    boolean oneListIsEmpty = false;
+
+    if (item1List.size() == 0 && item2List.size() == 0) {
+      TextRulerToolkit.log("ERROR !");
+    }
+    if (item1List.size() == 0 || item2List.size() == 0) {
+      // TextRulerToolkit.log("SPECIAL CASE WITH ONE LIST OF ZERO SIZE");
+      oneListIsEmpty = true;
+    }
+
+    boolean hasEmptyWordList = false;
+    int maxWordCount = 0;
+    boolean hasEmptyTagList = false;
+    int maxTagCount = 0;
+    for (TextRulerRuleItem rt : item2List) {
+      RapierRuleItem t = (RapierRuleItem) rt;
+      resultListLen2 += t.isListItem() ? t.listLen() : 1;
+      if (t.getWordConstraints().size() > maxWordCount)
+        maxWordCount = t.getWordConstraints().size();
+      if (t.getWordConstraints().size() == 0)
+        hasEmptyWordList = true;
+      if (t.getTagConstraints().size() > maxTagCount)
+        maxTagCount = t.getTagConstraints().size();
+      if (t.getTagConstraints().size() == 0)
+        hasEmptyTagList = true;
+    }
+    for (TextRulerRuleItem rt : item1List) {
+      RapierRuleItem t = (RapierRuleItem) rt;
+      resultListLen1 += t.isListItem() ? t.listLen() : 1;
+      if (t.getWordConstraints().size() > maxWordCount)
+        maxWordCount = t.getWordConstraints().size();
+      if (t.getWordConstraints().size() == 0)
+        hasEmptyWordList = true;
+      if (t.getTagConstraints().size() > maxTagCount)
+        maxTagCount = t.getTagConstraints().size();
+      if (t.getTagConstraints().size() == 0)
+        hasEmptyTagList = true;
+    }
+    int resultListLen = resultListLen1 > resultListLen2 ? resultListLen1 : resultListLen2; // take
+    // the
+    // bigger
+    // of
+    // both
+    if (resultListLen == 1 && !oneListIsEmpty)
+      resultListLen = 0; // lists with a length of 1 can only occur when
+    // one itemList is empty! THAT CANNOT HAPPEN
+    // HERE!
+
+    // generalize word constraints:
+    if (hasEmptyWordList) // at least one constraint of both is empty
+    {
+      // do nothing here, proposed.wordItems stays empty
+      proposedWordConstraints.add(new RapierRuleItem());
+    } else // create union of both constraints AND (if both constraints
+    // weren't the same) drop constraint
+    {
+      RapierRuleItem proposed = new RapierRuleItem();
+      for (TextRulerRuleItem t : item1List)
+        proposed.addWordConstraints(((RapierRuleItem) t).getWordConstraints());
+      for (TextRulerRuleItem t : item2List)
+        proposed.addWordConstraints(((RapierRuleItem) t).getWordConstraints());
+
+      proposedWordConstraints.add(proposed);
+
+      // if the union of both constraints is a real union (one does not
+      // subsume the other completely),
+      // we have to add the DROPPING OF THE CONSTRAINT as a second
+      // proposed word constraint
+      if (maxWordCount != proposed.getWordConstraints().size()) // the
+      // union
+      // is a
+      // real
+      // bigger
+      // set
+      // than any single input set
+      {
+        proposedWordConstraints.add(new RapierRuleItem());
+      }
+    }
+
+    if (hasEmptyTagList) // at least one constraint of both is empty
+    {
+      // do nothing here, proposed.tagItems stays empty
+      proposedTagConstraints.add(new RapierRuleItem());
+    } else // create union of both constraints AND (if both constraints
+    // weren't the same) drop constraint
+    {
+      RapierRuleItem proposed = new RapierRuleItem();
+      for (TextRulerRuleItem t : item1List)
+        proposed.addTagConstraints(((RapierRuleItem) t).getTagConstraints());
+      for (TextRulerRuleItem t : item2List)
+        proposed.addTagConstraints(((RapierRuleItem) t).getTagConstraints());
+
+      proposedTagConstraints.add(proposed);
+
+      // if the union of both constraints is a real union (one does not
+      // subsume the other completely),
+      // we have to add the DROPPING OF THE CONSTRAINT as a second
+      // proposed tag constraint
+      if (maxTagCount != proposed.getTagConstraints().size()) // the union
+      // is a real
+      // bigger
+      // set than any single input set
+      {
+        proposedTagConstraints.add(new RapierRuleItem());
+      }
+    }
+
+    // TODO semantic class generalization
+    proposedClassConstraints.add(new RapierRuleItem()); // add only NO
+    // class
+    // constraint
+    // version for
+    // now!
+
+    // finally, create all combinations of the above proposed items
+    for (RapierRuleItem wt : proposedWordConstraints) {
+      for (RapierRuleItem tt : proposedTagConstraints) {
+        for (RapierRuleItem ct : proposedClassConstraints) {
+          RapierRuleItem newItem = new RapierRuleItem();
+          for (TextRulerWordConstraint wi : wt.getWordConstraints())
+            newItem.addWordConstraint(wi.copy());
+          for (String ti : tt.getTagConstraints())
+            newItem.addTagConstraint(ti);
+          for (String tc : ct.getClassConstraints())
+            newItem.addClassConstraint(tc);
+          newItem.setListLen(resultListLen);
+          newItem.setListBeginsAtZero(oneListIsEmpty && resultListLen > 0);
+          result.add(newItem);
+        }
+      }
+    }
+    return result;
+  }
+
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+  // --- EQUAL SIZE PATTERN GENERALIZATION
+  // --------------------------------------------------------------------------------------------------
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+  private static ArrayList<TextRulerRulePattern> getGeneralizationsForRuleItemPatternsOfEqualSize(
+          TextRulerRulePattern pattern1, TextRulerRulePattern pattern2) {
+    ArrayList<TextRulerRulePattern> resultList = new ArrayList<TextRulerRulePattern>();
+
+    ArrayList<ArrayList<TextRulerRuleItem>> generalizationTable = new ArrayList<ArrayList<TextRulerRuleItem>>();
+    Iterator<TextRulerRuleItem> it2 = pattern2.iterator();
+    if (pattern1.size() != pattern2.size()) {
+      TextRulerToolkit.log("ERROR!");
+    }
+    for (TextRulerRuleItem item1 : pattern1) {
+      TextRulerRuleItem item2 = it2.next();
+      // get all possible LGGs of the current two elements and save them
+      // into the matrix
+      ArrayList<TextRulerRuleItem> allLGGs = getGeneralizationsForRuleItems(item1, item2);
+      generalizationTable.add(allLGGs);
+      // TextRulerToolkit.log("--- GET GENERALISATIONS FOR TWO TERMS:  --"+t1+"--   --"+t2+"--");
+      // ArrayList<MLRapierRuleTerm> allLGGs =
+      // this.getGeneralizationsForRuleTerms(t1, t2);
+      // for (MLRapierRuleTerm term : allLGGs)
+      // TextRulerToolkit.log("--- "+term);
+      // TextRulerToolkit.log("--- END");
+    }
+
+    // now we have patternSize lists of possible generalizations, one list
+    // per original pattern item pair of
+    // pattern1 and pattern2. we now have to build all possible
+    // combinations. Each combination is a
+    // new pattern
+    recursiveBuildAllRuleItemCombinations(generalizationTable, 0, new TextRulerRulePattern(),
+            resultList);
+    return resultList;
+  }
+
+  private static void recursiveBuildAllRuleItemCombinations(
+          ArrayList<ArrayList<TextRulerRuleItem>> table, int curIndex,
+          TextRulerRulePattern currentPattern, ArrayList<TextRulerRulePattern> resultPatterns) {
+    if (curIndex >= table.size()) {
+      // make a deep copy of the current pattern:
+      TextRulerRulePattern copy = new TextRulerRulePattern();
+      for (TextRulerRuleItem item : currentPattern)
+        copy.add(item.copy());
+      resultPatterns.add(copy);
+    } else {
+      for (TextRulerRuleItem item : table.get(curIndex)) {
+        currentPattern.add(item);
+        recursiveBuildAllRuleItemCombinations(table, curIndex + 1, currentPattern, resultPatterns);
+        currentPattern.remove(currentPattern.size() - 1);
+      }
+    }
+  }
+
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+  // --- DISTINCT SIZE PATTERN GENERALIZATION
+  // -----------------------------------------------------------------------------------------------
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+
+  // this is the low level version for patterns of distinct sizes! it creates
+  // ALL possible combinations how to pair/map pattern items
+  // of the shorter with those of the longer pattern. the optimizing version
+  // (getOptimizedGeneralizationsForRuleItemPatternsOfDistinctSize)
+  // uses this method for its pattern segments! (see below)
+  private static ArrayList<TextRulerRulePattern> getGeneralizationsForRuleItemPatternsOfDistinctSize(
+          TextRulerRulePattern pattern1, TextRulerRulePattern pattern2) {
+    ArrayList<TextRulerRulePattern> resultList = new ArrayList<TextRulerRulePattern>();
+    if (pattern1.size() == pattern2.size()) {
+      TextRulerToolkit.log("ERROR! CALL getGeneralizationsForRuleItemPatternsOfEqualSize instead!");
+      if (TextRulerToolkit.DEBUG)
+        return null;
+    }
+
+    TextRulerRulePattern longerPattern = pattern1;
+    TextRulerRulePattern shorterPattern = pattern2;
+    if (pattern2.size() > pattern1.size()) {
+      longerPattern = pattern2;
+      shorterPattern = pattern1;
+    }
+
+    if (longerPattern.size() <= 1 || shorterPattern.size() <= 1) {
+      // Special case 1: one of the pattern terms list is empty AND
+      // special case 2: one has only ONE element
+      if (longerPattern.size() + shorterPattern.size() == 0) {
+        TextRulerToolkit.log("ERROR !! BOTH PATTERNS ARE EMPTY!");
+        if (TextRulerToolkit.DEBUG)
+          return null;
+      }
+
+      // get all possible generalizations of the two patterns. result of
+      // each generalization is ONE rule item, so we
+      // don't use TextRulerRulePattern here since this IS NOT a rule
+      // pattern! it's a list of possible generalizations:
+      ArrayList<TextRulerRuleItem> generalizations = getGeneralizationsForRuleItemLists(
+              longerPattern, shorterPattern);
+      // create a one element result pattern for each:
+      for (TextRulerRuleItem item : generalizations) {
+        TextRulerRulePattern p = new TextRulerRulePattern();
+        p.add(item);
+        resultList.add(p);
+      }
+    }
+    // else SPECIAL CASE 3 // TODO make those values configurable ?
+    else if (((longerPattern.size() - shorterPattern.size()) > 6) || (longerPattern.size() > 10)) {
+      int resultListLen1 = 0;
+      for (TextRulerRuleItem rt : shorterPattern)
+        resultListLen1 += ((RapierRuleItem) rt).isListItem() ? ((RapierRuleItem) rt).listLen() : 1;
+      int resultListLen2 = 0;
+      for (TextRulerRuleItem rt : longerPattern)
+        resultListLen2 += ((RapierRuleItem) rt).isListItem() ? ((RapierRuleItem) rt).listLen() : 1;
+
+      RapierRuleItem singleItem = new RapierRuleItem();
+      singleItem.setListLen(resultListLen1 > resultListLen2 ? resultListLen1 : resultListLen2);
+      TextRulerRulePattern singleItemPattern = new TextRulerRulePattern();
+      singleItemPattern.add(singleItem);
+      resultList.add(singleItemPattern);
+    } else { // sizes are different and both > 1
+      // create all possible generalization combinations, that is: how can
+      // we map elements of the shorter pattern
+      // to the ones of the longer pattern and then generalize each
+      // mapping (each group) of items?
+      ArrayList<ArrayList<RapierPatternItemMapping>> combinationList = new ArrayList<ArrayList<RapierPatternItemMapping>>();
+
+      recursiveBuildAllPossiblePatternMappingSequences(longerPattern, shorterPattern,
+              new ArrayList<RapierPatternItemMapping>(), combinationList);
+
+      for (ArrayList<RapierPatternItemMapping> mappingSequence : combinationList) {
+        resultList.addAll(getGeneralizationsForPatternMappingSequence(mappingSequence));
+      }
+    }
+    return resultList;
+  }
+
+  // creates all possible combinations how to pair together items from the
+  // longer and the shorter source pattern, e.g.
+  // 1 2 3 4 5 vs. 1 2 3 = 1/1+2+3 2/4 3/5, ...
+  private static void recursiveBuildAllPossiblePatternMappingSequences(
+          TextRulerRulePattern longerPattern, TextRulerRulePattern shorterPattern,
+          ArrayList<RapierPatternItemMapping> currentMappingSequence,
+          ArrayList<ArrayList<RapierPatternItemMapping>> resultList) {
+    int windowSize = longerPattern.size() - shorterPattern.size() + 1;
+
+    if (shorterPattern.size() > longerPattern.size()) {
+      TextRulerToolkit.log("ERROR: SHORTER > LONGER !!");
+    }
+    if (longerPattern.size() == 0 || shorterPattern.size() == 0) {
+      TextRulerToolkit.log("ERROR: SHORTER == LONGER == 0!");
+    } else {
+      // if the remaining (sub-)patterns are of equal size or one has only
+      // one element left, create one last item mapping and
+      // a final result mapping sequence:
+      if (shorterPattern.size() == 1 || (longerPattern.size() == shorterPattern.size())) {
+        RapierPatternItemMapping lastMapping = new RapierPatternItemMapping();
+        lastMapping.shorterPattern.addAll(shorterPattern);
+        lastMapping.longerPattern.addAll(longerPattern);
+        ArrayList<RapierPatternItemMapping> newMappingSequence = new ArrayList<RapierPatternItemMapping>();
+        newMappingSequence.addAll(currentMappingSequence);
+        newMappingSequence.add(lastMapping);
+        resultList.add(newMappingSequence);
+      } else { // otherwise we have to create all possible combinations of
+        // the longer and shorter remaining pattern:
+        TextRulerRuleItem firstItem = shorterPattern.get(0);
+        // combine with 0, 0/1, ... 0/1/2/.../windowSize-1
+        for (int maxi = 0; maxi < windowSize; maxi++) {
+          RapierPatternItemMapping newMapping = new RapierPatternItemMapping();
+          newMapping.shorterPattern.add(firstItem);
+          for (int li = 0; li <= maxi; li++)
+            newMapping.longerPattern.add(longerPattern.get(li));
+          currentMappingSequence.add(newMapping);
+          TextRulerRulePattern restLongerPattern = new TextRulerRulePattern();
+          TextRulerRulePattern restShorterPattern = new TextRulerRulePattern();
+          for (int i = 1; i < shorterPattern.size(); i++)
+            restShorterPattern.add(shorterPattern.get(i));
+          for (int i = maxi + 1; i < longerPattern.size(); i++)
+            restLongerPattern.add(longerPattern.get(i));
+
+          // recurse:
+          recursiveBuildAllPossiblePatternMappingSequences(restLongerPattern, restShorterPattern,
+                  currentMappingSequence, resultList);
+
+          // remove last segment to get back to the same state as
+          // before the recursion:
+          currentMappingSequence.remove(currentMappingSequence.size() - 1);
+        }
+      }
+    }
+  }
+
+  // here the input is called a MAPPING instead of a pattern segmentation in
+  // order to distinguish between the two levels of
+  // dividing the problem: a pattern segmentation is a special mapping of
+  // equal items in the two to generalize source patterns;
+  // the segments that result through that segmentation still need to be
+  // generalized (see getGeneralizationsForPatternSegmentation)
+  // if such a segment has subpatterns of different size,
+  // getGeneralizationsForRuleItemPatternsOfDistinctSize is used to
+  // generalize it, which uses THIS METHOD HERE to get all generalizations for
+  // a special MAPPING. a mapping (in comparison to the
+  // segmentation!) is a mapping between the longer and shorter pattern items
+  // which then get directly generalized here!
+  // in order to show this difference, we use the (internally exactly the
+  // same!) class RapierPatternItemMapping instead of
+  // RapierPatternSegment!
+  private static ArrayList<TextRulerRulePattern> getGeneralizationsForPatternMappingSequence(
+          ArrayList<RapierPatternItemMapping> patternMappingSequence) {
+    ArrayList<TextRulerRulePattern> resultList = new ArrayList<TextRulerRulePattern>();
+    ArrayList<ArrayList<TextRulerRuleItem>> generalizationTable = new ArrayList<ArrayList<TextRulerRuleItem>>();
+
+    // every mapping has several possible generalizations, so we store all
+    // of them in that generalizationTable, one list of
+    // generalizations for each mapping:
+    for (RapierPatternItemMapping mapping : patternMappingSequence) {
+      ArrayList<TextRulerRuleItem> lggList = getGeneralizationsForRuleItemLists(
+              mapping.shorterPattern, mapping.longerPattern);
+      generalizationTable.add(lggList);
+    }
+
+    // afterwards we have again to create all possible combinations of those
+    // lists (like in the equalSizeGeneralization):
+    // Each combination is a new pattern
+    recursiveBuildAllRuleItemCombinations(generalizationTable, 0, new TextRulerRulePattern(),
+            resultList);
+    return resultList;
+  }
+
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+  // --- FIND MATCHINGS BETWEEN PATTERNS FOR GENERALIZATION
+  // -----------------------------------------------------------------------------------------------
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+
+  // find matches in two patterns by comparing the items of the patterns and
+  // return all possible segmentations of those two patterns.
+  private static void recursiveFindPatternSegmentsByMatchingPatternItems(
+          TextRulerRulePattern longerPattern, TextRulerRulePattern shorterPattern,
+          ArrayList<RapierPatternSegment> currentSegmentation,
+          ArrayList<ArrayList<RapierPatternSegment>> resultList) {
+    int cmpWindowSize = longerPattern.size() - shorterPattern.size() + 1;
+
+    // is one
+    // (rest-)pattern
+    // empty ?
+    if (longerPattern.size() == 0 || shorterPattern.size() == 0) {
+      // create result segmentation
+      ArrayList<RapierPatternSegment> newSegmentation = new ArrayList<RapierPatternSegment>();
+      newSegmentation.addAll(currentSegmentation); // add current
+      // recursive state
+      //
+      // add the rest if
+      // anything is left
+      // in one of the
+      // patterns
+      if (longerPattern.size() + shorterPattern.size() > 0) {
+        RapierPatternSegment lastSegment = new RapierPatternSegment();
+        for (TextRulerRuleItem i : shorterPattern)
+          lastSegment.shorterPattern.add(i);
+        for (TextRulerRuleItem i : longerPattern)
+          lastSegment.longerPattern.add(i);
+        newSegmentation.add(lastSegment);
+      }
+      resultList.add(newSegmentation);
+    } else {
+      boolean matched = false;
+      for (int si = 0; si < shorterPattern.size(); si++) {
+        // compare element si with si, si+1, ... si+cmpWindowSize-1
+        for (int li = si; li < si + cmpWindowSize; li++) {
+          if (longerPattern.get(li).equals(shorterPattern.get(si))) {
+            // matched pair found!
+            matched = true;
+            // --> calculate pattern segments, add them to the
+            // current one and pass the rest of the
+            // patterns to the next recursion level:
+            RapierPatternSegment newSegment = new RapierPatternSegment();
+            for (int i = 0; i < si; i++)
+              newSegment.shorterPattern.add(shorterPattern.get(i));
+            for (int i = 0; i < li; i++)
+              newSegment.longerPattern.add(longerPattern.get(i));
+
+            boolean addedLeftSegmentation = false;
+            if (newSegment.longerPattern.size() > 0 || newSegment.shorterPattern.size() > 0) {
+              // only add if the segmentation is not empty!
+              currentSegmentation.add(newSegment);
+              addedLeftSegmentation = true;
+            }
+
+            RapierPatternSegment matchedSegment = new RapierPatternSegment();
+            matchedSegment.shorterPattern.add(shorterPattern.get(si));
+            matchedSegment.longerPattern.add(longerPattern.get(li));
+            currentSegmentation.add(matchedSegment);
+
+            // the rest is now the rest to the right of both (so
+            // li+1 and si+1 to the ends...)
+            TextRulerRulePattern restLongerPattern = new TextRulerRulePattern();
+            TextRulerRulePattern restShorterPattern = new TextRulerRulePattern();
+            for (int i = li + 1; i < longerPattern.size(); i++)
+              restLongerPattern.add(longerPattern.get(i));
+            for (int i = si + 1; i < shorterPattern.size(); i++)
+              restShorterPattern.add(shorterPattern.get(i));
+
+            // recurse...
+            if (restLongerPattern.size() > restShorterPattern.size())
+              recursiveFindPatternSegmentsByMatchingPatternItems(restLongerPattern,
+                      restShorterPattern, currentSegmentation, resultList);
+            else
+              recursiveFindPatternSegmentsByMatchingPatternItems(restShorterPattern,
+                      restLongerPattern, currentSegmentation, resultList);
+
+            // remove added segments so that we are in the same
+            // state as before the recursion:
+            if (addedLeftSegmentation)
+              currentSegmentation.remove(currentSegmentation.size() - 1); // remove the left side
+            // segment
+            currentSegmentation.remove(currentSegmentation.size() - 1); // remove
+            // the
+            // matched
+            // segment
+          }
+        }
+      }
+      if (!matched) // add remaining items of both lists in one pattern
+      // segment
+      {
+        ArrayList<RapierPatternSegment> newSegmentation = new ArrayList<RapierPatternSegment>();
+        newSegmentation.addAll(currentSegmentation);
+
+        RapierPatternSegment lastSegment = new RapierPatternSegment();
+        for (TextRulerRuleItem i : shorterPattern)
+          lastSegment.shorterPattern.add(i);
+        for (TextRulerRuleItem i : longerPattern)
+          lastSegment.longerPattern.add(i);
+        newSegmentation.add(lastSegment);
+        resultList.add(newSegmentation);
+      }
+    }
+  }
+
+  private static ArrayList<TextRulerRulePattern> getGeneralizationsForPatternSegmentation(
+          ArrayList<RapierPatternSegment> patternSegmentation) {
+    // for creating those, we need a table:
+    // each segment of the patternSegmentation creates a bunch of possible
+    // new generalized sub patterns (that's the inner
+    // ArrayList<TextRulerRulePattern>)
+    // since we have a whole sequence of pattern segments (a whole
+    // segmentation), we need the outer ArrayList to collect
+    // all generalizations of all pattern segments:
+    ArrayList<ArrayList<TextRulerRulePattern>> generalizationTable = new ArrayList<ArrayList<TextRulerRulePattern>>();
+
+    // now, we create all generalizations of each pattern segment and
+    // collect them in that table:
+    for (RapierPatternSegment pSeg : patternSegmentation) {
+      ArrayList<TextRulerRulePattern> pSegGeneralizations;
+
+      if (pSeg.longerPattern.size() == pSeg.shorterPattern.size())
+        pSegGeneralizations = getGeneralizationsForRuleItemPatternsOfEqualSize(pSeg.longerPattern,
+                pSeg.shorterPattern);
+      else
+        pSegGeneralizations = getGeneralizationsForRuleItemPatternsOfDistinctSize(
+                pSeg.longerPattern, pSeg.shorterPattern);
+
+      generalizationTable.add(pSegGeneralizations);
+    }
+
+    // finally, we have to build all combinations of them in form of
+    // MLRulePatterns:
+    ArrayList<TextRulerRulePattern> resultList = new ArrayList<TextRulerRulePattern>(); // the
+    // result
+    // is
+    // a
+    // list
+    // of
+    // new
+    // generalized
+    // patterns
+
+    recursiveBuildAllRuleItemCombinationsFromPatterns(generalizationTable, 0,
+            new TextRulerRulePattern(), resultList);
+
+    return resultList;
+  }
+
+  private static void recursiveBuildAllRuleItemCombinationsFromPatterns(
+          ArrayList<ArrayList<TextRulerRulePattern>> table, int curIndex,
+          TextRulerRulePattern currentPattern, ArrayList<TextRulerRulePattern> resultPatterns) {
+    if (curIndex >= table.size()) {
+      // make a deep copy of the current pattern:
+      TextRulerRulePattern copy = new TextRulerRulePattern();
+      for (TextRulerRuleItem item : currentPattern)
+        copy.add(item.copy());
+      resultPatterns.add(copy);
+    } else {
+      for (TextRulerRulePattern pattern : table.get(curIndex)) {
+        currentPattern.addAll(pattern);
+        recursiveBuildAllRuleItemCombinationsFromPatterns(table, curIndex + 1, currentPattern,
+                resultPatterns);
+        for (int i = 0; i < pattern.size(); i++)
+          currentPattern.remove(currentPattern.size() - 1);
+      }
+    }
+  }
+
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+  // --- WORKING ON PATTERNS OF DISTINCT LENGTH - OPTIMIZED
+  // ---------------------------------------------------------------------------------
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+
+  // "optimized", because this method uses the pattern item matching
+  // optimization (search for equal items and make a segmentation, etc.)
+  private static ArrayList<TextRulerRulePattern> getOptimizedGeneralizationsForRuleItemPatternsOfDistinctSize(
+          TextRulerRulePattern pattern1, TextRulerRulePattern pattern2) {
+    ArrayList<ArrayList<RapierPatternSegment>> matchedCombinationList = new ArrayList<ArrayList<RapierPatternSegment>>();
+
+    // in order to reduce the amount of possible combinations how to combine
+    // the elements of the shorter pattern with the
+    // elements of the longer pattern, we first search for equal pattern
+    // items in both patterns. those equal patterns get hardwired
+    // in the combination and the left and right remaining subpatterns stay
+    // as a "divided smaller problem" that needs to be conquered...
+    // the result of the search is a list of possible segmentations of the
+    // patterns. all segmentations are those "smaller"
+    // left problems that we then need to generalize in the original manner.
+    // if no equal items are found, one segmentation with only one segment
+    // (the original longer and shorter pattern) is returned and
+    // has to be generalized.
+    if (pattern1.size() > pattern2.size())
+      recursiveFindPatternSegmentsByMatchingPatternItems(pattern1, pattern2,
+              new ArrayList<RapierPatternSegment>(), matchedCombinationList);
+    else
+      recursiveFindPatternSegmentsByMatchingPatternItems(pattern2, pattern1,
+              new ArrayList<RapierPatternSegment>(), matchedCombinationList);
+
+    // if (TextRulerToolkit.DEBUG && matchedCombinationList.size() > 1)
+    // {
+    // TextRulerToolkit.log("PATTERN SEQUENCES FOUND: "+matchedCombinationList.size());
+    // for (ArrayList<RapierPatternSegment> patternSequence :
+    // matchedCombinationList)
+    // {
+    // TextRulerToolkit.log("\tNEXT SEQUENCE");
+    // for (RapierPatternSegment pSeg : patternSequence)
+    // {
+    // TextRulerToolkit.log("\t\t"+pSeg.longerPattern);
+    // TextRulerToolkit.log("\t\t"+pSeg.shorterPattern);
+    // }
+    // }
+    // }
+
+    ArrayList<TextRulerRulePattern> resultList = new ArrayList<TextRulerRulePattern>();
+
+    for (ArrayList<RapierPatternSegment> patternSegmentation : matchedCombinationList) {
+      // TODO filter out possible duplicates ?
+      resultList.addAll(getGeneralizationsForPatternSegmentation(patternSegmentation));
+    }
+    return resultList;
+  }
+
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+  // --- THE FINAL RESULT: a generalization method
+  // -------------------------------------------------------------------------------------------
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+
+  // input: two sequences of rule items (=patterns) that shall be
+  // generalized... matchings are searched for an optimized search
+  // and to get a not too big count of generalizations...
+  // result: a (probably very large!) list of possible generalizations, e.g.
+  // used for all slotfiller generalizations of two rules...
+  public static ArrayList<TextRulerRulePattern> getGeneralizationsForRuleItemPatterns(
+          TextRulerRulePattern pattern1, TextRulerRulePattern pattern2) {
+    ArrayList<TextRulerRulePattern> result = new ArrayList<TextRulerRulePattern>();
+
+    // if (TextRulerToolkit.DEBUG)
+    // {
+    // TextRulerToolkit.log("\tgetGeneralizationsForRuleItemPatterns:");
+    // TextRulerToolkit.log("\tPattern1:"+pattern1);
+    // TextRulerToolkit.log("\tPattern2:"+pattern2);
+    // }
+
+    if (pattern1.size() == 0 && pattern2.size() == 0) {
+      return result; // return empty list
+    } else if (pattern1.size() == pattern2.size()) // both have the same
+    // pattern item count
+    {
+      // generalizing is easy then: simply generalize each pair of items:
+      result = getGeneralizationsForRuleItemPatternsOfEqualSize(pattern1, pattern2);
+    } else {
+      // TextRulerToolkit.logIf(TextRulerToolkit.DEBUG && pattern1.size()
+      // == 0 || pattern2.size() == 0, "SpecialCaseWithZeroLength");
+      result = getOptimizedGeneralizationsForRuleItemPatternsOfDistinctSize(pattern1, pattern2);
+    }
+
+    // if (TextRulerToolkit.DEBUG)
+    // {
+    // TextRulerToolkit.log("\t\tGeneralizations: "+result.size());
+    // for (TextRulerRulePattern lggPattern : result)
+    // TextRulerToolkit.log("\t\t\t"+lggPattern);
+    // }
+
+    return result;
+  }
+
+}

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierGeneralizationHelper.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierGeneralizationHelper.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternItemMapping.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternItemMapping.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternItemMapping.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternItemMapping.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.textmarker.textruler.learner.rapier;
+
+/**
+ * Subclass of {@link RapierPatternSegment} that adds no fields or methods of its own; it only
+ * provides a distinct type name for a segment.
+ * <p>
+ * NOTE(review): presumably used to mark a segment that maps pattern items of the shorter pattern
+ * onto items of the longer pattern - confirm against the code that instantiates it.
+ */
+public class RapierPatternItemMapping extends RapierPatternSegment {
+
+}

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternItemMapping.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternItemMapping.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternSegment.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternSegment.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternSegment.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternSegment.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.textmarker.textruler.learner.rapier;
+
+import org.apache.uima.textmarker.textruler.core.TextRulerRuleItem;
+import org.apache.uima.textmarker.textruler.core.TextRulerRulePattern;
+import org.apache.uima.textmarker.textruler.core.TextRulerToolkit;
+
+public class RapierPatternSegment {
+
+  TextRulerRulePattern shorterPattern = new TextRulerRulePattern();
+
+  TextRulerRulePattern longerPattern = new TextRulerRulePattern();
+
+  public void debugOutput() {
+
+    TextRulerToolkit.log("\n-------------\nShorterList: ");
+    for (TextRulerRuleItem t : shorterPattern)
+      System.out.print(t.getStringForRuleString(null, null, 0, 1, 0, 1, 0) + "    ");
+    TextRulerToolkit.log("");
+
+    System.out.print("LongerList: ");
+    for (TextRulerRuleItem t : longerPattern)
+      System.out.print(t.getStringForRuleString(null, null, 0, 1, 0, 1, 0) + "    ");
+    TextRulerToolkit.log("");
+  }
+}

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternSegment.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPatternSegment.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPreferencePage.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPreferencePage.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPreferencePage.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPreferencePage.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.textmarker.textruler.learner.rapier;
+
+import java.util.ArrayList;
+import java.util.Map;
+
+import org.apache.uima.textmarker.textruler.TextRulerPlugin;
+import org.apache.uima.textmarker.textruler.extension.TextRulerController;
+import org.apache.uima.textmarker.textruler.extension.TextRulerLearnerController;
+import org.apache.uima.textmarker.textruler.extension.TextRulerLearnerFactory;
+import org.apache.uima.textmarker.textruler.extension.TextRulerLearnerParameter;
+import org.eclipse.jface.preference.BooleanFieldEditor;
+import org.eclipse.jface.preference.FieldEditor;
+import org.eclipse.jface.preference.FieldEditorPreferencePage;
+import org.eclipse.jface.preference.IPreferenceStore;
+import org.eclipse.jface.preference.StringFieldEditor;
+import org.eclipse.ui.IWorkbench;
+import org.eclipse.ui.IWorkbenchPreferencePage;
+
+/**
+ * Preference page that shows one field editor per parameter of the RAPIER learner.
+ * <p>
+ * The parameters and their standard values are obtained from the learner factory of the
+ * controller registered under the id {@code org.apache.uima.textmarker.textruler.rapier}. Values
+ * are kept in the preference store of {@link TextRulerPlugin} under the key
+ * {@code <controller id>.<parameter id>}.
+ */
+public class RapierPreferencePage extends FieldEditorPreferencePage implements
+        IWorkbenchPreferencePage {
+
+  public static String ID = "org.apache.uima.textmarker.textruler.algorithmPages";
+
+  /** Controller of the RAPIER learner as returned by {@link TextRulerController}. */
+  private TextRulerLearnerController algorithmController;
+
+  /** Preference store of the TextRuler plugin; holds the values and defaults of all editors. */
+  private IPreferenceStore store;
+
+  /** All editors created by {@link #createFieldEditors()}, used for explicit load/store. */
+  private ArrayList<FieldEditor> fields = new ArrayList<FieldEditor>();
+
+  /**
+   * Creates the page with a grid layout, looks up the RAPIER learner controller and installs the
+   * plugin's preference store.
+   */
+  public RapierPreferencePage() {
+    super(FieldEditorPreferencePage.GRID);
+    TextRulerLearnerController ctrl = TextRulerController
+            .getControllerForID("org.apache.uima.textmarker.textruler.rapier");
+    this.algorithmController = ctrl;
+    store = TextRulerPlugin.getDefault().getPreferenceStore();
+    setPreferenceStore(store);
+  }
+
+  @Override
+  public void init(IWorkbench workbench) {
+    // nothing to initialize
+  }
+
+  /**
+   * Creates a field editor for every boolean, float, int and string parameter of the learner and
+   * registers the parameter's standard value as the default in the preference store. Select
+   * parameters get no editor.
+   * <p>
+   * Parameters without a standard value still get an editor; only the default registration is
+   * skipped, so a missing entry no longer aborts the page with a NullPointerException.
+   */
+  @Override
+  protected void createFieldEditors() {
+    // NOTE(review): the controller lookup in the constructor presumably yields null when the
+    // learner is not registered - in that case the page simply stays empty.
+    if (algorithmController == null) {
+      return;
+    }
+    TextRulerLearnerFactory f = algorithmController.getFactory();
+    TextRulerLearnerParameter[] params = f.getAlgorithmParameters();
+    Map<String, Object> values = f.getAlgorithmParameterStandardValues();
+    if (params == null) {
+      return;
+    }
+    for (TextRulerLearnerParameter p : params) {
+      String id = algorithmController.getID() + "." + p.id;
+      Object standardValue = values != null ? values.get(p.id) : null;
+      switch (p.type) {
+        case ML_BOOL_PARAM: {
+          FieldEditor l = new BooleanFieldEditor(id, p.name, getFieldEditorParent());
+          register(l);
+          if (standardValue instanceof Boolean) {
+            store.setDefault(id, ((Boolean) standardValue).booleanValue());
+          }
+          l.setPreferenceStore(store);
+          break;
+        }
+
+        case ML_FLOAT_PARAM:
+        case ML_INT_PARAM:
+        case ML_STRING_PARAM: {
+          FieldEditor l = new StringFieldEditor(id, p.name, getFieldEditorParent());
+          register(l);
+          if (standardValue != null) {
+            store.setDefault(id, standardValue.toString());
+          }
+          l.setPreferenceStore(store);
+          break;
+        }
+        case ML_SELECT_PARAM:
+          break;
+        default:
+          break;
+      }
+    }
+  }
+
+  /** Remembers the editor for explicit load/store and adds it to the page. */
+  private void register(FieldEditor editor) {
+    fields.add(editor);
+    addField(editor);
+  }
+
+  /** Resets every editor of this page to its default value. */
+  @Override
+  protected void performDefaults() {
+    for (FieldEditor f : fields) {
+      f.loadDefault();
+    }
+    // super.performDefaults();
+  }
+
+  /**
+   * Stores the value of every editor of this page.
+   * 
+   * @return always {@code true}
+   */
+  @Override
+  public boolean performOk() {
+    for (FieldEditor f : fields) {
+      f.store();
+    }
+    // return super.performOk();
+    return true;
+  }
+}

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPreferencePage.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierPreferencePage.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRule.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRule.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRule.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRule.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,364 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.textmarker.textruler.learner.rapier;
+
+import java.util.ArrayList;
+
+import org.apache.uima.textmarker.textruler.core.TextRulerBasicLearner;
+import org.apache.uima.textmarker.textruler.core.TextRulerRuleItem;
+import org.apache.uima.textmarker.textruler.core.TextRulerRulePattern;
+import org.apache.uima.textmarker.textruler.core.TextRulerSingleSlotRule;
+import org.apache.uima.textmarker.textruler.core.TextRulerStatisticsCollector;
+import org.apache.uima.textmarker.textruler.core.TextRulerTarget;
+import org.apache.uima.textmarker.textruler.core.TextRulerToolkit;
+
+
+/**
+ * Single-slot rule of the RAPIER learner.
+ * <p>
+ * In addition to the slot pattern inherited from {@link TextRulerSingleSlotRule}, a rule stores a
+ * priority derived from its covering statistics, references to two parent rules and four
+ * per-parent pre-/post-filler counters.
+ */
+public class RapierRule extends TextRulerSingleSlotRule {
+
+  /**
+   * Rating computed in {@link #setCoveringStatistics(TextRulerStatisticsCollector)}; a rule that
+   * covers no positive example gets {@link Double#MAX_VALUE}, i.e. larger values are worse.
+   */
+  protected double priority = 0;
+
+  protected RapierRule parent1 = null;
+
+  protected RapierRule parent2 = null;
+
+  // NOTE(review): the four counters below are only stored and copied in this class; presumably
+  // they record how many pre-/post-filler items of each parent have been consumed - confirm
+  // against the learner that sets them.
+  protected int parent1PreFiller_n = 0;
+
+  protected int parent2PreFiller_n = 0;
+
+  protected int parent1PostFiller_n = 0;
+
+  protected int parent2PostFiller_n = 0;
+
+  /**
+   * Copy constructor; copies the priority, the parent references (not the parents themselves)
+   * and the four counters in addition to whatever the super class copies.
+   */
+  protected RapierRule(RapierRule copyFrom) {
+    super(copyFrom);
+    priority = copyFrom.priority;
+    parent1 = copyFrom.parent1;
+    parent2 = copyFrom.parent2;
+    parent1PreFiller_n = copyFrom.parent1PreFiller_n;
+    parent2PreFiller_n = copyFrom.parent2PreFiller_n;
+    parent1PostFiller_n = copyFrom.parent1PostFiller_n;
+    parent2PostFiller_n = copyFrom.parent2PostFiller_n;
+  }
+
+  @Override
+  public RapierRule copy() {
+    return new RapierRule(this);
+  }
+
+  public RapierRule(TextRulerBasicLearner parentAlgorithm, TextRulerTarget target) {
+    super(parentAlgorithm, target);
+  }
+
+  /**
+   * @return {@code true} if the rule covers at least one example and no negative example
+   */
+  public boolean producesOnlyValidFillers() {
+    return coveringStatistics.getTotalCoveredExamples() > 0
+            && coveringStatistics.getCoveredNegativesCount() == 0;
+  }
+
+  /**
+   * Computes {@code (p - n) / (p + n)} with p = covered positives (valid fillers) and n = covered
+   * negatives (spurious fillers).
+   * 
+   * @return the noise value; {@code NaN} if the rule covers neither positives nor negatives
+   */
+  public double noiseValue() {
+    int p = coveringStatistics.getCoveredPositivesCount();
+    int n = coveringStatistics.getCoveredNegativesCount();
+    return ((double) (p - n)) / ((double) (p + n));
+  }
+
+  /**
+   * @return the logarithm of {@code z} to the base 2
+   */
+  public static double log2(double z) {
+    return Math.log(z) / Math.log(2.0);
+  }
+
+  /**
+   * Stores the statistics and recomputes the priority as
+   * {@code ruleSize / (p * 100) - log2((p + 1) / (p + n + 2))}. A rule without covered positives
+   * is logged as an error and rated with {@link Double#MAX_VALUE}.
+   */
+  @Override
+  public void setCoveringStatistics(TextRulerStatisticsCollector c) {
+    super.setCoveringStatistics(c);
+    int p = c.getCoveredPositivesCount();
+    int n = c.getCoveredNegativesCount();
+    if (p < 1) {
+      TextRulerToolkit.log("ERROR, A RULE MAY NOT COVER ZERO POSITIVE EXAMPLES! WHAT'S WRONG ?");
+      TextRulerToolkit.log("\tRULE: " + getRuleString());
+      // make sure this rule is rated totally bad:
+      priority = Double.MAX_VALUE;
+    } else {
+      priority = ((this.calculateRuleSize()) / ((double) p * 100))
+              - log2(((double) (p + 1)) / ((double) (p + n + 2)));
+
+      // p >= 1 is guaranteed in this branch, so only the magnitude needs to be checked
+      if (priority > 10000) {
+        TextRulerToolkit.log("STRANGE RULE PRIORITY ! CHECK THIS!");
+      }
+    }
+  }
+
+  public double getPriority() {
+    return priority;
+  }
+
+  public RapierRule getParent1() {
+    return parent1;
+  }
+
+  public RapierRule getParent2() {
+    return parent2;
+  }
+
+  public void setParent1(RapierRule p) {
+    parent1 = p;
+  }
+
+  public void setParent2(RapierRule p) {
+    parent2 = p;
+  }
+
+  public int getParent1PreFiller_n() {
+    return this.parent1PreFiller_n;
+  }
+
+  public int getParent2PreFiller_n() {
+    return this.parent2PreFiller_n;
+  }
+
+  public void setParent1PreFiller_n(int n) {
+    this.parent1PreFiller_n = n;
+  }
+
+  public void setParent2PreFiller_n(int n) {
+    this.parent2PreFiller_n = n;
+  }
+
+  public int getParent1PostFiller_n() {
+    return this.parent1PostFiller_n;
+  }
+
+  public int getParent2PostFiller_n() {
+    return this.parent2PostFiller_n;
+  }
+
+  public void setParent1PostFiller_n(int n) {
+    this.parent1PostFiller_n = n;
+  }
+
+  public void setParent2PostFiller_n(int n) {
+    this.parent2PostFiller_n = n;
+  }
+
+  /**
+   * @return the sum of the size points of all pre-filler, filler and post-filler items, divided
+   *         by 100
+   */
+  public double calculateRuleSize() {
+    double result = 0;
+    for (TextRulerRuleItem ri : slotPattern.preFillerPattern) {
+      result += ((RapierRuleItem) ri).getRuleSizePoints();
+    }
+    for (TextRulerRuleItem ri : slotPattern.fillerPattern) {
+      result += ((RapierRuleItem) ri).getRuleSizePoints();
+    }
+    for (TextRulerRuleItem ri : slotPattern.postFillerPattern) {
+      result += ((RapierRuleItem) ri).getRuleSizePoints();
+    }
+    return result / 100.0;
+  }
+
+  /**
+   * @return {@code true} if the first or the last item of the whole rule is a list item
+   */
+  public boolean hasListItemAtBorder() {
+    int cnt = totalItemCount();
+    if (cnt == 0) {
+      return false;
+    }
+    RapierRuleItem ri = (RapierRuleItem) getRuleItemWithIndex(0);
+    if (ri.isListItem()) {
+      return true;
+    }
+    if (cnt <= 1) {
+      return false;
+    }
+    ri = (RapierRuleItem) getRuleItemWithIndex(cnt - 1);
+    return ri.isListItem();
+  }
+
+  /**
+   * Expands the list items at the left and right border of this rule into explicit repetitions.
+   * <p>
+   * The border list items are removed from a copy of this rule; then, for every admissible
+   * repetition count of the left item combined with every admissible count of the right item, a
+   * new rule is created in which the item is inserted that many times as a plain (non-list) item.
+   * A count of zero is only admissible if the list item begins at zero. Rules that end up without
+   * any item are not returned.
+   * 
+   * @return the expanded rules, or {@code null} if this rule has no list item at its border
+   */
+  public ArrayList<RapierRule> createAllTestRules() {
+    if (!hasListItemAtBorder()) {
+      return null;
+    }
+    ArrayList<RapierRule> result = new ArrayList<RapierRule>();
+
+    RapierRule strippedRule = copy();
+
+    // left border: first item of the first non-empty pattern
+    int leftType = 0; // 0=none; 1=prefiller; 2=filler; 3=postfiller
+    RapierRuleItem left = null;
+    if (slotPattern.preFillerPattern.size() > 0) {
+      leftType = 1;
+      left = (RapierRuleItem) slotPattern.preFillerPattern.get(0);
+      if (left.isListItem()) {
+        strippedRule.getPreFillerPattern().remove(0);
+      }
+    } else if (slotPattern.fillerPattern.size() > 0) {
+      leftType = 2;
+      left = (RapierRuleItem) slotPattern.fillerPattern.get(0);
+      if (left.isListItem()) {
+        strippedRule.getFillerPattern().remove(0);
+      }
+    } else if (slotPattern.postFillerPattern.size() > 0) {
+      leftType = 3;
+      left = (RapierRuleItem) slotPattern.postFillerPattern.get(0);
+      if (left.isListItem()) {
+        strippedRule.getPostFillerPattern().remove(0);
+      }
+    }
+
+    if (left != null && !left.isListItem()) {
+      left = null;
+      leftType = 0;
+    }
+
+    // right border: last item of the last non-empty pattern; with a single item in the whole
+    // rule that item was already handled as the left border
+    int rightType = 0; // 0=none; 1=postfiller; 2=filler; 3=prefiller
+    RapierRuleItem right = null;
+    if (totalItemCount() > 1) {
+      if (slotPattern.postFillerPattern.size() > 0) {
+        rightType = 1;
+        right = (RapierRuleItem) slotPattern.postFillerPattern.get(slotPattern.postFillerPattern
+                .size() - 1);
+        if (right.isListItem()) {
+          strippedRule.getPostFillerPattern()
+                  .remove(strippedRule.getPostFillerPattern().size() - 1);
+        }
+      } else if (slotPattern.fillerPattern.size() > 0) {
+        rightType = 2;
+        right = (RapierRuleItem) slotPattern.fillerPattern
+                .get(slotPattern.fillerPattern.size() - 1);
+        if (right.isListItem()) {
+          strippedRule.getFillerPattern().remove(strippedRule.getFillerPattern().size() - 1);
+        }
+      } else if (slotPattern.preFillerPattern.size() > 0) {
+        // this branch used to test the post-filler size again, which made it unreachable and
+        // lost a trailing list item that lives in the pre-filler pattern
+        rightType = 3;
+        right = (RapierRuleItem) slotPattern.preFillerPattern.get(slotPattern.preFillerPattern
+                .size() - 1);
+        if (right.isListItem()) {
+          strippedRule.getPreFillerPattern().remove(strippedRule.getPreFillerPattern().size() - 1);
+        }
+      }
+    }
+    if (right != null && !right.isListItem()) {
+      right = null;
+      rightType = 0;
+    }
+    if (left == null && right == null) {
+      TextRulerToolkit.logIfDebug("HOW CAN THIS BE ?");
+      return null;
+    }
+    int leftCount = left != null ? left.listLen() : 0;
+    int rightCount = right != null ? right.listLen() : 0;
+
+    // zero repetitions are only tested for lists that may be empty
+    int leftStart = (leftCount > 0 && !left.listBeginsAtZero()) ? 1 : 0;
+    int rightStart = (rightCount > 0 && !right.listBeginsAtZero()) ? 1 : 0;
+
+    for (int leftI = leftStart; leftI <= leftCount; leftI++) {
+      for (int rightI = rightStart; rightI <= rightCount; rightI++) {
+        RapierRule newRule = strippedRule.copy();
+        if (leftI > 0) {
+          TextRulerRulePattern thePattern = null;
+          if (leftType == 1) {
+            thePattern = newRule.getPreFillerPattern();
+          } else if (leftType == 2) {
+            thePattern = newRule.getFillerPattern();
+          } else if (leftType == 3) {
+            thePattern = newRule.getPostFillerPattern();
+          }
+          for (int i = 0; i < leftI; i++) {
+            RapierRuleItem theItem = left.copy();
+            // remove the list character but add leftI plain copies instead
+            theItem.setListLen(0);
+            thePattern.add(0, theItem);
+          }
+        }
+        if (rightI > 0) {
+          TextRulerRulePattern thePattern = null;
+          if (rightType == 1) {
+            thePattern = newRule.getPostFillerPattern();
+          } else if (rightType == 2) {
+            thePattern = newRule.getFillerPattern();
+          } else if (rightType == 3) {
+            thePattern = newRule.getPreFillerPattern();
+          }
+          for (int i = 0; i < rightI; i++) {
+            RapierRuleItem theItem = right.copy();
+            // remove the list character but add rightI plain copies instead
+            theItem.setListLen(0);
+            thePattern.add(theItem);
+          }
+        }
+        newRule.setNeedsCompile(true);
+        if (newRule.totalItemCount() > 0) {
+          result.add(newRule);
+        }
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Builds the content of the rules file for this rule: the file header of the algorithm followed
+   * either by the plain rule string or, if the rule has list items at its border, by the rule
+   * string as a comment and one line per expanded test rule.
+   * <p>
+   * If the expansion yields no result the plain rule string is written instead.
+   */
+  @Override
+  protected String getRulesFileContent() {
+    ArrayList<RapierRule> rules = hasListItemAtBorder() ? createAllTestRules() : null;
+    StringBuilder body = new StringBuilder();
+    if (rules != null) {
+      body.append("// ").append(getRuleString()).append("\n\n");
+      for (RapierRule r : rules) {
+        body.append(r.getRuleString()).append("\n");
+      }
+    } else {
+      body.append(getRuleString()).append("\n");
+    }
+    return algorithm.getTMFileHeaderString() + body;
+  }
+
+  /**
+   * @return {@code true} if at least one of the two parents is not set
+   */
+  public boolean isInitialRule() {
+    return parent1 == null || parent2 == null;
+  }
+
+  /**
+   * Merges the first pair of neighbouring items of {@code p} that are equal when rendered as
+   * non-list items and of which at least one is a list item. The merged item gets the summed list
+   * length (a plain item counts as 1) and begins at zero only if both items did.
+   * 
+   * @return {@code true} if a pair was merged, {@code false} if the pattern was left unchanged
+   */
+  protected boolean compressFirstOccurenceOfSubsequentEqualPatternLists(TextRulerRulePattern p) {
+    for (int i = 0; i < p.size() - 1; i++) {
+      RapierRuleItem it1 = (RapierRuleItem) p.get(i);
+      RapierRuleItem it2 = (RapierRuleItem) p.get(i + 1);
+      if (it1.toStringAsNonPatternList().equals(it2.toStringAsNonPatternList())) {
+        if (it1.isListItem() || it2.isListItem()) {
+          boolean fromZero = it1.listBeginsAtZero() && it2.listBeginsAtZero();
+          int listLen = (it1.isListItem() ? it1.listLen() : 1)
+                  + (it2.isListItem() ? it2.listLen() : 1);
+          it1.setListLen(listLen);
+          it1.setListBeginsAtZero(fromZero);
+          p.remove(i + 1); // it2 is now represented by it1
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Repeatedly merges neighbouring equal items in the pre-filler, filler and post-filler pattern
+   * until no pattern changes any more, and marks the rule for recompilation if anything was
+   * merged.
+   */
+  public void combineSenselessPatternListItems() {
+    boolean didCompress = false;
+    boolean thisRoundCompressed;
+    do {
+      thisRoundCompressed = compressFirstOccurenceOfSubsequentEqualPatternLists(slotPattern.preFillerPattern);
+      thisRoundCompressed |= compressFirstOccurenceOfSubsequentEqualPatternLists(slotPattern.fillerPattern);
+      thisRoundCompressed |= compressFirstOccurenceOfSubsequentEqualPatternLists(slotPattern.postFillerPattern);
+      didCompress |= thisRoundCompressed;
+    } while (thisRoundCompressed);
+
+    if (didCompress) {
+      setNeedsCompile(true);
+    }
+  }
+
+}

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRule.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRule.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRuleItem.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRuleItem.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRuleItem.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRuleItem.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,271 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.textmarker.textruler.learner.rapier;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.uima.textmarker.textruler.core.TextRulerRule;
+import org.apache.uima.textmarker.textruler.core.TextRulerRuleItem;
+import org.apache.uima.textmarker.textruler.core.TextRulerSingleSlotRule;
+import org.apache.uima.textmarker.textruler.core.TextRulerWordConstraint;
+
+public class RapierRuleItem implements TextRulerRuleItem {
+
+  protected Set<TextRulerWordConstraint> words = new HashSet<TextRulerWordConstraint>(); // direct
+
+  // word
+  // match
+  // constraint(s)
+  protected Set<String> tags = new HashSet<String>(); // pos tag constraint(s)
+
+  protected Set<String> classes = new HashSet<String>(); // semantic classes
+
+  // constraint(s)
+  protected int listLen = 0;
+
+  protected boolean listBeginsAtZero = false; // special case for TM RAPIER
+
+  // interpretation...
+
+  public boolean equals(TextRulerRuleItem o) {
+    return getStringForRuleString(null, null, 0, 1, 0, 1, 0).equals(
+            o.getStringForRuleString(null, null, 0, 1, 0, 1, 0));
+  }
+
+  public String toStringAsNonPatternList() {
+    int original = listLen;
+    listLen = 0;
+    String result = toString();
+    listLen = original;
+    return result;
+  }
+
+  public String getStringForRuleString(TextRulerRule rule, MLRuleItemType type,
+          int numberInPattern, int patternSize, int numberInRule, int ruleSize, int slotIndex) {
+    int listStart = listBeginsAtZero ? 0 : 1;
+    String quantifierString = isListItem() ? "[" + listStart + "," + (listLen) + "]?" : "";
+    String anchor = null;
+    ArrayList<String> constraints = new ArrayList<String>();
+
+    if (words.size() > 0) {
+      ArrayList<TextRulerWordConstraint> regExpConstraints = new ArrayList<TextRulerWordConstraint>();
+      ArrayList<TextRulerWordConstraint> tmTypeConstraints = new ArrayList<TextRulerWordConstraint>();
+      for (TextRulerWordConstraint w : this.words) {
+        if (w.isRegExpConstraint())
+          regExpConstraints.add(w);
+        else
+          tmTypeConstraints.add(w);
+      }
+      int regExpCount = regExpConstraints.size();
+      int tmCount = tmTypeConstraints.size();
+
+      String regExpString = null;
+      for (TextRulerWordConstraint w : regExpConstraints) {
+        if (regExpString == null)
+          regExpString = w.toString();
+        else
+          regExpString += "|" + w.toString();
+      }
+      if (regExpString != null) {
+        regExpString = "REGEXP(\"" + regExpString + "\")";
+      }
+
+      String tmString = null;
+      if (tmCount > 1 || (regExpCount > 0 && tmCount > 0)) {
+        for (TextRulerWordConstraint w : tmTypeConstraints) {
+          if (tmString == null)
+            tmString = "IS(" + w.toString() + ")";
+          else
+            tmString += ",IS(" + w.toString() + ")";
+        }
+        String cString = "OR(" + tmString;
+        if (regExpCount > 0) {
+          if (tmCount > 0)
+            cString += ",";
+          cString += regExpString;
+        }
+        cString += ")";
+        constraints.add(cString);
+      } else { // tmCount can here be 0 or 1,
+        // if it is 1, then regExpCount == 0
+        // if it is 0, regExpCount can be anything
+        if (tmCount == 1)
+          anchor = tmTypeConstraints.get(0).toString(); // +quantifierString;
+        else {
+          if (regExpCount > 0)
+            constraints.add(regExpString);
+        }
+      }
+    }
+
+    if (tags.size() > 0) {
+      if (tags.size() == 1 && anchor == null)
+        anchor = tags.iterator().next().toString();
+      else {
+        String tagsString = null;
+        for (String w : this.tags) {
+          if (tagsString == null)
+            tagsString = "IS(" + w.toString() + ")";
+          else
+            tagsString += ",IS(" + w.toString() + ")";
+        }
+        tagsString = "OR(" + tagsString + ")";
+        constraints.add(tagsString);
+      }
+    }
+
+    if (classes.size() > 0) {
+      if (classes.size() == 1 && anchor == null)
+        anchor = classes.iterator().next().toString();
+      else {
+        String classesString = null;
+        for (String w : this.classes) {
+          if (classesString == null)
+            classesString = "IS(" + w.toString() + ")";
+          else
+            classesString += ",IS(" + w.toString() + ")";
+        }
+        classesString = "OR(" + classesString + ")";
+        constraints.add(classesString);
+      }
+    }
+
+    if (anchor == null)
+      anchor = "ALL";
+
+    String result = anchor + quantifierString;
+
+    if (constraints.size() > 0) {
+      String cStr = null;
+      for (String c : constraints) {
+        if (cStr == null)
+          cStr = c.toString();
+        else
+          cStr += "," + c.toString();
+      }
+      // TODO richtig?
+      result += "{" + cStr;
+    }
+
+    if (type == MLRuleItemType.FILLER && (numberInPattern == 0)) {
+      if (constraints.size() == 0)
+        result += "{";
+      result += "->MARKONCE(" + ((TextRulerSingleSlotRule) rule).getMarkName();
+      if (patternSize > 1)
+        result += ", " + (numberInRule + 1) + ", " + (numberInRule + patternSize);
+      // for(int i=0;i < patternSize;i++) {
+      // result += ","+(i+numberInRule+1);
+      // }
+      result += ")}";
+    } else if (constraints.size() != 0)
+      result += "}";
+    return result;
+  }
+
+  public Set<TextRulerWordConstraint> getWordConstraints() {
+    return words;
+  }
+
+  public Set<String> getTagConstraints() {
+    return tags;
+  }
+
+  public Set<String> getClassConstraints() {
+    return classes;
+  }
+
+  public void setListLen(int val) {
+    this.listLen = val;
+  }
+
+  public void setListBeginsAtZero(boolean flag) {
+    this.listBeginsAtZero = flag;
+  }
+
+  public boolean listBeginsAtZero() {
+    return listBeginsAtZero;
+  }
+
+  public int listLen() {
+    return listLen;
+  }
+
+  public boolean isListItem() {
+    return (listBeginsAtZero && listLen > 0) || (listLen > 1);
+  }
+
+  public RapierRuleItem copy() {
+    RapierRuleItem newItem = new RapierRuleItem();
+    newItem.words = new HashSet<TextRulerWordConstraint>(words);
+    newItem.tags = new HashSet<String>(tags);
+    newItem.classes = new HashSet<String>(classes);
+    newItem.listLen = listLen;
+    newItem.listBeginsAtZero = listBeginsAtZero;
+    return newItem;
+  }
+
+  public int getRuleSizePoints() {
+    int result = this.isListItem() ? 3 : 2; // 3 for a list pattern item. 2
+    // for a normal pattern item
+    if (words.size() > 1)
+      result += (words.size() - 1) * 2; // every disjunct in WORD
+    // constraint counts 2
+    if (tags.size() > 1)
+      result += tags.size() - 1; // every disjunct in POS TAG constraint
+    // counts 1
+    if (classes.size() > 1)
+      result += classes.size() - 1; // every disjunct in CLASS constraint
+    // counts 1
+    return result;
+  }
+
+  public void addWordConstraints(Collection<TextRulerWordConstraint> constraints) {
+    words.addAll(constraints);
+  }
+
+  public void addTagConstraints(Collection<String> constraints) {
+    tags.addAll(constraints);
+  }
+
+  public void addClassConstraints(Collection<String> constraints) {
+    classes.addAll(constraints);
+  }
+
+  public void addWordConstraint(TextRulerWordConstraint constraint) {
+    words.add(constraint);
+  }
+
+  public void addTagConstraint(String constraint) {
+    tags.add(constraint);
+  }
+
+  public void addClassConstraint(String constraint) {
+    classes.add(constraint);
+  }
+
+  @Override
+  public String toString() {
+    return getStringForRuleString(null, null, 0, 0, 0, 0, 0);
+  }
+
+}

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRuleItem.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRuleItem.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRulePriorityQueue.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRulePriorityQueue.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRulePriorityQueue.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRulePriorityQueue.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.textmarker.textruler.learner.rapier;
+
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.PriorityQueue;
+
+/**
+ * Size-bounded collection of {@link RapierRule}s ordered by {@code getPriority()}.
+ * <p>
+ * Two heaps over the same elements are kept in sync: {@code ruleList} has the rule with the
+ * smallest priority value at its head, {@code reverseRuleList} has the rule with the largest
+ * priority value at its head. Whenever an insertion makes the queue exceed {@code maxSize}, rules
+ * with the largest priority value are evicted until the bound holds again.
+ * <p>
+ * Note that {@link #iterator()} hands out the iterator of the underlying {@link PriorityQueue}:
+ * it does not traverse the rules in priority order, and calling its {@code remove()} would only
+ * affect one of the two heaps and must therefore be avoided.
+ */
+public class RapierRulePriorityQueue implements Iterable<RapierRule> {
+
+  /** Heap whose head is the rule with the smallest priority value. */
+  private final PriorityQueue<RapierRule> ruleList;
+
+  /** Heap over the same rules whose head is the rule with the largest priority value. */
+  private final PriorityQueue<RapierRule> reverseRuleList;
+
+  /** Maximum number of rules retained after an insertion. */
+  private final int maxSize;
+
+  /**
+   * Creates an empty queue that retains at most {@code maxSize} rules.
+   * 
+   * @param maxSize
+   *          the maximum number of rules to keep; it is also used as the initial capacity of the
+   *          underlying {@link PriorityQueue}s and therefore has to be at least 1
+   * @throws IllegalArgumentException
+   *           if {@code maxSize} is less than 1 (raised by the {@link PriorityQueue} constructor)
+   */
+  public RapierRulePriorityQueue(int maxSize) {
+    this.maxSize = maxSize;
+
+    final Comparator<RapierRule> ascending = new Comparator<RapierRule>() {
+      public int compare(RapierRule o1, RapierRule o2) {
+        if (o1.getPriority() > o2.getPriority()) {
+          return 1;
+        } else if (o1.getPriority() < o2.getPriority()) {
+          return -1;
+        } else {
+          return 0;
+        }
+      }
+    };
+    // The descending order is the ascending order with swapped arguments.
+    final Comparator<RapierRule> descending = new Comparator<RapierRule>() {
+      public int compare(RapierRule o1, RapierRule o2) {
+        return ascending.compare(o2, o1);
+      }
+    };
+
+    this.ruleList = new PriorityQueue<RapierRule>(maxSize, ascending);
+    this.reverseRuleList = new PriorityQueue<RapierRule>(maxSize, descending);
+  }
+
+  /**
+   * Returns the iterator of the underlying ascending heap. The iteration order is the heap's
+   * internal order, not priority order.
+   * 
+   * @return an iterator over all rules currently held
+   */
+  public Iterator<RapierRule> iterator() {
+    return ruleList.iterator();
+  }
+
+  /** Removes all rules from the queue. */
+  public void clear() {
+    ruleList.clear();
+    reverseRuleList.clear();
+  }
+
+  /**
+   * Adds the given rules one by one via {@link #add(RapierRule)}, so the size bound is enforced
+   * after every single insertion.
+   * 
+   * @param rules
+   *          the rules to add
+   */
+  public void addAll(Collection<RapierRule> rules) {
+    for (RapierRule r : rules) {
+      add(r);
+    }
+  }
+
+  /**
+   * Adds a rule and, if the queue then holds more than {@code maxSize} rules, evicts rules with
+   * the largest priority value until the bound is met again. The newly added rule itself may be
+   * the one that gets evicted.
+   * 
+   * @param rule
+   *          the rule to add
+   */
+  public void add(RapierRule rule) {
+    ruleList.add(rule);
+    reverseRuleList.add(rule);
+    while (ruleList.size() > maxSize) {
+      // The element to evict is the head of the reverse heap, so poll() removes it directly
+      // instead of searching for it with remove(Object).
+      RapierRule tail = reverseRuleList.poll();
+      ruleList.remove(tail);
+    }
+  }
+
+  /**
+   * Returns, without removing it, the rule with the smallest priority value.
+   * 
+   * @return the head of the ascending heap, or {@code null} if the queue is empty
+   */
+  public RapierRule peek() {
+    return ruleList.peek();
+  }
+
+}

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRulePriorityQueue.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/rapier/RapierRulePriorityQueue.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain