You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by jo...@apache.org on 2011/08/01 16:21:35 UTC

svn commit: r1152792 [3/10] - in /uima/sandbox/trunk/TextMarker: org.apache.uima.tm.textruler.lp2/ org.apache.uima.tm.textruler.lp2/META-INF/ org.apache.uima.tm.textruler.lp2/bin/ org.apache.uima.tm.textruler.lp2/src/ org.apache.uima.tm.textruler.lp2/s...

Added: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/Rapier.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/Rapier.java?rev=1152792&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/Rapier.java (added)
+++ uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/Rapier.java Mon Aug  1 14:21:12 2011
@@ -0,0 +1,714 @@
+package org.apache.uima.tm.textruler.rapier;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.tm.textruler.core.TextRulerAnnotation;
+import org.apache.uima.tm.textruler.core.TextRulerBasicLearner;
+import org.apache.uima.tm.textruler.core.TextRulerExample;
+import org.apache.uima.tm.textruler.core.TextRulerRule;
+import org.apache.uima.tm.textruler.core.TextRulerRuleItem;
+import org.apache.uima.tm.textruler.core.TextRulerRuleList;
+import org.apache.uima.tm.textruler.core.TextRulerRulePattern;
+import org.apache.uima.tm.textruler.core.TextRulerStatisticsCollector;
+import org.apache.uima.tm.textruler.core.TextRulerTarget;
+import org.apache.uima.tm.textruler.core.TextRulerToolkit;
+import org.apache.uima.tm.textruler.core.TextRulerWordConstraint;
+import org.apache.uima.tm.textruler.extension.TextRulerLearnerDelegate;
+
+
+public class Rapier extends TextRulerBasicLearner {
+
+  // ---- Keys under which setParameters(Map) looks up the configuration values. ----
+
+  public final static String COMPRESSION_FAIL_MAX_COUNT_KEY = "compressionFailMaxCount";
+
+  public final static String RULELIST_SIZE_KEY = "ruleListSize";
+
+  public final static String PAIR_COUNT_KEY = "pairCount";
+
+  public final static String LIM_NO_IMPROVEMENTS_KEY = "limNoImprovements";
+
+  // NOTE: the identifier is misspelled ("THESHOLD"), but it is public and therefore kept as is.
+  public final static String NOISE_THESHOLD_KEY = "noiseThreshold";
+
+  public final static String POSTAG_ROOTTYPE_KEY = "posTagRootType";
+
+  public final static String MIN_COVERED_POSITIVES_KEY = "minCoveredPositives";
+
+  public final static String USE_ALL_GENSETS_AT_SPECIALIZATION_KEY = "useAllGenSetsAtSpecialization";
+
+  // ---- Default values; each one initializes the corresponding instance field below. ----
+
+  public final static int STANDARD_COMPRESSION_FAIL_MAX_COUNT = 3;
+
+  public final static int STANDARD_RULELIST_SIZE = 50;
+
+  public final static int STANDARD_PAIR_COUNT = 4;
+
+  public final static int STANDARD_LIM_NO_IMPROVEMENTS = 3;
+
+  // NOTE: the identifier is misspelled ("THREHSOLD"), but it is public and therefore kept as is.
+  public final static float STANDARD_NOISE_THREHSOLD = 0.9f;
+
+  public final static String STANDARD_POSTAG_ROOTTYPE = "de.uniwue.ml.ML.postag";
+
+  public final static int STANDARD_MIN_COVERED_POSITIVES = 1;
+
+  public final static boolean STANDARD_USE_ALL_GENSETS_AT_SPECIALIZATION = true;
+
+  // doRun() stops compressing a slot once this many rounds failed to yield an acceptable rule.
+  private int compressionFailMaxCount = STANDARD_COMPRESSION_FAIL_MAX_COUNT;
+
+  // Passed to the RapierRulePriorityQueue constructor in doRun().
+  private int ruleListSize = STANDARD_RULELIST_SIZE;
+
+  // Number of rule pairs that findNewRule() generalizes per call.
+  private int pairCount = STANDARD_PAIR_COUNT;
+
+  // findNewRule() ends its specialization loop once the best priority failed to improve more
+  // often than this.
+  private int limNoImprovements = STANDARD_LIM_NO_IMPROVEMENTS;
+
+  // doRun() only accepts a rule whose noiseValue() is at least this value.
+  private float noiseThreshold = STANDARD_NOISE_THREHSOLD;
+
+  // Name of the type looked up in the CAS type system to find POS tag annotations; a null or
+  // empty name disables POS tag constraints (see addAvailablePosTagConstraintToItem).
+  private String posTagRootTypeName = STANDARD_POSTAG_ROOTTYPE;
+
+  // doRun() only accepts a rule that covers at least this many positive examples.
+  private int minCoveredPositives = STANDARD_MIN_COVERED_POSITIVES;
+
+  // When false, specializePreFiller() only builds the first of its three generalization sets.
+  private boolean useAllGenSetsAtSpecialization = STANDARD_USE_ALL_GENSETS_AT_SPECIALIZATION;
+
+  // Covering statistics of already tested rules, keyed by the rule string
+  // (see testRulesIfNotCached).
+  private Map<String, TextRulerStatisticsCollector> cachedTestedRuleStatistics = new HashMap<String, TextRulerStatisticsCollector>();
+
+  // Size of slotRules right after fillSlotRulesWithMostSpecificRules(); used for the status text.
+  private int initialRuleBaseSize;
+
+  // Positive examples of the slot that is currently being learned.
+  private List<TextRulerExample> examples;
+
+  // Current rule base of the slot that is currently being learned.
+  private TextRulerRuleList slotRules;
+
+  // Working queue of candidate rules used inside findNewRule().
+  private RapierRulePriorityQueue ruleList;
+
+  // Slot name that doRun() is currently processing.
+  private String currentSlotName;
+
+  /**
+   * Creates a RAPIER learner. All arguments are handed unchanged, and in the same order, to the
+   * {@link TextRulerBasicLearner} constructor; this class adds no initialization of its own here.
+   */
+  public Rapier(String inputDir, String prePropTMFile, String tmpDir, String[] slotNames,
+          Set<String> filterSet, TextRulerLearnerDelegate delegate) {
+    super(inputDir, prePropTMFile, tmpDir, slotNames, filterSet, delegate);
+  }
+
+  /**
+   * Learns rules for every configured slot name: builds the most specific rule base from the
+   * positive examples and then keeps asking {@link #findNewRule()} for a compressing rule until
+   * {@code compressionFailMaxCount} rounds have failed. Returns early, without sending the final
+   * "Done" status, as soon as {@code shouldAbort()} reports true.
+   */
+  @Override
+  protected void doRun() {
+    for (String slotName : slotNames) {
+      int failedRounds = 0;
+
+      // only working for one slot yet !
+      currentSlotName = slotName;
+      cachedTestedRuleStatistics.clear();
+      exampleDocuments.createExamplesForTarget(new TextRulerTarget(currentSlotName, this));
+      examples = exampleDocuments.getAllPositiveExamples();
+
+      if (shouldAbort()) {
+        return;
+      }
+
+      slotRules = new TextRulerRuleList();
+      ruleList = new RapierRulePriorityQueue(ruleListSize);
+
+      TextRulerToolkit.log("--- RAPIER START for Slot " + currentSlotName);
+
+      sendStatusUpdateToDelegate("Creating initial rule base...",
+              TextRulerLearnerState.ML_INITIALIZING, false);
+
+      fillSlotRulesWithMostSpecificRules();
+
+      updateCompressionStatusString();
+
+      if (TextRulerToolkit.DEBUG) {
+        slotRules.saveToRulesFile(getIntermediateRulesFileName(), getTMFileHeaderString());
+      }
+
+      while (failedRounds < compressionFailMaxCount) {
+        TextRulerToolkit.log("***** NEW COMPRESSION ROUND; FailCount = " + failedRounds);
+        if (shouldAbort()) {
+          return;
+        }
+
+        RapierRule candidate = findNewRule();
+        // a candidate is accepted only if it is new, covers enough positives and is clean enough
+        boolean accepted = candidate != null
+                && (candidate.getCoveringStatistics().getCoveredPositivesCount() >= minCoveredPositives)
+                && (candidate.noiseValue() >= noiseThreshold)
+                && (!slotRules.contains(candidate));
+        if (!accepted) {
+          failedRounds++;
+          continue;
+        }
+
+        addRuleAndRemoveEmpiricallySubsumedRules(candidate);
+        if (TextRulerToolkit.DEBUG) {
+          slotRules.saveToRulesFile(getIntermediateRulesFileName(), getTMFileHeaderString());
+        }
+      }
+
+      if (TextRulerToolkit.DEBUG) {
+        slotRules.saveToRulesFile(getIntermediateRulesFileName(), getTMFileHeaderString());
+      }
+    }
+
+    sendStatusUpdateToDelegate("Done", TextRulerLearnerState.ML_DONE, true);
+    cachedTestedRuleStatistics.clear();
+    TextRulerToolkit.log("--- RAPIER END");
+  }
+
+  /**
+   * Reports the current compression ratio (current rule count relative to the initial rule base
+   * size, rounded to a whole percent) to the delegate as a running-state status message.
+   */
+  private void updateCompressionStatusString() {
+    int currentRuleCount = slotRules.size();
+    double percent = Math.round((currentRuleCount / (double) initialRuleBaseSize) * 100.0);
+
+    StringBuilder status = new StringBuilder("Compressing... (Rules = ");
+    status.append(currentRuleCount).append("/").append(initialRuleBaseSize);
+    status.append("  = ").append(percent).append(" % ratio)");
+
+    // TODO also show round numbers and compression fail count and such
+    // things!
+    sendStatusUpdateToDelegate(status.toString(), TextRulerLearnerState.ML_RUNNING, true);
+  }
+
+  /**
+   * Adds a POS tag constraint to {@code item} if the example document contains a POS tag
+   * annotation that spans exactly the same range as {@code tokenAnnotation}.
+   * <p>
+   * Nothing is added when no POS tag root type name is configured, when that type is not part
+   * of the document's type system, or when no exactly matching POS tag annotation is found.
+   *
+   * @param item
+   *          the rule item that receives the tag constraint
+   * @param tokenAnnotation
+   *          the token the rule item was created for
+   * @param example
+   *          the example whose document CAS is searched for POS tags
+   */
+  private void addAvailablePosTagConstraintToItem(RapierRuleItem item,
+          AnnotationFS tokenAnnotation, TextRulerExample example) {
+
+    if (posTagRootTypeName == null || posTagRootTypeName.length() == 0) {
+      return;
+    }
+
+    CAS cas = example.getDocumentCAS();
+    TypeSystem ts = cas.getTypeSystem();
+    Type posTagsRootType = ts.getType(posTagRootTypeName);
+    // The original code tested "ts != null" here, which is always true at this point because ts
+    // has already been dereferenced. The value that can really be missing is the looked-up
+    // type, e.g. when the documents were not POS tagged with the configured type.
+    if (posTagsRootType == null) {
+      return;
+    }
+
+    List<AnnotationFS> posTagAnnotations = TextRulerToolkit.getAnnotationsWithinBounds(cas,
+            tokenAnnotation.getBegin(), tokenAnnotation.getEnd(), null, posTagsRootType);
+    if (posTagAnnotations.size() > 0) {
+      // only the first POS tag is considered, and only if it covers exactly the token
+      AnnotationFS posTag = posTagAnnotations.get(0);
+      if (posTag.getBegin() == tokenAnnotation.getBegin()
+              && posTag.getEnd() == tokenAnnotation.getEnd()) {
+        item.addTagConstraint(posTag.getType().getShortName());
+      }
+    }
+  }
+
+  /**
+   * Replaces the content of {@code slotRules} with one rule per positive example. Each rule gets
+   * one item for every token before, inside and after the example's slot annotation, and its
+   * covering statistics are preset to contain the seed example. Afterwards the resulting rule
+   * count is remembered as {@code initialRuleBaseSize}.
+   */
+  private void fillSlotRulesWithMostSpecificRules() {
+    slotRules.clear();
+    for (TextRulerExample example : examples) {
+      RapierRule seedRule = new RapierRule(this, example.getTarget());
+      TextRulerAnnotation slot = example.getAnnotation();
+      CAS docCas = example.getDocumentCAS();
+      Type tokensRootType = docCas.getTypeSystem().getType(TextRulerToolkit.TM_ANY_TYPE_NAME);
+
+      // first, get all words/tokens:
+      List<AnnotationFS> tokensBefore = TextRulerToolkit.getAnnotationsBeforePosition(
+              example.getDocumentCAS(), slot.getBegin(), -1,
+              TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);
+      List<AnnotationFS> tokensAfter = TextRulerToolkit.getAnnotationsAfterPosition(
+              example.getDocumentCAS(), slot.getEnd(), -1,
+              TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);
+      List<AnnotationFS> tokensInside = TextRulerToolkit.getAnnotationsWithinBounds(
+              example.getDocumentCAS(), slot.getBegin(), slot.getEnd(),
+              TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);
+
+      // the before annotations have to be reversed:
+      int index = tokensBefore.size();
+      while (--index >= 0) {
+        seedRule.addPreFillerItem(createMostSpecificRuleItem(tokensBefore.get(index), example));
+      }
+      for (AnnotationFS token : tokensInside) {
+        seedRule.addFillerItem(createMostSpecificRuleItem(token, example));
+      }
+      for (AnnotationFS token : tokensAfter) {
+        seedRule.addPostFillerItem(createMostSpecificRuleItem(token, example));
+      }
+
+      // this rule has to at least cover its seed example!!
+      TextRulerStatisticsCollector seedStatistics = new TextRulerStatisticsCollector();
+      seedStatistics.addCoveredPositive(example);
+      seedRule.setCoveringStatistics(seedStatistics);
+      slotRules.add(seedRule);
+    }
+    initialRuleBaseSize = slotRules.size();
+  }
+
+  /**
+   * Builds the rule item for a single token: a word constraint on the token itself plus, if
+   * available, a POS tag constraint.
+   */
+  private RapierRuleItem createMostSpecificRuleItem(AnnotationFS token, TextRulerExample example) {
+    RapierRuleItem item = new RapierRuleItem();
+    TextRulerAnnotation tokenAnnotation = new TextRulerAnnotation(token, example.getDocument());
+    item.addWordConstraint(new TextRulerWordConstraint(tokenAnnotation));
+    addAvailablePosTagConstraintToItem(item, token, example);
+    return item;
+  }
+
+  /**
+   * Adds {@code rule} to the slot rule base unless it is already contained. Before adding, every
+   * existing rule whose covered positive examples are all covered by {@code rule} as well is
+   * removed. The compression status is refreshed afterwards.
+   *
+   * @param rule
+   *          the new rule; its covering statistics must already be set
+   */
+  protected void addRuleAndRemoveEmpiricallySubsumedRules(RapierRule rule) {
+    if (slotRules.contains(rule)) {
+      return;
+    }
+
+    Set<TextRulerExample> newlyCovered = rule.getCoveringStatistics()
+            .getCoveredPositiveExamples();
+
+    // collect first, remove afterwards: slotRules must not be modified while iterating over it
+    List<TextRulerRule> subsumedRules = new ArrayList<TextRulerRule>();
+    for (TextRulerRule existing : slotRules) {
+      Set<TextRulerExample> existingCovered = existing.getCoveringStatistics()
+              .getCoveredPositiveExamples();
+      if (newlyCovered.containsAll(existingCovered)) {
+        subsumedRules.add(existing);
+      }
+    }
+    for (TextRulerRule obsolete : subsumedRules) {
+      slotRules.remove(obsolete);
+    }
+
+    slotRules.add(rule);
+    updateCompressionStatusString();
+  }
+
+  protected RapierRule findNewRule() {
+    Random rand = new Random(System.currentTimeMillis());
+
+    Set<RapierRule> generalizations = new HashSet<RapierRule>();
+    // 0. initialization
+    ruleList.clear();
+
+    if (slotRules.size() <= 1)
+      return null;
+
+    List<RapierRule> uncompressedRules = new ArrayList<RapierRule>();
+    for (TextRulerRule r : slotRules) {
+      if (((RapierRule) r).isInitialRule())
+        uncompressedRules.add((RapierRule) r);
+    }
+
+    // 1. get generalizations of the two slot filler patterns:
+
+    // create pairs and prefer still uncompressed rules when choosing
+    // "randomly":
+    int pairsLeft = pairCount;
+    if (uncompressedRules.size() == 1) {
+      RapierRule rule1 = uncompressedRules.get(0);
+      RapierRule rule2 = null;
+      while (rule2 == null || rule1 == rule2) {
+        rule2 = (RapierRule) slotRules.get(rand.nextInt(slotRules.size()));
+      }
+      generalizations.addAll(getFillerGeneralizationsForRulePair(rule1, rule2));
+      if (shouldAbort())
+        return null;
+      pairsLeft--;
+    } else if (uncompressedRules.size() == 2) {
+      RapierRule rule1 = uncompressedRules.get(0);
+      RapierRule rule2 = uncompressedRules.get(1);
+      generalizations.addAll(getFillerGeneralizationsForRulePair(rule1, rule2));
+      if (shouldAbort())
+        return null;
+      pairsLeft--;
+    } else if (uncompressedRules.size() > 2) {
+      int uPairCount = pairCount;
+      if (uPairCount > uncompressedRules.size())
+        uPairCount /= 2;
+      for (int i = 0; i < uPairCount; i++) {
+        RapierRule rule1 = uncompressedRules.get(rand
+                .nextInt(uncompressedRules.size()));
+        RapierRule rule2 = null;
+        while (rule2 == null || rule1 == rule2) {
+          rule2 = uncompressedRules.get(rand.nextInt(uncompressedRules.size()));
+        }
+        generalizations.addAll(getFillerGeneralizationsForRulePair(rule1, rule2));
+        pairsLeft--;
+      }
+    }
+
+    for (int i = 0; i < pairsLeft; i++) {
+      // TODO optimize !! don't call the machinery with the same rule pair
+      // two times in one session !!!
+      // randomly pick two rules:
+      RapierRule rule1 = (RapierRule) slotRules.get(rand.nextInt(slotRules.size()));
+      RapierRule rule2 = null;
+      while (rule2 == null || rule1 == rule2) {
+        rule2 = (RapierRule) slotRules.get(rand.nextInt(slotRules.size()));
+      }
+      generalizations.addAll(getFillerGeneralizationsForRulePair(rule1, rule2));
+
+      if (shouldAbort())
+        return null;
+    }
+
+    // if (TextRulerToolkit.DEBUG)
+    // {
+    // TextRulerToolkit.log("Rule Generalizations created: " +
+    // generalizations.size());
+    // for (RapierRule newRule : generalizations)
+    // TextRulerToolkit.log("Rule = "+newRule.getRuleString());
+    // }
+
+    // 2. evaluate an enque to priority list:
+    List<RapierRule> testRules = new ArrayList<RapierRule>(generalizations);
+
+    for (RapierRule r : testRules) {
+      r.combineSenselessPatternListItems();
+    }
+
+    testRulesIfNotCached(testRules);
+    if (shouldAbort())
+      return null;
+
+    for (RapierRule newRule : generalizations) {
+      if (TextRulerToolkit.DEBUG) {
+        if (!RapierDebugHelper.debugCheckIfRuleCoversItsSeedRuleCoverings(newRule)) {
+          TextRulerToolkit
+                  .log("------------------------------------------------------------------------------------------");
+          TextRulerToolkit
+                  .log("ERROR, A RULE HAS TO COVER AT LEAST EVERY POSITIVE EXAMPLE OF ITS TWO SEED RULES!!!");
+          TextRulerToolkit.log("\t RULE: " + newRule.getRuleString());
+          TextRulerToolkit.log("\t Parent1: " + newRule.getParent1().getRuleString());
+          TextRulerToolkit.log("\t Parent2: " + newRule.getParent2().getRuleString());
+          TextRulerToolkit.log("--------");
+          TextRulerToolkit.log("+RuleCovering: "
+                  + newRule.getCoveringStatistics().getCoveredPositiveExamples());
+          TextRulerToolkit.log("+P1Covering  : "
+                  + newRule.getParent1().getCoveringStatistics().getCoveredPositiveExamples());
+          TextRulerToolkit.log("+P2Covering  : "
+                  + newRule.getParent2().getCoveringStatistics().getCoveredPositiveExamples());
+
+        }
+      }
+      ruleList.add(newRule);
+    }
+
+    // 3. specialize pre and post fillers:
+    int n = 0;
+    double bestValue = Double.MAX_VALUE;
+    int noImprovementCounter = 0;
+    while (true) {
+      n++;
+      TextRulerToolkit.log(" --- NEW SPECIALIZATOIN ROUND; n = " + n + "  noImprovementCounter = "
+              + noImprovementCounter);
+      List<RapierRule> newRuleList = new ArrayList<RapierRule>();
+      for (RapierRule curRule : ruleList) {
+
+        List<RapierRule> specTestRules = new ArrayList<RapierRule>(specializePreFiller(curRule, n));
+
+        for (RapierRule r : specTestRules)
+          r.combineSenselessPatternListItems();
+
+        testRulesIfNotCached(specTestRules);
+        if (shouldAbort())
+          return null;
+
+        for (RapierRule r : specTestRules)
+          newRuleList.add(r);
+      }
+      ruleList.addAll(newRuleList);
+
+      newRuleList.clear();
+      for (RapierRule curRule : ruleList) {
+
+        List<RapierRule> specTestRules = new ArrayList<RapierRule>(specializePostFiller(curRule, n));
+
+        for (RapierRule r : specTestRules)
+          r.combineSenselessPatternListItems();
+
+        testRulesIfNotCached(specTestRules);
+        if (shouldAbort())
+          return null;
+
+        for (RapierRule r : specTestRules)
+          newRuleList.add(r);
+      }
+      ruleList.addAll(newRuleList);
+
+      RapierRule bestRule = ruleList.peek();
+
+      if (TextRulerToolkit.DEBUG) {
+        // for (RapierRule r: ruleList)
+        // TextRulerToolkit.log("value="+r.getPriority()+" rule = "+r.getRuleString());
+        TextRulerToolkit.log("------------------------------------");
+        TextRulerToolkit.log("BEST RULE FOR THIS SESSION: " + bestRule.getCoveringStatistics());
+        TextRulerToolkit.log(bestRule.getRuleString());
+        TextRulerToolkit.log("------------------------------------");
+      }
+      if (bestRule.producesOnlyValidFillers())
+        break; // todo: horizon effects ??
+
+      if (bestRule.getPriority() < bestValue) {
+        noImprovementCounter = 0;
+        bestValue = bestRule.getPriority();
+      } else {
+        noImprovementCounter++;
+        if (noImprovementCounter > limNoImprovements)
+          break;
+      }
+    }
+
+    RapierRule bestRule = ruleList.peek();
+    return bestRule;
+  }
+
+  /**
+   * Generalizes the filler patterns of two rules and wraps every resulting pattern into a new
+   * rule that contains only filler items (copies of the pattern items). Each new rule remembers
+   * copies of both input rules as its parents, with all pre-/post-filler counters reset to 0.
+   *
+   * @param rule1
+   *          first seed rule; also provides the target of the new rules
+   * @param rule2
+   *          second seed rule
+   * @return the newly created rules, one per generalized filler pattern
+   */
+  private List<RapierRule> getFillerGeneralizationsForRulePair(RapierRule rule1, RapierRule rule2) {
+    TextRulerToolkit
+            .log("------------------------------------------------------------------------------------------");
+    TextRulerToolkit.log("getFillerGeneralizationsForRulePair:");
+    TextRulerToolkit.log("Rule1: " + rule1.getRuleString());
+    TextRulerToolkit.log("Rule2: " + rule2.getRuleString());
+
+    List<TextRulerRulePattern> fillerGeneralizations = RapierGeneralizationHelper
+            .getGeneralizationsForRuleItemPatterns(rule1.getFillerPattern(),
+                    rule2.getFillerPattern());
+
+    // create rules:
+    List<RapierRule> generalizedRules = new ArrayList<RapierRule>();
+    for (TextRulerRulePattern fillerPattern : fillerGeneralizations) {
+      RapierRule generalized = new RapierRule(this, rule1.getTarget());
+      for (TextRulerRuleItem fillerItem : fillerPattern) {
+        generalized.addFillerItem(fillerItem.copy());
+      }
+
+      generalized.setParent1(rule1.copy());
+      generalized.setParent1PreFiller_n(0);
+      generalized.setParent1PostFiller_n(0);
+
+      generalized.setParent2(rule2.copy());
+      generalized.setParent2PreFiller_n(0);
+      generalized.setParent2PostFiller_n(0);
+
+      generalizedRules.add(generalized);
+      generalized.setNeedsCompile(true);
+    }
+    TextRulerToolkit.log("   getGeneralizationsForRulePair result list size = "
+            + generalizedRules.size());
+    return generalizedRules;
+  }
+
+  /**
+   * Creates specializations of {@code curRule} by prepending generalized pre-filler items taken
+   * from the pre-filler patterns of the rule's two parents.
+   * <p>
+   * Up to three pairs of pattern windows are generalized: (n of parent 1, n-1 of parent 2) and,
+   * only if {@code useAllGenSetsAtSpecialization} is set, (n-1, n) and (n, n). A window starts
+   * {@code n} (or {@code n-1}) items before the end of the parent's pre-filler and ends before
+   * the items counted by the rule's parent pre-filler counter. A window whose start index would
+   * be negative stays empty.
+   *
+   * @param curRule
+   *          the rule to specialize; both of its parents must be set
+   * @param n
+   *          the current specialization round
+   * @return one copy of {@code curRule} per distinct generalized pattern, with both parent
+   *         pre-filler counters set to {@code n}
+   */
+  public List<RapierRule> specializePreFiller(RapierRule curRule, int n) {
+    TextRulerRulePattern parent1PreFiller = curRule.getParent1().getPreFillerPattern();
+    TextRulerRulePattern parent2PreFiller = curRule.getParent2().getPreFillerPattern();
+    int maxIndex1 = parent1PreFiller.size() - curRule.getParent1PreFiller_n() - 1;
+    int maxIndex2 = parent2PreFiller.size() - curRule.getParent2PreFiller_n() - 1;
+    int startN1 = parent1PreFiller.size() - n;
+    int startN2 = parent2PreFiller.size() - n;
+
+    // the two window patterns are reused (cleared and refilled) for all three sets
+    TextRulerRulePattern window1 = new TextRulerRulePattern();
+    TextRulerRulePattern window2 = new TextRulerRulePattern();
+
+    // generate 3 different possible sets for generalizations:
+
+    // 1. n vs. n-1 (n elements of baserule1, n-1 of baserule2)
+    List<TextRulerRulePattern> nVsNMinus1 = null;
+    refillPreFillerWindow(window1, parent1PreFiller, startN1, maxIndex1);
+    refillPreFillerWindow(window2, parent2PreFiller, startN2 + 1, maxIndex2);
+    if (window1.size() + window2.size() > 0) {
+      nVsNMinus1 = RapierGeneralizationHelper.getGeneralizationsForRuleItemPatterns(window1,
+              window2);
+    }
+
+    List<TextRulerRulePattern> nMinus1VsN = null;
+    List<TextRulerRulePattern> nVsN = null;
+
+    // due to performance reasons the user can switch this off
+    if (useAllGenSetsAtSpecialization) {
+      // 2. n-1 vs. n (n-1 elements of baserule1, n of baserule2)
+      refillPreFillerWindow(window1, parent1PreFiller, startN1 + 1, maxIndex1);
+      refillPreFillerWindow(window2, parent2PreFiller, startN2, maxIndex2);
+      if (window1.size() + window2.size() > 0) {
+        nMinus1VsN = RapierGeneralizationHelper.getGeneralizationsForRuleItemPatterns(window1,
+                window2);
+      }
+
+      // 3. n vs. n (n elements of baserule1, n of baserule2)
+      refillPreFillerWindow(window1, parent1PreFiller, startN1, maxIndex1);
+      refillPreFillerWindow(window2, parent2PreFiller, startN2, maxIndex2);
+      if (window1.size() + window2.size() > 0) {
+        nVsN = RapierGeneralizationHelper.getGeneralizationsForRuleItemPatterns(window1, window2);
+      }
+    }
+
+    // TODO optimize and don't store all 3 genLists ! but for debugging
+    // purposes we keep them for now !
+    Set<TextRulerRulePattern> distinctPatterns = new HashSet<TextRulerRulePattern>();
+    if (nVsNMinus1 != null) {
+      distinctPatterns.addAll(nVsNMinus1);
+    }
+    if (nMinus1VsN != null) {
+      distinctPatterns.addAll(nMinus1VsN);
+    }
+    if (nVsN != null) {
+      distinctPatterns.addAll(nVsN);
+    }
+
+    List<RapierRule> specializedRules = new ArrayList<RapierRule>();
+    for (TextRulerRulePattern pattern : distinctPatterns) {
+      RapierRule specialized = curRule.copy();
+      // items are handed over last-to-first
+      int index = pattern.size();
+      while (--index >= 0) {
+        specialized.addPreFillerItem(pattern.get(index));
+      }
+      specialized.setParent1PreFiller_n(n);
+      specialized.setParent2PreFiller_n(n);
+      specializedRules.add(specialized);
+    }
+    return specializedRules;
+  }
+
+  /**
+   * Empties {@code window} and fills it with the items of {@code source} from
+   * {@code startIndex} up to and including {@code maxIndex}; adds nothing if
+   * {@code startIndex} is negative.
+   */
+  private void refillPreFillerWindow(TextRulerRulePattern window, TextRulerRulePattern source,
+          int startIndex, int maxIndex) {
+    window.clear();
+    for (int i = startIndex; i >= 0 && i <= maxIndex; i++) {
+      window.add(source.get(i));
+    }
+  }
+
+  /**
+   * Creates specializations of {@code curRule} by appending generalized post-filler items taken
+   * from the post-filler patterns of the rule's two parents.
+   * <p>
+   * Up to three pairs of pattern windows are generalized: (n of parent 1, n-1 of parent 2) and,
+   * only if {@code useAllGenSetsAtSpecialization} is set, (n-1, n) and (n, n). A window starts
+   * at the index stored in the rule's parent post-filler counter and ends before index
+   * {@code n} (or {@code n-1}).
+   * <p>
+   * The original implementation always built all three sets here, although
+   * {@link #specializePreFiller(RapierRule, int)} honours the switch; both methods now treat
+   * the setting the same way. With the default setting ({@code true}) the result is unchanged.
+   *
+   * @param curRule
+   *          the rule to specialize; both of its parents must be set
+   * @param n
+   *          the current specialization round, expected to be 1..maxN (0 is only logged as an
+   *          error)
+   * @return one copy of {@code curRule} per distinct generalized pattern, with both parent
+   *         post-filler counters set to {@code n}
+   */
+  public List<RapierRule> specializePostFiller(RapierRule curRule, int n) {
+    if (n == 0) {
+      TextRulerToolkit.log("ERROR ! N SHOULD NOT BE 0!");
+    }
+    RapierRule baseRule1 = curRule.getParent1();
+    RapierRule baseRule2 = curRule.getParent2();
+    TextRulerRulePattern postFiller1 = baseRule1.getPostFillerPattern();
+    TextRulerRulePattern postFiller2 = baseRule2.getPostFillerPattern();
+    int postFiller1MinIndex = curRule.getParent1PostFiller_n();
+    int postFiller2MinIndex = curRule.getParent2PostFiller_n();
+
+    // the two window patterns are reused (cleared and refilled) for all three sets
+    TextRulerRulePattern consideredPostFiller1 = new TextRulerRulePattern();
+    TextRulerRulePattern consideredPostFiller2 = new TextRulerRulePattern();
+
+    // generate 3 different possible sets for generalizations:
+
+    // 1. n vs. n-1 (n elements of baserule1, n-1 of baserule2)
+    refillPostFillerWindow(consideredPostFiller1, postFiller1, postFiller1MinIndex, n);
+    refillPostFillerWindow(consideredPostFiller2, postFiller2, postFiller2MinIndex, n - 1);
+    List<TextRulerRulePattern> genList1 = null;
+    if (consideredPostFiller1.size() + consideredPostFiller2.size() > 0) {
+      genList1 = RapierGeneralizationHelper.getGeneralizationsForRuleItemPatterns(
+              consideredPostFiller1, consideredPostFiller2);
+    }
+
+    List<TextRulerRulePattern> genList2 = null;
+    List<TextRulerRulePattern> genList3 = null;
+
+    // due to performance reasons the user can switch this off
+    if (useAllGenSetsAtSpecialization) {
+      // 2. n-1 vs. n (n-1 elements of baserule1, n of baserule2)
+      refillPostFillerWindow(consideredPostFiller1, postFiller1, postFiller1MinIndex, n - 1);
+      refillPostFillerWindow(consideredPostFiller2, postFiller2, postFiller2MinIndex, n);
+      if (consideredPostFiller1.size() + consideredPostFiller2.size() > 0) {
+        genList2 = RapierGeneralizationHelper.getGeneralizationsForRuleItemPatterns(
+                consideredPostFiller1, consideredPostFiller2);
+      }
+
+      // 3. n vs. n (n elements of baserule1, n of baserule2)
+      refillPostFillerWindow(consideredPostFiller1, postFiller1, postFiller1MinIndex, n);
+      refillPostFillerWindow(consideredPostFiller2, postFiller2, postFiller2MinIndex, n);
+      if (consideredPostFiller1.size() + consideredPostFiller2.size() > 0) {
+        genList3 = RapierGeneralizationHelper.getGeneralizationsForRuleItemPatterns(
+                consideredPostFiller1, consideredPostFiller2);
+      }
+    }
+
+    // TODO optimize and don't store all 3 genLists ! but for debugging
+    // purposes we keep them for now !
+    Set<TextRulerRulePattern> genSet = new HashSet<TextRulerRulePattern>();
+    if (genList1 != null) {
+      genSet.addAll(genList1);
+    }
+    if (genList2 != null) {
+      genSet.addAll(genList2);
+    }
+    if (genList3 != null) {
+      genSet.addAll(genList3);
+    }
+
+    List<RapierRule> resultRules = new ArrayList<RapierRule>();
+
+    for (TextRulerRulePattern l : genSet) {
+      RapierRule newRule = curRule.copy();
+      for (TextRulerRuleItem t : l) {
+        newRule.addPostFillerItem(t);
+      }
+      newRule.setParent1PostFiller_n(n);
+      newRule.setParent2PostFiller_n(n);
+      resultRules.add(newRule);
+    }
+    return resultRules;
+  }
+
+  /**
+   * Empties {@code window} and fills it with the items of {@code source} from {@code minIndex}
+   * up to, but not including, {@code endIndex} (bounded by the size of {@code source}).
+   */
+  private void refillPostFillerWindow(TextRulerRulePattern window, TextRulerRulePattern source,
+          int minIndex, int endIndex) {
+    window.clear();
+    for (int i = minIndex; i < source.size() && i < endIndex; i++) {
+      window.add(source.get(i));
+    }
+  }
+
+  /**
+   * Always returns {@code false}.
+   * NOTE(review): judging by the name, this presumably tells the base learner not to collect
+   * negatively covered instances while testing rules - confirm against TextRulerBasicLearner.
+   */
+  @Override
+  public boolean collectNegativeCoveredInstancesWhenTesting() {
+    return false;
+  }
+
+  public String getResultString() {
+    if (slotRules != null)
+      return slotRules.getTMFileString(getTMFileHeaderString(), 1000); // if
+    // a
+    // rule
+    // is
+    // >100
+    // characters,
+    // it
+    // gets
+    // replaced
+    // by
+    // a
+    // placeholder
+    else
+      return "No results available yet!";
+  }
+
+  public void setParameters(Map<String, Object> params) {
+    if (TextRulerToolkit.DEBUG)
+      saveParametersToTempFolder(params);
+
+    // TODO add try catch
+    if (params.containsKey(COMPRESSION_FAIL_MAX_COUNT_KEY))
+      compressionFailMaxCount = (Integer) params.get(COMPRESSION_FAIL_MAX_COUNT_KEY);
+
+    if (params.containsKey(RULELIST_SIZE_KEY))
+      ruleListSize = (Integer) params.get(RULELIST_SIZE_KEY);
+
+    if (params.containsKey(PAIR_COUNT_KEY))
+      pairCount = (Integer) params.get(PAIR_COUNT_KEY);
+
+    if (params.containsKey(LIM_NO_IMPROVEMENTS_KEY))
+      limNoImprovements = (Integer) params.get(LIM_NO_IMPROVEMENTS_KEY);
+
+    if (params.containsKey(NOISE_THESHOLD_KEY))
+      noiseThreshold = (Float) params.get(NOISE_THESHOLD_KEY);
+
+    if (params.containsKey(POSTAG_ROOTTYPE_KEY))
+      posTagRootTypeName = (String) params.get(POSTAG_ROOTTYPE_KEY);
+
+    if (params.containsKey(MIN_COVERED_POSITIVES_KEY))
+      minCoveredPositives = (Integer) params.get(MIN_COVERED_POSITIVES_KEY);
+
+    if (params.containsKey(USE_ALL_GENSETS_AT_SPECIALIZATION_KEY))
+      useAllGenSetsAtSpecialization = (Boolean) params.get(USE_ALL_GENSETS_AT_SPECIALIZATION_KEY);
+  }
+
+  // TODO share this between algorithms (e.g. LP2 and RAPIER ?) and make a
+  // maximum size of the cache, etc. like CasCache?
+  /**
+   * Makes sure every given rule has covering statistics. Rules whose rule string is already in
+   * the cache receive a copy of the cached statistics; all other rules are tested on the example
+   * documents and their statistics are cached afterwards. Before new entries are added,
+   * arbitrary cache entries are dropped until cache size plus new entries is at most 10000.
+   * Nothing is cached if an abort was requested during testing.
+   *
+   * @param rules
+   *          the rules that need covering statistics
+   */
+  protected void testRulesIfNotCached(List<RapierRule> rules) {
+    List<TextRulerRule> uncachedRules = new ArrayList<TextRulerRule>();
+
+    for (RapierRule rule : rules) {
+      String ruleString = rule.getRuleString();
+      if (!cachedTestedRuleStatistics.containsKey(ruleString)) {
+        uncachedRules.add(rule);
+        continue;
+      }
+      rule.setCoveringStatistics(cachedTestedRuleStatistics.get(ruleString).copy());
+      TextRulerToolkit.log("CACHE HIT; size=" + cachedTestedRuleStatistics.size());
+    }
+
+    if (uncachedRules.size() == 0) {
+      return;
+    }
+
+    testRulesOnDocumentSet(uncachedRules, exampleDocuments);
+    if (shouldAbort()) {
+      return;
+    }
+
+    // TODO is this worthwhile ? keep an eye on the memory consumption !!
+    while (cachedTestedRuleStatistics.size() + uncachedRules.size() > 10000) {
+      Iterator<String> keys = cachedTestedRuleStatistics.keySet().iterator();
+      if (!keys.hasNext()) {
+        break;
+      }
+      // drop whichever entry the key set happens to deliver first
+      keys.next();
+      keys.remove();
+    }
+
+    for (TextRulerRule tested : uncachedRules) {
+      cachedTestedRuleStatistics.put(tested.getRuleString(), tested.getCoveringStatistics()
+              .copy());
+    }
+  }
+
+}

Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/Rapier.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/Rapier.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierDebugHelper.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierDebugHelper.java?rev=1152792&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierDebugHelper.java (added)
+++ uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierDebugHelper.java Mon Aug  1 14:21:12 2011
@@ -0,0 +1,19 @@
+package org.apache.uima.tm.textruler.rapier;
+
+import java.util.Set;
+
+import org.apache.uima.tm.textruler.core.TextRulerExample;
+
+/**
+ * Debugging aid for the RAPIER learner.
+ */
+public class RapierDebugHelper {
+
+  /**
+   * Checks the invariant that a rule created from two parent rules covers at least every
+   * positive example that either parent covers.
+   *
+   * @param rule
+   *          the rule to check; its parents and all covering statistics must be set
+   * @return {@code true} if the rule's covered positives include those of both parents
+   */
+  public static boolean debugCheckIfRuleCoversItsSeedRuleCoverings(RapierRule rule) {
+    Set<TextRulerExample> coveredByParent1 = rule.getParent1().getCoveringStatistics()
+            .getCoveredPositiveExamples();
+    Set<TextRulerExample> coveredByParent2 = rule.getParent2().getCoveringStatistics()
+            .getCoveredPositiveExamples();
+    Set<TextRulerExample> coveredByRule = rule.getCoveringStatistics()
+            .getCoveredPositiveExamples();
+
+    if (!coveredByRule.containsAll(coveredByParent1)) {
+      return false;
+    }
+    return coveredByRule.containsAll(coveredByParent2);
+  }
+
+}

Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierDebugHelper.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierDebugHelper.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierFactory.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierFactory.java?rev=1152792&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierFactory.java (added)
+++ uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierFactory.java Mon Aug  1 14:21:12 2011
@@ -0,0 +1,57 @@
+package org.apache.uima.tm.textruler.rapier;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.uima.tm.textruler.extension.TextRulerLearner;
+import org.apache.uima.tm.textruler.extension.TextRulerLearnerDelegate;
+import org.apache.uima.tm.textruler.extension.TextRulerLearnerFactory;
+import org.apache.uima.tm.textruler.extension.TextRulerLearnerParameter;
+import org.apache.uima.tm.textruler.extension.TextRulerLearnerParameter.MLAlgorithmParamType;
+
+public class RapierFactory implements TextRulerLearnerFactory {
+  // Creates a Rapier learner; note that additionalFolderPath is not forwarded to the constructor.
+  public TextRulerLearner createAlgorithm(String inputFolderPath, String additionalFolderPath,
+          String prePropTMFile, String tempFolderPath, String[] fullSlotTypeNames,
+          Set<String> filterSet, TextRulerLearnerDelegate delegate) {
+    return new Rapier(inputFolderPath, prePropTMFile, tempFolderPath, fullSlotTypeNames, filterSet,
+            delegate);
+  }
+  // Declares the eight Rapier parameters, each with a key constant, a label and a value type.
+  public TextRulerLearnerParameter[] getAlgorithmParameters() {
+    TextRulerLearnerParameter[] result = new TextRulerLearnerParameter[8];
+
+    result[0] = new TextRulerLearnerParameter(Rapier.COMPRESSION_FAIL_MAX_COUNT_KEY,
+            "Maximum Compression Fail Count", MLAlgorithmParamType.ML_INT_PARAM);
+    result[1] = new TextRulerLearnerParameter(Rapier.RULELIST_SIZE_KEY, "Internal Rules List Size",
+            MLAlgorithmParamType.ML_INT_PARAM);
+    result[2] = new TextRulerLearnerParameter(Rapier.PAIR_COUNT_KEY, "Rule Pairs for Generalizing",
+            MLAlgorithmParamType.ML_INT_PARAM);
+    result[3] = new TextRulerLearnerParameter(Rapier.LIM_NO_IMPROVEMENTS_KEY,
+            "Maximum 'No improvement' Count", MLAlgorithmParamType.ML_INT_PARAM);
+    result[4] = new TextRulerLearnerParameter(Rapier.NOISE_THESHOLD_KEY, "Maximum Noise Threshold",
+            MLAlgorithmParamType.ML_FLOAT_PARAM);
+    result[5] = new TextRulerLearnerParameter(Rapier.MIN_COVERED_POSITIVES_KEY,
+            "Minimum Covered Positives Per Rule", MLAlgorithmParamType.ML_INT_PARAM);
+    result[6] = new TextRulerLearnerParameter(Rapier.POSTAG_ROOTTYPE_KEY, "PosTag Root Type",
+            MLAlgorithmParamType.ML_STRING_PARAM);
+    result[7] = new TextRulerLearnerParameter(Rapier.USE_ALL_GENSETS_AT_SPECIALIZATION_KEY,
+            "Use All 3 GenSets at Specialization", MLAlgorithmParamType.ML_BOOL_PARAM);
+    return result;
+  }
+  // Provides the Rapier.STANDARD_* value for every parameter key declared above.
+  public Map<String, Object> getAlgorithmParameterStandardValues() {
+    Map<String, Object> result = new HashMap<String, Object>();
+    result.put(Rapier.COMPRESSION_FAIL_MAX_COUNT_KEY, Rapier.STANDARD_COMPRESSION_FAIL_MAX_COUNT);
+    result.put(Rapier.RULELIST_SIZE_KEY, Rapier.STANDARD_RULELIST_SIZE);
+    result.put(Rapier.PAIR_COUNT_KEY, Rapier.STANDARD_PAIR_COUNT);
+    result.put(Rapier.LIM_NO_IMPROVEMENTS_KEY, Rapier.STANDARD_LIM_NO_IMPROVEMENTS);
+    result.put(Rapier.NOISE_THESHOLD_KEY, Rapier.STANDARD_NOISE_THREHSOLD);
+    result.put(Rapier.POSTAG_ROOTTYPE_KEY, Rapier.STANDARD_POSTAG_ROOTTYPE);
+    result.put(Rapier.MIN_COVERED_POSITIVES_KEY, Rapier.STANDARD_MIN_COVERED_POSITIVES);
+    result.put(Rapier.USE_ALL_GENSETS_AT_SPECIALIZATION_KEY,
+            Rapier.STANDARD_USE_ALL_GENSETS_AT_SPECIALIZATION);
+    return result;
+  }
+}
\ No newline at end of file

Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierFactory.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierGeneralizationHelper.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierGeneralizationHelper.java?rev=1152792&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierGeneralizationHelper.java (added)
+++ uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierGeneralizationHelper.java Mon Aug  1 14:21:12 2011
@@ -0,0 +1,673 @@
+package org.apache.uima.tm.textruler.rapier;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+
+import org.apache.uima.tm.textruler.core.TextRulerRuleItem;
+import org.apache.uima.tm.textruler.core.TextRulerRulePattern;
+import org.apache.uima.tm.textruler.core.TextRulerToolkit;
+import org.apache.uima.tm.textruler.core.TextRulerWordConstraint;
+
+public class RapierGeneralizationHelper {
+
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+  // --- ITEM(s) GENERALIZATION
+  // -------------------------------------------------------------------------------------------------------------
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+  private static ArrayList<TextRulerRuleItem> getGeneralizationsForRuleItems(
+          TextRulerRuleItem item1, TextRulerRuleItem item2) {
+    ArrayList<TextRulerRuleItem> item1List = new ArrayList<TextRulerRuleItem>();
+    ArrayList<TextRulerRuleItem> item2List = new ArrayList<TextRulerRuleItem>();
+    if (item1 != null) // a null item simply leaves its list empty
+      item1List.add(item1);
+    if (item2 != null)
+      item2List.add(item2);
+    return getGeneralizationsForRuleItemLists(item1List, item2List); // delegate to list variant
+  }
+
+  // Generalizes two lists of pattern items into single RapierRuleItems: returns every combination
+  // of the proposed word and tag constraints (class constraints are not generalized yet), each
+  // carrying the computed list length. All input items are cast to RapierRuleItem.
+  private static ArrayList<TextRulerRuleItem> getGeneralizationsForRuleItemLists(
+          ArrayList<TextRulerRuleItem> item1List, ArrayList<TextRulerRuleItem> item2List) {
+    ArrayList<RapierRuleItem> proposedWordConstraints = new ArrayList<RapierRuleItem>();
+    ArrayList<RapierRuleItem> proposedTagConstraints = new ArrayList<RapierRuleItem>();
+    ArrayList<RapierRuleItem> proposedClassConstraints = new ArrayList<RapierRuleItem>();
+    ArrayList<TextRulerRuleItem> result = new ArrayList<TextRulerRuleItem>();
+
+    int resultListLen1 = 0;
+    int resultListLen2 = 0;
+    boolean oneListIsEmpty = false;
+
+    if (item1List.size() == 0 && item2List.size() == 0) {
+      TextRulerToolkit.log("ERROR !"); // both lists empty: only logged, processing continues
+    }
+    if (item1List.size() == 0 || item2List.size() == 0) {
+      // special case: at least one of the two lists has no items
+      oneListIsEmpty = true;
+    }
+
+    boolean hasEmptyWordList = false;
+    int maxWordCount = 0;
+    boolean hasEmptyTagList = false;
+    int maxTagCount = 0;
+    for (TextRulerRuleItem rt : item2List) {
+      RapierRuleItem t = (RapierRuleItem) rt;
+      resultListLen2 += t.isListItem() ? t.listLen() : 1;
+      if (t.getWordConstraints().size() > maxWordCount)
+        maxWordCount = t.getWordConstraints().size();
+      if (t.getWordConstraints().size() == 0)
+        hasEmptyWordList = true;
+      if (t.getTagConstraints().size() > maxTagCount)
+        maxTagCount = t.getTagConstraints().size();
+      if (t.getTagConstraints().size() == 0)
+        hasEmptyTagList = true;
+    }
+    for (TextRulerRuleItem rt : item1List) {
+      RapierRuleItem t = (RapierRuleItem) rt;
+      resultListLen1 += t.isListItem() ? t.listLen() : 1;
+      if (t.getWordConstraints().size() > maxWordCount)
+        maxWordCount = t.getWordConstraints().size();
+      if (t.getWordConstraints().size() == 0)
+        hasEmptyWordList = true;
+      if (t.getTagConstraints().size() > maxTagCount)
+        maxTagCount = t.getTagConstraints().size();
+      if (t.getTagConstraints().size() == 0)
+        hasEmptyTagList = true;
+    }
+    int resultListLen = resultListLen1 > resultListLen2 ? resultListLen1 : resultListLen2; // take
+    // the bigger of both summed lengths; in each sum a list item
+    // counts with its listLen() and every other item counts as 1.
+    // A result of exactly 1 with two non-empty lists presumably means
+    // a single plain item on each side (assumes listLen() >= 1).
+    if (resultListLen == 1 && !oneListIsEmpty)
+      resultListLen = 0; // a list length of 1 is only kept when one
+    // item list is empty; that is not the case in this branch, so the
+    // list length is cleared
+
+    // generalize word constraints:
+    if (hasEmptyWordList) // at least one item has no word constraints
+    {
+      // propose only an item whose word constraints stay empty
+      proposedWordConstraints.add(new RapierRuleItem());
+    } else // create union of both constraints AND (if both constraints
+    // weren't the same) drop constraint
+    {
+      RapierRuleItem proposed = new RapierRuleItem();
+      for (TextRulerRuleItem t : item1List)
+        proposed.addWordConstraints(((RapierRuleItem) t).getWordConstraints());
+      for (TextRulerRuleItem t : item2List)
+        proposed.addWordConstraints(((RapierRuleItem) t).getWordConstraints());
+
+      proposedWordConstraints.add(proposed);
+
+      // if the union of both constraints is a real union (one does not
+      // subsume the other completely),
+      // we have to add the DROPPING OF THE CONSTRAINT as a second
+      // proposed word constraint
+      if (maxWordCount != proposed.getWordConstraints().size()) // the
+      // union differs in size from the largest single constraint set,
+      // i.e. no input item's word constraints already contain all of
+      // the others. NOTE(review): this assumes addWordConstraints
+      // de-duplicates entries; otherwise the sizes would differ even
+      // for identical constraint sets - confirm in RapierRuleItem.
+      // In that case dropping the constraint is proposed as well:
+      {
+        proposedWordConstraints.add(new RapierRuleItem());
+      }
+    }
+
+    if (hasEmptyTagList) // at least one item has no tag constraints
+    {
+      // propose only an item whose tag constraints stay empty
+      proposedTagConstraints.add(new RapierRuleItem());
+    } else // create union of both constraints AND (if both constraints
+    // weren't the same) drop constraint
+    {
+      RapierRuleItem proposed = new RapierRuleItem();
+      for (TextRulerRuleItem t : item1List)
+        proposed.addTagConstraints(((RapierRuleItem) t).getTagConstraints());
+      for (TextRulerRuleItem t : item2List)
+        proposed.addTagConstraints(((RapierRuleItem) t).getTagConstraints());
+
+      proposedTagConstraints.add(proposed);
+
+      // if the union of both constraints is a real union (one does not
+      // subsume the other completely),
+      // we have to add the DROPPING OF THE CONSTRAINT as a second
+      // proposed tag constraint
+      if (maxTagCount != proposed.getTagConstraints().size()) // the union
+      // differs in size from the largest single tag constraint set
+      // (same de-duplication assumption as for the word constraints
+      // above), so dropping the tag constraint is proposed as well:
+      {
+        proposedTagConstraints.add(new RapierRuleItem());
+      }
+    }
+
+    // TODO semantic class generalization
+    proposedClassConstraints.add(new RapierRuleItem()); // for now the
+    // only proposal is an item without any class constraint, so the
+    // class loop below runs exactly once per word/tag combination and
+    // copies no class constraints (assuming a fresh RapierRuleItem has
+    // none - TODO confirm)
+
+    // finally, create all combinations of the above proposed items
+    for (RapierRuleItem wt : proposedWordConstraints) {
+      for (RapierRuleItem tt : proposedTagConstraints) {
+        for (RapierRuleItem ct : proposedClassConstraints) {
+          RapierRuleItem newItem = new RapierRuleItem();
+          for (TextRulerWordConstraint wi : wt.getWordConstraints())
+            newItem.addWordConstraint(wi.copy());
+          for (String ti : tt.getTagConstraints())
+            newItem.addTagConstraint(ti);
+          for (String tc : ct.getClassConstraints())
+            newItem.addClassConstraint(tc);
+          newItem.setListLen(resultListLen);
+          newItem.setListBeginsAtZero(oneListIsEmpty && resultListLen > 0);
+          result.add(newItem);
+        }
+      }
+    }
+    return result;
+  }
+
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+  // --- EQUAL SIZE PATTERN GENERALIZATION
+  // --------------------------------------------------------------------------------------------------
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+  private static ArrayList<TextRulerRulePattern> getGeneralizationsForRuleItemPatternsOfEqualSize(
+          TextRulerRulePattern pattern1, TextRulerRulePattern pattern2) {
+    ArrayList<TextRulerRulePattern> resultList = new ArrayList<TextRulerRulePattern>();
+    // one list of candidate generalizations per item position of the two patterns:
+    ArrayList<ArrayList<TextRulerRuleItem>> generalizationTable = new ArrayList<ArrayList<TextRulerRuleItem>>();
+    Iterator<TextRulerRuleItem> it2 = pattern2.iterator();
+    if (pattern1.size() != pattern2.size()) {
+      TextRulerToolkit.log("ERROR!");
+    }
+    for (TextRulerRuleItem item1 : pattern1) {
+      TextRulerRuleItem item2 = it2.next();
+      // get all possible LGGs (candidate generalizations) of the
+      // current two elements and save them into the table
+      ArrayList<TextRulerRuleItem> allLGGs = getGeneralizationsForRuleItems(item1, item2);
+      generalizationTable.add(allLGGs);
+      // NOTE(review): when the sizes differ only "ERROR!" is logged
+      // above and this loop still runs: a shorter pattern2 makes
+      // it2.next() throw NoSuchElementException, a longer pattern2
+      // has its surplus items silently ignored. The caller visible
+      // in getGeneralizationsForPatternSegmentation checks for equal
+      // sizes first; confirm that for any other caller.
+    }
+
+    // now we have patternSize lists of possible generalizations, one list
+    // per original pattern item pair of
+    // pattern1 and pattern2. we now have to build all possible
+    // combinations. Each combination is a
+    // new pattern
+    recursiveBuildAllRuleItemCombinations(generalizationTable, 0, new TextRulerRulePattern(),
+            resultList);
+    return resultList;
+  }
+
+  private static void recursiveBuildAllRuleItemCombinations(
+          ArrayList<ArrayList<TextRulerRuleItem>> table, int curIndex,
+          TextRulerRulePattern currentPattern, ArrayList<TextRulerRulePattern> resultPatterns) {
+    if (curIndex >= table.size()) {
+      // one item chosen from every table row: emit a copy of the pattern (each item is copied too)
+      TextRulerRulePattern copy = new TextRulerRulePattern();
+      for (TextRulerRuleItem item : currentPattern)
+        copy.add(item.copy());
+      resultPatterns.add(copy);
+    } else {
+      for (TextRulerRuleItem item : table.get(curIndex)) { // try each candidate of this row
+        currentPattern.add(item);
+        recursiveBuildAllRuleItemCombinations(table, curIndex + 1, currentPattern, resultPatterns);
+        currentPattern.remove(currentPattern.size() - 1); // backtrack: drop the candidate again
+      }
+    }
+  }
+
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+  // --- DISTINCT SIZE PATTERN GENERALIZATION
+  // -----------------------------------------------------------------------------------------------
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+
+  // Low-level generalization of two patterns of different sizes. If the shorter pattern has at
+  // most one item, both patterns are generalized into single items. Very long or very unequal
+  // patterns yield one unconstrained list item. Otherwise every mapping of shorter-pattern items
+  // onto longer-pattern items is generalized. Returns null (only if TextRulerToolkit.DEBUG is
+  // set) for equal sizes or two empty patterns. The optimizing variant uses this per segment.
+  private static ArrayList<TextRulerRulePattern> getGeneralizationsForRuleItemPatternsOfDistinctSize(
+          TextRulerRulePattern pattern1, TextRulerRulePattern pattern2) {
+    ArrayList<TextRulerRulePattern> resultList = new ArrayList<TextRulerRulePattern>();
+    if (pattern1.size() == pattern2.size()) {
+      TextRulerToolkit.log("ERROR! CALL getGeneralizationsForRuleItemPatternsOfEqualSize instead!");
+      if (TextRulerToolkit.DEBUG)
+        return null;
+    }
+
+    TextRulerRulePattern longerPattern = pattern1;
+    TextRulerRulePattern shorterPattern = pattern2;
+    if (pattern2.size() > pattern1.size()) {
+      longerPattern = pattern2;
+      shorterPattern = pattern1;
+    }
+
+    if (longerPattern.size() <= 1 || shorterPattern.size() <= 1) {
+      // special case 1: one of the patterns is empty, and
+      // special case 2: one of the patterns has only ONE element
+      if (longerPattern.size() + shorterPattern.size() == 0) {
+        TextRulerToolkit.log("ERROR !! BOTH PATTERNS ARE EMPTY!");
+        if (TextRulerToolkit.DEBUG)
+          return null;
+      }
+
+      // get all possible generalizations of the two patterns. result of
+      // each generalization is ONE rule item, so we
+      // don't use TextRulerRulePattern here since this IS NOT a rule
+      // pattern! it's a list of possible generalizations:
+      ArrayList<TextRulerRuleItem> generalizations = getGeneralizationsForRuleItemLists(
+              longerPattern, shorterPattern);
+      // create a one element result pattern for each:
+      for (TextRulerRuleItem item : generalizations) {
+        TextRulerRulePattern p = new TextRulerRulePattern();
+        p.add(item);
+        resultList.add(p);
+      }
+    }
+    // special case 3: size difference > 6 or more than 10 items // TODO make those values configurable ?
+    else if (((longerPattern.size() - shorterPattern.size()) > 6) || (longerPattern.size() > 10)) {
+      int resultListLen1 = 0;
+      for (TextRulerRuleItem rt : shorterPattern)
+        resultListLen1 += ((RapierRuleItem) rt).isListItem() ? ((RapierRuleItem) rt).listLen() : 1;
+      int resultListLen2 = 0;
+      for (TextRulerRuleItem rt : longerPattern)
+        resultListLen2 += ((RapierRuleItem) rt).isListItem() ? ((RapierRuleItem) rt).listLen() : 1;
+      // collapse everything into ONE item without constraints, using the larger summed length:
+      RapierRuleItem singleItem = new RapierRuleItem();
+      singleItem.setListLen(resultListLen1 > resultListLen2 ? resultListLen1 : resultListLen2);
+      TextRulerRulePattern singleItemPattern = new TextRulerRulePattern();
+      singleItemPattern.add(singleItem);
+      resultList.add(singleItemPattern);
+    } else { // sizes are different and both > 1
+      // create all possible generalization combinations, that is: how can
+      // we map elements of the shorter pattern
+      // to the ones of the longer pattern and then generalize each
+      // mapping (each group) of items?
+      ArrayList<ArrayList<RapierPatternItemMapping>> combinationList = new ArrayList<ArrayList<RapierPatternItemMapping>>();
+
+      recursiveBuildAllPossiblePatternMappingSequences(longerPattern, shorterPattern,
+              new ArrayList<RapierPatternItemMapping>(), combinationList);
+
+      for (ArrayList<RapierPatternItemMapping> mappingSequence : combinationList) {
+        resultList.addAll(getGeneralizationsForPatternMappingSequence(mappingSequence));
+      }
+    }
+    return resultList;
+  }
+
+  // Appends to resultList every way of mapping the items of the shorter pattern, in order, onto
+  // consecutive runs of items of the longer pattern, e.g.
+  // 1 2 3 4 5 vs. 1 2 3 = 1/1+2+3 2/4 3/5, ...
+  private static void recursiveBuildAllPossiblePatternMappingSequences(
+          TextRulerRulePattern longerPattern, TextRulerRulePattern shorterPattern,
+          ArrayList<RapierPatternItemMapping> currentMappingSequence,
+          ArrayList<ArrayList<RapierPatternItemMapping>> resultList) {
+    int windowSize = longerPattern.size() - shorterPattern.size() + 1;
+
+    if (shorterPattern.size() > longerPattern.size()) {
+      TextRulerToolkit.log("ERROR: SHORTER > LONGER !!"); // only logged, execution continues
+    }
+    if (longerPattern.size() == 0 || shorterPattern.size() == 0) {
+      TextRulerToolkit.log("ERROR: SHORTER == LONGER == 0!"); // logged if EITHER one is empty
+    } else {
+      // if the remaining (sub-)patterns are of equal size or the shorter
+      // one has only one element left, put ALL remaining items of both
+      // into one last item mapping and emit a final mapping sequence:
+      if (shorterPattern.size() == 1 || (longerPattern.size() == shorterPattern.size())) {
+        RapierPatternItemMapping lastMapping = new RapierPatternItemMapping();
+        lastMapping.shorterPattern.addAll(shorterPattern);
+        lastMapping.longerPattern.addAll(longerPattern);
+        ArrayList<RapierPatternItemMapping> newMappingSequence = new ArrayList<RapierPatternItemMapping>();
+        newMappingSequence.addAll(currentMappingSequence);
+        newMappingSequence.add(lastMapping);
+        resultList.add(newMappingSequence);
+      } else { // otherwise we have to create all possible combinations of
+        // the longer and shorter remaining pattern:
+        TextRulerRuleItem firstItem = shorterPattern.get(0);
+        // map firstItem to the longer items 0, 0..1, ... 0..windowSize-1
+        for (int maxi = 0; maxi < windowSize; maxi++) {
+          RapierPatternItemMapping newMapping = new RapierPatternItemMapping();
+          newMapping.shorterPattern.add(firstItem);
+          for (int li = 0; li <= maxi; li++)
+            newMapping.longerPattern.add(longerPattern.get(li));
+          currentMappingSequence.add(newMapping);
+          TextRulerRulePattern restLongerPattern = new TextRulerRulePattern();
+          TextRulerRulePattern restShorterPattern = new TextRulerRulePattern();
+          for (int i = 1; i < shorterPattern.size(); i++)
+            restShorterPattern.add(shorterPattern.get(i));
+          for (int i = maxi + 1; i < longerPattern.size(); i++)
+            restLongerPattern.add(longerPattern.get(i));
+
+          // recurse on the items not consumed by this mapping:
+          recursiveBuildAllPossiblePatternMappingSequences(restLongerPattern, restShorterPattern,
+                  currentMappingSequence, resultList);
+
+          // remove the mapping added above to get back to the same
+          // state as before the recursion:
+          currentMappingSequence.remove(currentMappingSequence.size() - 1);
+        }
+      }
+    }
+  }
+
+  // here the input is called a MAPPING instead of a pattern segmentation in
+  // order to distinguish between the two levels of
+  // dividing the problem: a pattern segmentation is a special mapping of
+  // equal items in the two source patterns that are to be generalized;
+  // the segments that result from that segmentation still need to be
+  // generalized (see getGeneralizationsForPatternSegmentation).
+  // if such a segment has subpatterns of different size,
+  // getGeneralizationsForRuleItemPatternsOfDistinctSize is used to
+  // generalize it, which uses THIS METHOD HERE to get all generalizations for
+  // a specific MAPPING. a mapping (in comparison to the
+  // segmentation!) is a mapping between the longer and shorter pattern items
+  // which then get directly generalized here!
+  // in order to show this difference, we use the (internally exactly the
+  // same!) class RapierPatternItemMapping instead of
+  // RapierPatternSegment!
+  private static ArrayList<TextRulerRulePattern> getGeneralizationsForPatternMappingSequence(
+          ArrayList<RapierPatternItemMapping> patternMappingSequence) {
+    ArrayList<TextRulerRulePattern> resultList = new ArrayList<TextRulerRulePattern>();
+    ArrayList<ArrayList<TextRulerRuleItem>> generalizationTable = new ArrayList<ArrayList<TextRulerRuleItem>>();
+
+    // every mapping has several possible generalizations, so we store all
+    // of them in that generalizationTable, one list of
+    // generalizations for each mapping:
+    for (RapierPatternItemMapping mapping : patternMappingSequence) {
+      ArrayList<TextRulerRuleItem> lggList = getGeneralizationsForRuleItemLists(
+              mapping.shorterPattern, mapping.longerPattern);
+      generalizationTable.add(lggList);
+    }
+
+    // afterwards we again have to create all possible combinations of
+    // those lists (like in the equalSizeGeneralization):
+    // Each combination is a new pattern
+    recursiveBuildAllRuleItemCombinations(generalizationTable, 0, new TextRulerRulePattern(),
+            resultList);
+    return resultList;
+  }
+
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+  // --- FIND MATCHINGS BETWEEN PATTERNS FOR GENERALIZATION
+  // -----------------------------------------------------------------------------------------------
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+
+  // find matches in two patterns by comparing the items of the patterns with equals() and
+  // append all possible segmentations of those two patterns to resultList.
+  private static void recursiveFindPatternSegmentsByMatchingPatternItems(
+          TextRulerRulePattern longerPattern, TextRulerRulePattern shorterPattern,
+          ArrayList<RapierPatternSegment> currentSegmentation,
+          ArrayList<ArrayList<RapierPatternSegment>> resultList) {
+    int cmpWindowSize = longerPattern.size() - shorterPattern.size() + 1;
+
+    // base case of the recursion:
+    // is one (rest-)pattern
+    // empty ?
+    if (longerPattern.size() == 0 || shorterPattern.size() == 0) {
+      // create result segmentation
+      ArrayList<RapierPatternSegment> newSegmentation = new ArrayList<RapierPatternSegment>();
+      newSegmentation.addAll(currentSegmentation); // add current
+      // recursive state, i.e. the segments collected so far.
+      // If one of the two patterns still has items left, they
+      // are appended as one final segment (the other side of
+      // that segment is empty).
+      // If both are empty, the segmentation is complete as it
+      // is.
+      if (longerPattern.size() + shorterPattern.size() > 0) {
+        RapierPatternSegment lastSegment = new RapierPatternSegment();
+        for (TextRulerRuleItem i : shorterPattern)
+          lastSegment.shorterPattern.add(i);
+        for (TextRulerRuleItem i : longerPattern)
+          lastSegment.longerPattern.add(i);
+        newSegmentation.add(lastSegment);
+      }
+      resultList.add(newSegmentation);
+    } else {
+      boolean matched = false;
+      for (int si = 0; si < shorterPattern.size(); si++) {
+        // compare element si with si, si+1, ... si+cmpWindowSize-1 (every match is followed up)
+        for (int li = si; li < si + cmpWindowSize; li++) {
+          if (longerPattern.get(li).equals(shorterPattern.get(si))) {
+            // matched pair found!
+            matched = true;
+            // --> calculate pattern segments, add them to the
+            // current one and pass the rest of the
+            // patterns to the next recursion level:
+            RapierPatternSegment newSegment = new RapierPatternSegment();
+            for (int i = 0; i < si; i++)
+              newSegment.shorterPattern.add(shorterPattern.get(i));
+            for (int i = 0; i < li; i++)
+              newSegment.longerPattern.add(longerPattern.get(i));
+
+            boolean addedLeftSegmentation = false;
+            if (newSegment.longerPattern.size() > 0 || newSegment.shorterPattern.size() > 0) {
+              // only add if the segmentation is not empty!
+              currentSegmentation.add(newSegment);
+              addedLeftSegmentation = true;
+            }
+
+            RapierPatternSegment matchedSegment = new RapierPatternSegment();
+            matchedSegment.shorterPattern.add(shorterPattern.get(si));
+            matchedSegment.longerPattern.add(longerPattern.get(li));
+            currentSegmentation.add(matchedSegment);
+
+            // the rest is now the rest to the right of both (so
+            // li+1 and si+1 to the ends...)
+            TextRulerRulePattern restLongerPattern = new TextRulerRulePattern();
+            TextRulerRulePattern restShorterPattern = new TextRulerRulePattern();
+            for (int i = li + 1; i < longerPattern.size(); i++)
+              restLongerPattern.add(longerPattern.get(i));
+            for (int i = si + 1; i < shorterPattern.size(); i++)
+              restShorterPattern.add(shorterPattern.get(i));
+            // NOTE: the rests swap roles unless restLonger is strictly longer, so the segment
+            // fields longerPattern/shorterPattern need not refer to the same source pattern.
+            if (restLongerPattern.size() > restShorterPattern.size())
+              recursiveFindPatternSegmentsByMatchingPatternItems(restLongerPattern,
+                      restShorterPattern, currentSegmentation, resultList);
+            else
+              recursiveFindPatternSegmentsByMatchingPatternItems(restShorterPattern,
+                      restLongerPattern, currentSegmentation, resultList);
+
+            // remove added segments so that we are in the same
+            // state as before the recursion:
+            if (addedLeftSegmentation)
+              currentSegmentation.remove(currentSegmentation.size() - 1); // pops one of the two
+            // added segments (the list ends with: left segment, matched segment)
+            currentSegmentation.remove(currentSegmentation.size() - 1); // pops
+            // the other one; together both removals restore the list
+            // to its state before this match, whichever of the two
+            // entries each call happens to pop
+          }
+        }
+      }
+      if (!matched) // no equal pair found at all: add the remaining
+      // items of both lists as one final pattern segment
+      {
+        ArrayList<RapierPatternSegment> newSegmentation = new ArrayList<RapierPatternSegment>();
+        newSegmentation.addAll(currentSegmentation);
+
+        RapierPatternSegment lastSegment = new RapierPatternSegment();
+        for (TextRulerRuleItem i : shorterPattern)
+          lastSegment.shorterPattern.add(i);
+        for (TextRulerRuleItem i : longerPattern)
+          lastSegment.longerPattern.add(i);
+        newSegmentation.add(lastSegment);
+        resultList.add(newSegmentation);
+      }
+    }
+  }
+
+  private static ArrayList<TextRulerRulePattern> getGeneralizationsForPatternSegmentation(
+          ArrayList<RapierPatternSegment> patternSegmentation) {
+    // for creating those, we need a table:
+    // each segment of the patternSegmentation creates a bunch of possible
+    // new generalized sub patterns (that's the inner
+    // ArrayList<TextRulerRulePattern>)
+    // since we have a whole sequence of pattern segments (a whole
+    // segmentation), we need the outer ArrayList to collect
+    // all generalizations of all pattern segments:
+    ArrayList<ArrayList<TextRulerRulePattern>> generalizationTable = new ArrayList<ArrayList<TextRulerRulePattern>>();
+
+    // now, we create all generalizations of each pattern segment and
+    // collect them in that table:
+    for (RapierPatternSegment pSeg : patternSegmentation) {
+      ArrayList<TextRulerRulePattern> pSegGeneralizations;
+
+      if (pSeg.longerPattern.size() == pSeg.shorterPattern.size())
+        pSegGeneralizations = getGeneralizationsForRuleItemPatternsOfEqualSize(pSeg.longerPattern,
+                pSeg.shorterPattern);
+      else
+        pSegGeneralizations = getGeneralizationsForRuleItemPatternsOfDistinctSize(
+                pSeg.longerPattern, pSeg.shorterPattern);
+
+      generalizationTable.add(pSegGeneralizations);
+    }
+
+    // finally, we have to build all combinations of them in form of
+    // TextRulerRulePatterns:
+    ArrayList<TextRulerRulePattern> resultList = new ArrayList<TextRulerRulePattern>(); // the
+    // result is a list of new generalized patterns: one pattern
+    // for every way of picking exactly one generalization per
+    // segment and concatenating the picks in segment order.
+    // NOTE(review): if any segment yields no generalization (an
+    // empty list, or null from the distinct-size helper when
+    // TextRulerToolkit.DEBUG is set), the combination step below
+    // produces nothing for an empty list and fails on null -
+    // confirm whether that can occur in practice.
+
+    recursiveBuildAllRuleItemCombinationsFromPatterns(generalizationTable, 0,
+            new TextRulerRulePattern(), resultList);
+
+    return resultList;
+  }
+
+  private static void recursiveBuildAllRuleItemCombinationsFromPatterns(
+          ArrayList<ArrayList<TextRulerRulePattern>> table, int curIndex,
+          TextRulerRulePattern currentPattern, ArrayList<TextRulerRulePattern> resultPatterns) {
+    if (curIndex >= table.size()) {
+      // one sub pattern chosen from every table row: emit a copy of the concatenation (items copied)
+      TextRulerRulePattern copy = new TextRulerRulePattern();
+      for (TextRulerRuleItem item : currentPattern)
+        copy.add(item.copy());
+      resultPatterns.add(copy);
+    } else {
+      for (TextRulerRulePattern pattern : table.get(curIndex)) { // try each sub pattern of this row
+        currentPattern.addAll(pattern);
+        recursiveBuildAllRuleItemCombinationsFromPatterns(table, curIndex + 1, currentPattern,
+                resultPatterns);
+        for (int i = 0; i < pattern.size(); i++) // backtrack: drop the items appended above
+          currentPattern.remove(currentPattern.size() - 1);
+      }
+    }
+  }
+
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+  // --- WORKING ON PATTERNS OF DISTINCT LENGTH - OPTIMIZED
+  // ---------------------------------------------------------------------------------
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+
+  // "optimized", because this method uses the pattern item matching
+  // optimization (search for equal items and make a segmentation, etc.)
+  private static ArrayList<TextRulerRulePattern> getOptimizedGeneralizationsForRuleItemPatternsOfDistinctSize(
+          TextRulerRulePattern pattern1, TextRulerRulePattern pattern2) {
+    ArrayList<ArrayList<RapierPatternSegment>> matchedCombinationList = new ArrayList<ArrayList<RapierPatternSegment>>();
+
+    // in order to reduce the amount of possible combinations how to combine
+    // the elements of the shorter pattern with the
+    // elements of the longer pattern, we first search for equal pattern
+    // items in both patterns. those equal patterns get hardwired
+    // in the combination and the left and right remaining subpatterns stay
+    // as a "divided smaller problem" that needs to be conquered...
+    // the result of the search is a list of possible segmentations of the
+    // patterns. all semgementations are those "smaller"
+    // left problems that we then need to generalize in the original manner.
+    // if no equal items are found, one segmentation with only one segment
+    // (the orignal longer and shoter pattern) is returned and
+    // has to be generalized.
+    if (pattern1.size() > pattern2.size())
+      recursiveFindPatternSegmentsByMatchingPatternItems(pattern1, pattern2,
+              new ArrayList<RapierPatternSegment>(), matchedCombinationList);
+    else
+      recursiveFindPatternSegmentsByMatchingPatternItems(pattern2, pattern1,
+              new ArrayList<RapierPatternSegment>(), matchedCombinationList);
+
+    // if (TextRulerToolkit.DEBUG && matchedCombinationList.size() > 1)
+    // {
+    // TextRulerToolkit.log("PATTERN SEQUENCES FOUND: "+matchedCombinationList.size());
+    // for (ArrayList<RapierPatternSegment> patternSequence :
+    // matchedCombinationList)
+    // {
+    // TextRulerToolkit.log("\tNEXT SEQUENCE");
+    // for (RapierPatternSegment pSeg : patternSequence)
+    // {
+    // TextRulerToolkit.log("\t\t"+pSeg.longerPattern);
+    // TextRulerToolkit.log("\t\t"+pSeg.shorterPattern);
+    // }
+    // }
+    // }
+
+    ArrayList<TextRulerRulePattern> resultList = new ArrayList<TextRulerRulePattern>();
+
+    for (ArrayList<RapierPatternSegment> patternSegmentation : matchedCombinationList) {
+      // TODO filter out possible duplicates ?
+      resultList.addAll(getGeneralizationsForPatternSegmentation(patternSegmentation));
+    }
+    return resultList;
+  }
+
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+  // --- THE FINAL RESULT: a generalization method
+  // -------------------------------------------------------------------------------------------
+  // ----------------------------------------------------------------------------------------------------------------------------------------
+
+  // input: two sequences of rule items (= patterns) that shall be
+  // generalized. Matching items are searched first, both to speed up the
+  // search and to keep the number of generalizations from growing too big.
+  // result: a (probably very large!) list of possible generalizations, e.g.
+  // used for all slot-filler generalizations of two rules.
+  public static ArrayList<TextRulerRulePattern> getGeneralizationsForRuleItemPatterns(
+          TextRulerRulePattern pattern1, TextRulerRulePattern pattern2) {
+    ArrayList<TextRulerRulePattern> result = new ArrayList<TextRulerRulePattern>();
+
+    // if (TextRulerToolkit.DEBUG)
+    // {
+    // TextRulerToolkit.log("\tgetGeneralizationsForRuleItemPatterns:");
+    // TextRulerToolkit.log("\tPattern1:"+pattern1);
+    // TextRulerToolkit.log("\tPattern2:"+pattern2);
+    // }
+
+    if (pattern1.size() == 0 && pattern2.size() == 0) {
+      return result; // return empty list
+    } else if (pattern1.size() == pattern2.size()) // both have the same
+    // pattern item count
+    {
+      // generalizing is easy then: simply generalize each pair of items:
+      result = getGeneralizationsForRuleItemPatternsOfEqualSize(pattern1, pattern2);
+    } else {
+      // TextRulerToolkit.logIf(TextRulerToolkit.DEBUG && pattern1.size()
+      // == 0 || pattern2.size() == 0, "SpecialCaseWithZeroLength");
+      result = getOptimizedGeneralizationsForRuleItemPatternsOfDistinctSize(pattern1, pattern2);
+    }
+
+    // if (TextRulerToolkit.DEBUG)
+    // {
+    // TextRulerToolkit.log("\t\tGeneralizations: "+result.size());
+    // for (TextRulerRulePattern lggPattern : result)
+    // TextRulerToolkit.log("\t\t\t"+lggPattern);
+    // }
+
+    return result;
+  }
+
+}

Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierGeneralizationHelper.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierGeneralizationHelper.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPatternItemMapping.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPatternItemMapping.java?rev=1152792&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPatternItemMapping.java (added)
+++ uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPatternItemMapping.java Mon Aug  1 14:21:12 2011
@@ -0,0 +1,5 @@
+package org.apache.uima.tm.textruler.rapier;
+
+/**
+ * A {@link RapierPatternSegment} subtype that declares no members of its own; everything it
+ * offers is inherited from its superclass.
+ * <p>
+ * NOTE(review): presumably it exists so that an item-to-item mapping can be told apart from an
+ * ordinary segment by its type - confirm against the code that creates instances.
+ */
+public class RapierPatternItemMapping extends RapierPatternSegment {
+
+}

Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPatternItemMapping.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPatternItemMapping.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPatternSegment.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPatternSegment.java?rev=1152792&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPatternSegment.java (added)
+++ uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPatternSegment.java Mon Aug  1 14:21:12 2011
@@ -0,0 +1,25 @@
+package org.apache.uima.tm.textruler.rapier;
+
+import org.apache.uima.tm.textruler.core.TextRulerRuleItem;
+import org.apache.uima.tm.textruler.core.TextRulerRulePattern;
+import org.apache.uima.tm.textruler.core.TextRulerToolkit;
+
+public class RapierPatternSegment {
+
+  TextRulerRulePattern shorterPattern = new TextRulerRulePattern();
+
+  TextRulerRulePattern longerPattern = new TextRulerRulePattern();
+
+  public void debugOutput() {
+
+    TextRulerToolkit.log("\n-------------\nShorterList: ");
+    for (TextRulerRuleItem t : shorterPattern)
+      System.out.print(t.getStringForRuleString(null, null, 0, 1, 0, 1, 0) + "    ");
+    TextRulerToolkit.log("");
+
+    System.out.print("LongerList: ");
+    for (TextRulerRuleItem t : longerPattern)
+      System.out.print(t.getStringForRuleString(null, null, 0, 1, 0, 1, 0) + "    ");
+    TextRulerToolkit.log("");
+  }
+}

Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPatternSegment.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPatternSegment.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPlugin.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPlugin.java?rev=1152792&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPlugin.java (added)
+++ uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPlugin.java Mon Aug  1 14:21:12 2011
@@ -0,0 +1,54 @@
+package org.apache.uima.tm.textruler.rapier;
+
+import org.eclipse.core.runtime.Plugin;
+import org.osgi.framework.BundleContext;
+
+/**
+ * The activator class controls the plug-in life cycle.
+ */
+/**
+ * Bundle activator of the RAPIER plug-in. It keeps a reference to itself between {@code start}
+ * and {@code stop} so that other code can obtain it through {@link #getDefault()}.
+ */
+public class RapierPlugin extends Plugin {
+
+  /** Identifier of this plug-in. */
+  public static final String PLUGIN_ID = "org.apache.uima.tm.textruler.rapier";
+
+  /** The instance that is currently started, or {@code null} if there is none. */
+  private static RapierPlugin instance;
+
+  /**
+   * Creates the activator; the shared instance is only set once {@link #start} has run.
+   */
+  public RapierPlugin() {
+    super();
+  }
+
+  /**
+   * Delegates to the superclass and then publishes this object as the shared instance.
+   * 
+   * @see org.eclipse.core.runtime.Plugin#start(org.osgi.framework.BundleContext)
+   */
+  @Override
+  public void start(BundleContext context) throws Exception {
+    super.start(context);
+    RapierPlugin.instance = this;
+  }
+
+  /**
+   * Clears the shared instance and then delegates to the superclass.
+   * 
+   * @see org.eclipse.core.runtime.Plugin#stop(org.osgi.framework.BundleContext)
+   */
+  @Override
+  public void stop(BundleContext context) throws Exception {
+    RapierPlugin.instance = null;
+    super.stop(context);
+  }
+
+  /**
+   * Gives access to the shared instance.
+   * 
+   * @return the started plug-in instance, or {@code null} before {@code start} / after
+   *         {@code stop}
+   */
+  public static RapierPlugin getDefault() {
+    return instance;
+  }
+
+}

Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPlugin.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPlugin.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPreferencePage.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPreferencePage.java?rev=1152792&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPreferencePage.java (added)
+++ uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPreferencePage.java Mon Aug  1 14:21:12 2011
@@ -0,0 +1,92 @@
+package org.apache.uima.tm.textruler.rapier;
+
+import java.util.ArrayList;
+import java.util.Map;
+
+import org.apache.uima.tm.textruler.TextRulerPlugin;
+import org.apache.uima.tm.textruler.extension.TextRulerController;
+import org.apache.uima.tm.textruler.extension.TextRulerLearnerController;
+import org.apache.uima.tm.textruler.extension.TextRulerLearnerFactory;
+import org.apache.uima.tm.textruler.extension.TextRulerLearnerParameter;
+import org.eclipse.jface.preference.BooleanFieldEditor;
+import org.eclipse.jface.preference.FieldEditor;
+import org.eclipse.jface.preference.FieldEditorPreferencePage;
+import org.eclipse.jface.preference.IPreferenceStore;
+import org.eclipse.jface.preference.StringFieldEditor;
+import org.eclipse.ui.IWorkbench;
+import org.eclipse.ui.IWorkbenchPreferencePage;
+
+
+public class RapierPreferencePage extends FieldEditorPreferencePage implements
+        IWorkbenchPreferencePage {
+
+  public static String ID = "org.apache.uima.tm.textruler.algorithmPages";
+
+  private TextRulerLearnerController algorithmController;
+
+  private IPreferenceStore store;
+
+  private ArrayList<FieldEditor> fields = new ArrayList<FieldEditor>();
+
+  public RapierPreferencePage() {
+    super(FieldEditorPreferencePage.GRID);TextRulerLearnerController ctrl = TextRulerController.getControllerForID("org.apache.uima.tm.textruler.rapier");
+    this.algorithmController = ctrl;
+    store = TextRulerPlugin.getDefault().getPreferenceStore();
+    setPreferenceStore(store);
+  }
+  
+  @Override
+  public void init(IWorkbench workbench) {
+  }
+
+  protected void createFieldEditors() {
+    TextRulerLearnerFactory f = algorithmController.getFactory();
+    TextRulerLearnerParameter[] params = f.getAlgorithmParameters();
+    Map<String, Object> values = f.getAlgorithmParameterStandardValues();
+    if (params != null) {
+      for (int i = 0; i < params.length; i++) {
+        TextRulerLearnerParameter p = params[i];
+        String id = algorithmController.getID() + "." + p.id;
+        FieldEditor l = null;
+        switch (p.type) {
+          case ML_BOOL_PARAM: {
+            l = new BooleanFieldEditor(id, p.name, getFieldEditorParent());
+            fields.add(l);
+            addField(l);
+            store.setDefault(id, (Boolean) values.get(p.id));
+            l.setPreferenceStore(store);
+            break;
+          }
+
+          case ML_FLOAT_PARAM:
+          case ML_INT_PARAM:
+          case ML_STRING_PARAM: {
+            l = new StringFieldEditor(id, p.name, getFieldEditorParent());
+            fields.add(l);
+            addField(l);
+            store.setDefault(id, values.get(p.id).toString());
+            l.setPreferenceStore(store);
+            break;
+          }
+          case ML_SELECT_PARAM:
+            break;
+        }
+      }
+    }
+  }
+
+  @Override
+  protected void performDefaults() {
+    for (FieldEditor f : fields)
+      f.loadDefault();
+    // super.performDefaults();
+  }
+
+  @Override
+  public boolean performOk() {
+    for (FieldEditor f : fields)
+      f.store();
+    // return super.performOk();
+    return true;
+  }
+}
\ No newline at end of file

Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPreferencePage.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textruler.rapier/src/main/java/org/apache/uima/tm/textruler/rapier/RapierPreferencePage.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain