You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2013/06/10 10:05:56 UTC

svn commit: r1491365 - in /uima/sandbox/ruta/trunk/ruta-ep-textruler: ./ src/main/java/org/apache/uima/ruta/textruler/learner/kep/

Author: pkluegl
Date: Mon Jun 10 08:05:55 2013
New Revision: 1491365

URL: http://svn.apache.org/r1491365
Log:
UIMA-2860
- initial import of kep project to textruler project

Added:
    uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/
    uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPFactory.java
    uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPLearner.java
    uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPPreferencePage.java
    uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPRule.java
    uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPRuleItem.java
    uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPRuleItemCondition.java
Modified:
    uima/sandbox/ruta/trunk/ruta-ep-textruler/plugin.xml

Modified: uima/sandbox/ruta/trunk/ruta-ep-textruler/plugin.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/plugin.xml?rev=1491365&r1=1491364&r2=1491365&view=diff
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/plugin.xml (original)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/plugin.xml Mon Jun 10 08:05:55 2013
@@ -233,6 +233,23 @@ under the License.
       </page>
    </extension>
    <extension
+         point="org.eclipse.ui.preferencePages">
+      <page
+            category="org.apache.uima.ruta.textruler"
+            class="org.apache.uima.ruta.textruler.learner.kep.KEPPreferencePage"
+            id="org.apache.uima.ruta.textruler.kep"
+            name="KEP">
+      </page>
+   </extension>
+   <extension
+         point="org.apache.uima.ruta.textruler.learners">
+      <learner
+            class="org.apache.uima.ruta.textruler.learner.kep.KEPFactory"
+            id="org.apache.uima.ruta.textruler.kep"
+            name="KEP">
+      </learner>
+   </extension>
+   <extension
          point="org.eclipse.core.runtime.preferences">
       <initializer
             class="org.apache.uima.ruta.textruler.preferences.TextRulerPreferenceInitializer">

Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPFactory.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPFactory.java?rev=1491365&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPFactory.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPFactory.java Mon Jun 10 08:05:55 2013
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.ruta.textruler.learner.kep;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.uima.ruta.textruler.extension.TextRulerLearner;
+import org.apache.uima.ruta.textruler.extension.TextRulerLearnerDelegate;
+import org.apache.uima.ruta.textruler.extension.TextRulerLearnerFactory;
+import org.apache.uima.ruta.textruler.extension.TextRulerLearnerParameter;
+import org.apache.uima.ruta.textruler.extension.TextRulerLearnerParameter.MLAlgorithmParamType;
+
+/**
+ * TextRuler learner factory for the KEP algorithm. It creates {@link KEPLearner} instances and
+ * describes the parameters (and their default values) that are offered for the learner.
+ */
+public class KEPFactory implements TextRulerLearnerFactory {
+
+  public KEPFactory() {
+    // nothing to set up
+  }
+
+  /**
+   * Creates a new {@link KEPLearner}. All arguments except {@code additionalFolderPath} are handed
+   * to the learner's constructor; {@code additionalFolderPath} is not used.
+   */
+  public TextRulerLearner createAlgorithm(String inputFolderPath, String additionalFolderPath,
+          String preprocessorTMfile, String tempFolderPath, String[] fullSlotTypeNames,
+          Set<String> filterSet, boolean skip, TextRulerLearnerDelegate delegate) {
+    TextRulerLearner learner = new KEPLearner(inputFolderPath, preprocessorTMfile,
+            tempFolderPath, fullSlotTypeNames, filterSet, skip, delegate);
+    return learner;
+  }
+
+  /**
+   * Returns the default value for each offered parameter, keyed by parameter name. The
+   * {@code FILLER_WINDOW} and {@code MAX_FILLER_LENGTH} keys of {@link KEPLearner} are not
+   * offered here.
+   */
+  public Map<String, Object> getAlgorithmParameterStandardValues() {
+    Map<String, Object> defaults = new HashMap<String, Object>();
+    defaults.put(KEPLearner.MAX_EXPAND_RULES, KEPLearner.DEFAULT_MAX_EXPAND_RULES);
+    defaults.put(KEPLearner.MAX_INFILLER_RULES, KEPLearner.DEFAULT_MAX_INFILLER_RULES);
+    return defaults;
+  }
+
+  /**
+   * Describes the offered parameters: two integer parameters, "maxExpandRules" followed by
+   * "maxInfillerRules".
+   */
+  public TextRulerLearnerParameter[] getAlgorithmParameters() {
+    TextRulerLearnerParameter maxExpandRules = new TextRulerLearnerParameter(
+            KEPLearner.MAX_EXPAND_RULES, "maxExpandRules", MLAlgorithmParamType.ML_INT_PARAM);
+    TextRulerLearnerParameter maxInfillerRules = new TextRulerLearnerParameter(
+            KEPLearner.MAX_INFILLER_RULES, "maxInfillerRules", MLAlgorithmParamType.ML_INT_PARAM);
+    return new TextRulerLearnerParameter[] { maxExpandRules, maxInfillerRules };
+  }
+
+}

Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPLearner.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPLearner.java?rev=1491365&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPLearner.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPLearner.java Mon Jun 10 08:05:55 2013
@@ -0,0 +1,1173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.ruta.textruler.learner.kep;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.ConstraintFactory;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.cas.FSMatchConstraint;
+import org.apache.uima.cas.FSTypeConstraint;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.ruta.engine.RutaEngine;
+import org.apache.uima.ruta.textruler.core.TextRulerBasicLearner;
+import org.apache.uima.ruta.textruler.core.TextRulerExample;
+import org.apache.uima.ruta.textruler.core.TextRulerExampleDocument;
+import org.apache.uima.ruta.textruler.core.TextRulerRuleItem;
+import org.apache.uima.ruta.textruler.core.TextRulerRulePattern;
+import org.apache.uima.ruta.textruler.core.TextRulerStatisticsCollector;
+import org.apache.uima.ruta.textruler.core.TextRulerTarget;
+import org.apache.uima.ruta.textruler.core.TextRulerToolkit;
+import org.apache.uima.ruta.textruler.extension.TextRulerLearnerDelegate;
+import org.apache.uima.ruta.textruler.learner.kep.KEPRuleItemCondition.Condition;
+
+public class KEPLearner extends TextRulerBasicLearner {
+  /** Parameter key; KEPFactory registers it as the integer parameter "maxExpandRules". */
+  public static final String MAX_EXPAND_RULES = "maxExpandRules";
+  /** Parameter key; KEPFactory registers it as the integer parameter "maxInfillerRules". */
+  public static final String MAX_INFILLER_RULES = "maxInfillerRules";
+  /** Parameter key "fillerWindow"; its registration in KEPFactory is commented out. */
+  public static final String FILLER_WINDOW = "fillerWindow";
+  /** Parameter key "maxFillerLength"; its registration in KEPFactory is commented out. */
+  public static final String MAX_FILLER_LENGTH = "maxFillerLength";
+  /** Default that KEPFactory reports for MAX_EXPAND_RULES. */
+  public static final int DEFAULT_MAX_EXPAND_RULES = 50;
+  /** Default that KEPFactory reports for MAX_INFILLER_RULES. */
+  public static final int DEFAULT_MAX_INFILLER_RULES = 10;
+  /** Default for FILLER_WINDOW; not reported by KEPFactory while that parameter is disabled. */
+  public static final int DEFAULT_FILLER_WINDOW = 5;
+  /** Default for MAX_FILLER_LENGTH; not reported by KEPFactory while that parameter is disabled. */
+  public static final int DEFAULT_MAX_FILLER_LENGTH = 3;
+  /** Not read by the methods visible here; presumably backs FILLER_WINDOW - TODO confirm. */
+  private int fillerWindow;
+  /** Not read by the methods visible here; presumably backs MAX_FILLER_LENGTH - TODO confirm. */
+  private int maxFillerLength;
+  /** Cap applied in makeInFillerRulesForExample; presumably set from MAX_INFILLER_RULES - TODO confirm. */
+  private int maxInfillerRules;
+  /** Cap applied in expandInFillerRules; presumably set from MAX_EXPAND_RULES - TODO confirm. */
+  private int maxExpandRules;
+  /** Learned rules per target name (slot and boundary names); written by runForSlot and learnRules. */
+  private Map<String, List<KEPRule>> ruleLists = new HashMap<String, List<KEPRule>>();
+  /** Rule lists per target name that makeRemovalRules reads its result list from. */
+  private Map<String, List<KEPRule>> correctionRules = new HashMap<String, List<KEPRule>>();
+  /** Per target name, the positive examples already covered by some rule (maintained in learnRules). */
+  private Map<String, List<TextRulerExample>> coveredExamples = new HashMap<String, List<TextRulerExample>>();
+  /** Per target name, the annotation type chosen by getBlocks (may be null if none was found). */
+  private Map<String, Type> blocks = new HashMap<String, Type>();
+  /** Built in doRun: per slot three consecutive entries - slot, left boundary, right boundary. */
+  private String[] slotNamesWithBoundaries;
+  /** Per target name flag read by doRun and removeBadRules; it is filled outside the code shown here. */
+  private Map<String, Boolean> hasPerfectRules = new HashMap<String, Boolean>();
+  /** Hands every argument unchanged to the TextRulerBasicLearner constructor. */
+  public KEPLearner(String inputDir, String prePropTMFile, String tmpDir, String[] slotNames,
+          Set<String> filterSet, boolean skip, TextRulerLearnerDelegate delegate) {
+    super(inputDir, prePropTMFile, tmpDir, slotNames, filterSet, skip, delegate);
+  }
+
+  @Override
+  protected void doRun() {
+
+    long startTime = System.nanoTime();
+
+    this.exampleDocuments.clearCurrentExamples();
+    prepareCachedCASesWithBoundaries();
+    this.slotNamesWithBoundaries = new String[slotNames.length * 3];
+    for (int i = 0; i < this.slotNames.length; i++) {
+      this.slotNamesWithBoundaries[i * 3] = slotNames[i];
+      this.slotNamesWithBoundaries[i * 3 + 1] = slotNames[i]
+              + TextRulerToolkit.LEFT_BOUNDARY_EXTENSION;
+      this.slotNamesWithBoundaries[i * 3 + 2] = slotNames[i]
+              + TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION;
+
+    }
+    for (int i = 0; i < this.slotNamesWithBoundaries.length; i++) {
+      if (!filterSetWithSlotNames.contains(slotNamesWithBoundaries[i]))
+        this.filterSetWithSlotNames.add(slotNamesWithBoundaries[i]);
+      initializeMapEntries(this.slotNamesWithBoundaries[i]);
+    }
+    for (int i = 0; i < this.slotNamesWithBoundaries.length; i++) {
+      runForSlot(this.slotNamesWithBoundaries[i]);
+      if (slotNamesWithBoundaries[i].contains(TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION)) {
+        if (hasPerfectRules.get(slotNamesWithBoundaries[i - 2]))
+          filterSetWithSlotNames.remove(slotNamesWithBoundaries[i - 2]);
+        if (hasPerfectRules.get(slotNamesWithBoundaries[i - 1]))
+          filterSetWithSlotNames.remove(slotNamesWithBoundaries[i - 1]);
+        if (hasPerfectRules.get(slotNamesWithBoundaries[i]))
+          filterSetWithSlotNames.remove(slotNamesWithBoundaries[i]);
+      }
+      if (shouldAbort())
+        return;
+    }
+    removeBadRules();
+    for (int i = 0; i < this.slotNamesWithBoundaries.length; i++) {
+      List<KEPRule> list = this.ruleLists.get(slotNamesWithBoundaries[i]);
+      if (!shouldAbort() && list != null && !list.isEmpty()) {
+        this.exampleDocuments.createExamplesForTarget(list.get(0).getTarget());
+        if (!hasPerfectRules.get(slotNamesWithBoundaries[i]))
+          makeRemovalRules(list.get(0).getTarget());
+        list = getOptimalRuleCombination(list);
+      }
+    }
+    removeBadRules();
+
+    long estimatedTime = (System.nanoTime() - startTime) / 1000000000;
+    System.out.println(estimatedTime + " seconds needed to learn all rules");
+    sendStatusUpdateToDelegate("Done", TextRulerLearnerState.ML_DONE, true);
+
+  }
+
+  /**
+   * Execute algorithms for slot denoted by slotName
+   * 
+   * @param slotName
+   *          the name of a slot
+   */
+  private void runForSlot(String slotName) {
+
+    sendStatusUpdateToDelegate("Working on " + slotName, TextRulerLearnerState.ML_RUNNING, true);
+    TextRulerTarget target = new TextRulerTarget(slotName,
+            TextRulerTarget.MLTargetType.SINGLE_WHOLE_SLOT, this);
+    this.exampleDocuments.createExamplesForTarget(target);
+    if (!shouldAbort())
+      blocks.put(slotName, getBlocks());
+    if (!shouldAbort())
+      learnRules(target);
+    this.ruleLists.put(slotName, getOptimalRuleCombination(this.ruleLists.get(slotName)));
+    sendStatusUpdateToDelegate(slotName + " done", TextRulerLearnerState.ML_RUNNING, true);
+  }
+
+  /**
+   * Searches for the annotation type that best encloses the positive examples of the current
+   * target ("block" type).
+   * <p>
+   * A type is a candidate if it is not in {@code filterSetWithSlotNames} and annotations of it
+   * strictly longer than the example cover every positive example. Among the candidates with at
+   * most as many annotations as there are positive examples, the one with the highest ratio of
+   * summed example length to summed annotation length wins; on an exact tie the type with more
+   * annotations is preferred.
+   * <p>
+   * The tie rule is only applied once a best type exists. Previously a candidate whose ratio
+   * equalled the initial best ratio of 0 (summed example length 0) dereferenced the still-null
+   * best type and threw a NullPointerException.
+   * 
+   * @return the selected type, or {@code null} if no candidate qualifies
+   */
+  private Type getBlocks() {
+    sendStatusUpdateToDelegate("Searching for Blocks", TextRulerLearnerState.ML_RUNNING, false);
+    Map<String, List<TextRulerExample>> exampleMap = new HashMap<String, List<TextRulerExample>>();
+    Map<String, Double> lengthMap = new HashMap<String, Double>();
+    Map<String, Integer> countMap = new HashMap<String, Integer>();
+    for (TextRulerExampleDocument exampleDocument : exampleDocuments.getDocuments()) {
+      for (AnnotationFS annotation : exampleDocument.getCAS().getAnnotationIndex()) {
+        String typeName = annotation.getType().getName();
+        int annotationLength = annotation.getEnd() - annotation.getBegin();
+        for (TextRulerExample example : exampleDocument.getPositiveExamples()) {
+          AnnotationFS exampleAnnotation = example.getAnnotation();
+          // the annotation must enclose the example and be strictly longer than it
+          if (annotation.getBegin() <= exampleAnnotation.getBegin()
+                  && annotation.getEnd() >= exampleAnnotation.getEnd()
+                  && annotationLength > exampleAnnotation.getEnd() - exampleAnnotation.getBegin()
+                  && !filterSetWithSlotNames.contains(typeName)) {
+            List<TextRulerExample> list = exampleMap.get(typeName);
+            if (list == null) {
+              list = new ArrayList<TextRulerExample>();
+              exampleMap.put(typeName, list);
+            }
+            if (!list.contains(example)) {
+              list.add(example);
+            }
+          }
+        }
+        // total length and number of annotations are tracked for every type
+        double aLength = (double) annotationLength;
+        Double previousLength = lengthMap.get(typeName);
+        lengthMap.put(typeName, previousLength == null ? aLength : previousLength + aLength);
+        Integer previousCount = countMap.get(typeName);
+        countMap.put(typeName, previousCount == null ? 1 : previousCount + 1);
+      }
+    }
+
+    List<TextRulerExample> allPositives = exampleDocuments.getAllPositiveExamples();
+    int positiveCount = allPositives.size();
+
+    // candidates: types whose annotations enclose every positive example
+    List<Type> result = new ArrayList<Type>();
+    for (Map.Entry<String, List<TextRulerExample>> entry : exampleMap.entrySet()) {
+      if (entry.getValue().size() == positiveCount) {
+        result.add(exampleDocuments.getDocuments().get(0).getCAS().getTypeSystem().getType(
+                entry.getKey()));
+      }
+    }
+    double exLength = 0;
+    for (TextRulerExample ex : allPositives) {
+      exLength += (double) (ex.getAnnotation().getEnd() - ex.getAnnotation().getBegin());
+    }
+    double bestRatio = 0;
+    Type bestType = null;
+    for (Type type : result) {
+      int count = countMap.get(type.getName());
+      if (count > positiveCount) {
+        continue;
+      }
+      double ratio = exLength / lengthMap.get(type.getName());
+      boolean better = ratio > bestRatio;
+      // the tie rule needs an existing best type to compare against
+      boolean tieWithMoreAnnotations = bestType != null && ratio == bestRatio
+              && count > countMap.get(bestType.getName());
+      if (better || tieWithMoreAnnotations) {
+        bestType = type;
+        bestRatio = ratio;
+      }
+    }
+    sendStatusUpdateToDelegate("Searching for Blocks done", TextRulerLearnerState.ML_RUNNING, true);
+    return bestType;
+  }
+
+  private void learnRules(TextRulerTarget target) {
+
+    List<KEPRule> ruleList = this.ruleLists.get(target.getSingleSlotTypeName());
+    List<TextRulerExample> coveredExamples = this.coveredExamples.get(target
+            .getSingleSlotTypeName());
+    List<TextRulerExample> positiveExamples = this.exampleDocuments.getAllPositiveExamples();
+
+    for (TextRulerExample e : positiveExamples) {
+      if (!coveredExamples.contains(e)) {
+        ruleList.addAll(makeInFillerRulesForExample(e));
+        // ruleList.addAll(generalizeForRepitition(ruleList));
+      }
+      for (KEPRule rule : ruleList)
+        for (TextRulerExample ex : rule.getCoveringStatistics().getCoveredPositiveExamples())
+          if (!coveredExamples.contains(ex))
+            coveredExamples.add(ex);
+
+      if (shouldAbort())
+        return;
+    }
+    ruleList.addAll(getCandidateClassificationRules(target));
+    ruleList = getBestAndOptimalRules(ruleList);
+    ruleList.addAll(makePostFillers(ruleList, true));
+    ruleList = getOptimalRuleCombination(ruleList);
+    this.ruleLists.put(target.getSingleSlotTypeName(), ruleList);
+  }
+
+  private List<KEPRule> makeInFillerRulesForExample(TextRulerExample e) {
+    sendStatusUpdateToDelegate("Searching for Infiller Rules for "
+            + e.getTarget().getSingleSlotTypeName().substring(
+                    e.getTarget().getSingleSlotTypeName().lastIndexOf(".") + 1),
+            TextRulerLearnerState.ML_RUNNING, false);
+    Collection<KEPRule> rules = new HashSet<KEPRule>();
+
+    rules = new HashSet<KEPRule>();
+    rules = expandInFillerRules(e, rules, true);
+    if (rules.size() > maxInfillerRules) {
+      List<KEPRule> list = new ArrayList<KEPRule>(rules);
+      Collections.sort(list, new KEPRuleComparator(e.getDocumentCAS()));
+      // TODO this is a parameter!
+      rules = new HashSet<KEPRule>(list.subList(0, maxInfillerRules));
+    }
+    List<KEPRule> result = new ArrayList<KEPRule>(rules);
+    if (shouldAbort())
+      return result;
+
+    testRulesOnDocumentSet(result, exampleDocuments);
+    sendStatusUpdateToDelegate("Searching for Infiller Rules for "
+            + e.getTarget().getSingleSlotTypeName().substring(
+                    e.getTarget().getSingleSlotTypeName().lastIndexOf(".") + 1) + " done",
+            TextRulerLearnerState.ML_RUNNING, true);
+    return new ArrayList<KEPRule>(rules);
+  }
+  /**
+   * Recursively grows infiller rule patterns for the example e, one rule item per round.
+   * An empty rule collection is seeded with one rule per annotation that
+   * getAnnotationsStartingAt returns for the begin of the example; in later rounds every rule
+   * whose last item ends before the end of the example is extended with the annotations found
+   * at that end. The recursion stops in the round after one in which no rule was extended.
+   * 
+   * @param e
+   *          the example whose annotation span is to be covered
+   * @param rules
+   *          the rules built so far (empty on the first call)
+   * @param expanding
+   *          false if the previous round extended nothing; the rules are then returned
+   * @return the expanded rules, cut down to at most maxExpandRules rules
+   */
+  private Collection<KEPRule> expandInFillerRules(TextRulerExample e, Collection<KEPRule> rules,
+          boolean expanding) {
+    // keep only the first maxExpandRules rules in KEPRuleComparator order
+    if (rules.size() > maxExpandRules) {
+      List<KEPRule> list = new ArrayList<KEPRule>(rules);
+      Collections.sort(list, new KEPRuleComparator(e.getDocumentCAS()));
+      rules = new HashSet<KEPRule>(list.subList(0, maxExpandRules));
+    }
+    if (!expanding) {
+      return rules;
+    }
+    // build the rule set of the next round
+    Collection<KEPRule> expandedRules = new HashSet<KEPRule>();
+    if (rules.isEmpty()) { // first round: one seed rule per annotation found at the example begin
+      List<AnnotationFS> seeds = getAnnotationsStartingAt(e.getDocumentCAS(), e.getAnnotation()
+              .getBegin(), e.getAnnotation().getEnd());
+      for (AnnotationFS each : seeds) {
+        KEPRuleItem item = new KEPRuleItem(each);
+        KEPRule rule = new KEPRule(this, e.getTarget());
+        rule.addInFillerItem(item);
+        expandedRules.add(rule);
+      }
+    } else {
+      expanding = false; // set to true again below as soon as one rule is extended
+      for (KEPRule eachRule : rules) {
+        TextRulerRulePattern inFiller = eachRule.getInFiller();
+        KEPRuleItem lastItem = (KEPRuleItem) inFiller.get(inFiller.size() - 1);
+        int end = lastItem.getEnd();
+        if (end >= e.getAnnotation().getEnd()) { // rule already reaches the end of the example
+          if (!expandedRules.contains(eachRule)) {
+            expandedRules.add(eachRule);
+          }
+        } else {
+          List<AnnotationFS> annotationsStartingAt = getAnnotationsStartingAt(e.getDocumentCAS(),
+                  end, e.getAnnotation().getEnd());
+          if (annotationsStartingAt.isEmpty() && !expandedRules.contains(eachRule)) { // nothing to append: keep the rule unchanged
+            expandedRules.add(eachRule);
+          } else {
+            expanding = true;
+            for (AnnotationFS eachFS : annotationsStartingAt) {
+              if (eachFS.getType().getName().equals(lastItem.getType().getName())) { // same type again: widen the last item in place (modifies eachRule itself)
+                lastItem.setAnnotation(eachFS);
+                lastItem.setMax(lastItem.getMax() + 1).setReluctant(true);
+                expandedRules.add(eachRule);
+              } else { // different type: continue with a copy of the rule plus a new item
+                KEPRule newRule = new KEPRule(eachRule);
+                KEPRuleItem newItem = new KEPRuleItem(eachFS);
+                newRule.addInFillerItem(newItem);
+                expandedRules.add(newRule);
+              }
+            }
+          }
+        }
+      }
+    }
+    return expandInFillerRules(e, expandedRules, expanding);
+  }
+
+  private List<KEPRule> getCandidateClassificationRules(TextRulerTarget target) {
+    sendStatusUpdateToDelegate("Searching for Candidate Classification Rules for "
+            + target.getSingleSlotTypeName().substring(
+                    target.getSingleSlotTypeName().lastIndexOf(".") + 1),
+            TextRulerLearnerState.ML_RUNNING, false);
+    List<KEPRule> result = new ArrayList<KEPRule>();
+    List<Type> types = getTokensInNExamples(exampleDocuments.getAllPositiveExamples(),
+            exampleDocuments.getAllPositiveExamples().size() / 2, true);
+
+    for (Type type : types) {
+      result.add(new KEPRule(this, target).addInFillerItem(new KEPRuleItem(type)));
+    }
+    testRulesOnDocumentSet(result, exampleDocuments);
+    // result = getBestAndOptimalRules(result);
+    result = addConditions(result, target);
+    sendStatusUpdateToDelegate("Searching for Candidate Classification Rules for "
+            + target.getSingleSlotTypeName().substring(
+                    target.getSingleSlotTypeName().lastIndexOf(".") + 1) + " done",
+            TextRulerLearnerState.ML_RUNNING, true);
+    return result;
+  }
+
+  private List<KEPRule> addConditions(List<KEPRule> rules, TextRulerTarget target) {
+    List<KEPRule> result = new ArrayList<KEPRule>();
+    List<TextRulerExample> allCoveredExamples = getCoveredExamples(rules);
+
+    List<Type> containedTypes = getTokensInNExamples(exampleDocuments.getAllPositiveExamples(),
+            exampleDocuments.getAllPositiveExamples().size() / 3, false);
+    if (!containedTypes.isEmpty()) {
+      for (KEPRule rule : rules) {
+        KEPRuleItem ruleItem = (KEPRuleItem) rule.getInFiller().get(0);
+        for (Type type : containedTypes) {
+          if (!type.getName().equals(ruleItem.getType().getName())
+                  && !ruleItem.containsAndCondition(type)) {
+            result.add(new KEPRule(this, target).addInFillerItem(ruleItem.copy().addAndCondition(
+                    new KEPRuleItemCondition(type, Condition.CONTAINS, false))));
+          }
+        }
+      }
+    }
+
+    testRulesOnDocumentSet(result, exampleDocuments);
+    result = getBestAndOptimalRules(result);
+    List<KEPRule> toRefine = new ArrayList<KEPRule>();
+    List<KEPRule> toRemove = new ArrayList<KEPRule>();
+    for (KEPRule rule : result) {
+      if (rule.getCoveringStatistics().getCoveredPositivesCount() == 0)
+        toRemove.add(rule);
+      else if (rule.getCoveringStatistics().getCoveredNegativesCount() > 0
+              && rule.getPostFiller().size() < 5)
+        toRefine.add(rule);
+    }
+    result.removeAll(toRemove);
+    result.removeAll(toRefine);
+    result = getBestAndOptimalRules(result);
+    if (getCoveredExamples(result).size() == allCoveredExamples.size()) {
+      return result;
+    }
+    if (toRefine.size() > 0) {
+      result.addAll(addConditions(toRefine, target));
+    }
+    sendStatusUpdateToDelegate("Adding conditions to rules for "
+            + target.getSingleSlotTypeName().substring(
+                    target.getSingleSlotTypeName().lastIndexOf(".") + 1) + " done",
+            TextRulerLearnerState.ML_RUNNING, true);
+    return result;
+  }
+  /**
+   * Extends rules that still cover negative examples by one post filler item and then hands
+   * over to makePreFillers; the two methods call each other until a round of each adds nothing
+   * new. Rules that cover no negatives are taken over unchanged.
+   * 
+   * @param baseRules
+   *          the rules to extend; an empty list (or a pending abort) yields an empty result
+   * @param changed
+   *          whether the preceding round produced rules that were not in its input
+   * @return the rules selected by getBestAndOptimalRules plus the rules from the following rounds
+   */
+  private List<KEPRule> makePostFillers(List<KEPRule> baseRules, boolean changed) {
+    if (!baseRules.isEmpty() && !shouldAbort()) {
+      sendStatusUpdateToDelegate("Adding postfillers to rules for "
+              + baseRules.get(0).getTarget().getSingleSlotTypeName().substring(
+                      baseRules.get(0).getTarget().getSingleSlotTypeName().lastIndexOf(".") + 1),
+              TextRulerLearnerState.ML_RUNNING, true);
+    } else {
+      return new ArrayList<KEPRule>();
+    }
+//    List<TextRulerExample> allCoveredExamples = getCoveredExamples(baseRules);
+    Set<KEPRule> result = new HashSet<KEPRule>();
+    for (KEPRule rule : baseRules) {
+      for (TextRulerExample e : rule.getCoveringStatistics().getCoveredPositiveExamples()) {
+        if (rule.getCoveringStatistics().getCoveredNegativesCount() > 0) {
+          KEPRuleItem lastItem = ((KEPRuleItem) rule.getPostFiller().lastItem());
+          int end = e.getAnnotation().getEnd(); // continue after the example ...
+          if (lastItem != null) {
+            end = lastItem.getEnd(); // ... or after the last post filler item if there is one
+          }
+          List<AnnotationFS> annotations = getAnnotationsStartingAt(e.getDocumentCAS(), end, e
+                  .getDocumentCAS().getDocumentText().length());
+          boolean blockBoundaryHit = false;
+          for (AnnotationFS annotationFS : annotations) { // an annotation of the block type recorded for the target stops the extension
+            if (annotationFS.getType().equals(
+                    blocks.get(baseRules.get(0).getTarget().getSingleSlotTypeName()))) {
+              blockBoundaryHit = true;
+              break;
+            }
+          }
+          if (blockBoundaryHit) {
+            continue;
+          }
+          for (AnnotationFS annotationFS : annotations) {
+            if (annotationFS.getType().getName().equals(
+                    lastItem != null ? lastItem.getType().getName() : null)) { // same type as the last item: widen that item in place (modifies rule itself)
+              lastItem.setReluctant(true).setMax(lastItem.getMax() + 1).setAnnotation(annotationFS);
+            } else { // otherwise append a new item to a copy of the rule
+              result.add(rule.copy().addPostFillerItem(new KEPRuleItem(annotationFS)));
+            }
+            if (rule.getPreFiller().isEmpty()) // NOTE(review): makePreFillers has no counterpart of this re-add - confirm intent
+              result.add(rule);
+          }
+        } else {
+          result.add(rule);
+        }
+      }
+    }
+    List<KEPRule> resultList = new ArrayList<KEPRule>(result);
+    testRulesOnDocumentSet(resultList, exampleDocuments);
+    resultList.addAll(baseRules);
+    resultList = getBestAndOptimalRules(resultList);
+    if (baseRules.containsAll(resultList)) { // this round selected nothing beyond its input
+      if (!changed) // the preceding round added nothing either: stop alternating
+        return resultList;
+      else
+        changed = false;
+    } else {
+      changed = true;
+    }
+    resultList.addAll(makePreFillers(resultList, changed));
+    sendStatusUpdateToDelegate("Adding postfillers to rules for "
+            + baseRules.get(0).getTarget().getSingleSlotTypeName().substring(
+                    baseRules.get(0).getTarget().getSingleSlotTypeName().lastIndexOf(".") + 1)
+            + " done", TextRulerLearnerState.ML_RUNNING, true);
+    return resultList;
+  }
+  /**
+   * Extends rules that still cover negative examples by one pre filler item and then hands over
+   * to makePostFillers; the two methods call each other until a round of each adds nothing new.
+   * Rules that cover no negatives are taken over unchanged.
+   * 
+   * @param baseRules
+   *          the rules to extend; an empty list (or a pending abort) yields an empty result
+   * @param changed
+   *          whether the preceding round produced rules that were not in its input
+   * @return the rules selected by getBestAndOptimalRules plus the rules from the following rounds
+   */
+  private List<KEPRule> makePreFillers(List<KEPRule> baseRules, boolean changed) {
+    if (!baseRules.isEmpty() && !shouldAbort()) {
+      sendStatusUpdateToDelegate("Adding prefillers to rules for "
+              + baseRules.get(0).getTarget().getSingleSlotTypeName().substring(
+                      baseRules.get(0).getTarget().getSingleSlotTypeName().lastIndexOf(".") + 1),
+              TextRulerLearnerState.ML_RUNNING, true);
+    } else {
+      return new ArrayList<KEPRule>();
+    }
+//    List<TextRulerExample> allCoveredExamples = getCoveredExamples(baseRules);
+    Set<KEPRule> result = new HashSet<KEPRule>();
+    for (KEPRule rule : baseRules) {
+      for (TextRulerExample e : rule.getCoveringStatistics().getCoveredPositiveExamples()) {
+        if (rule.getCoveringStatistics().getCoveredNegativesCount() > 0) {
+          int begin = e.getAnnotation().getBegin(); // continue before the example ...
+          KEPRuleItem firstItem = (KEPRuleItem) rule.getPreFiller().firstItem();
+          if (firstItem != null) {
+            begin = firstItem.getBegin(); // ... or before the first pre filler item if there is one
+          }
+          List<AnnotationFS> annotations = getAnnotationsEndingAt(begin, e.getDocumentCAS());
+          boolean blockBoundaryHit = false;
+          for (AnnotationFS annotationFS : annotations) { // an annotation of the block type recorded for the target stops the extension
+            if (annotationFS.getType().equals(
+                    blocks.get(baseRules.get(0).getTarget().getSingleSlotTypeName()))) {
+              blockBoundaryHit = true;
+              break;
+            }
+          }
+          if (blockBoundaryHit) {
+            continue;
+          }
+          for (AnnotationFS annotationFS : annotations) {
+            if (annotationFS.getType().getName().equals(
+                    firstItem != null ? firstItem.getType().getName() : null)) { // same type as the first item: widen that item in place (modifies rule itself)
+              firstItem.setReluctant(true).setMax(firstItem.getMax() + 1).setAnnotation(
+                      annotationFS);
+            } else { // otherwise add a new pre filler item to a copy of the rule
+              result.add(rule.copy().addPreFillerItem(new KEPRuleItem(annotationFS)));
+            }
+          }
+        } else {
+          result.add(rule);
+        }
+      }
+    }
+    List<KEPRule> resultList = new ArrayList<KEPRule>(result);
+    testRulesOnDocumentSet(resultList, exampleDocuments);
+    resultList.addAll(baseRules);
+    resultList = getBestAndOptimalRules(resultList);
+    if (baseRules.containsAll(resultList)) { // this round selected nothing beyond its input
+      if (!changed) // the preceding round added nothing either: stop alternating
+        return resultList;
+      else
+        changed = false;
+    } else {
+      changed = true;
+    }
+    resultList.addAll(makePostFillers(resultList, changed));
+
+    sendStatusUpdateToDelegate("Adding prefillers to rules for "
+            + baseRules.get(0).getTarget().getSingleSlotTypeName().substring(
+                    baseRules.get(0).getTarget().getSingleSlotTypeName().lastIndexOf(".") + 1)
+            + " done", TextRulerLearnerState.ML_RUNNING, true);
+    return resultList;
+  }
+
+  /**
+   * Prunes rule lists by dropping every rule that still covers negative examples which are not
+   * handled by a correction rule.
+   * <p>
+   * For each slot: if the slot itself has no perfect rules but both of its boundary slots do, the
+   * slot's own rule list is pruned. Otherwise each boundary slot without perfect rules is pruned.
+   * <p>
+   * The original implementation computed the pruned lists for the boundary slots but never wrote
+   * them back to {@code ruleLists}; they are stored now, mirroring the whole-slot branch.
+   */
+  private void removeBadRules() {
+    for (int i = 0; i < slotNames.length; i++) {
+      // the two boundary slot names belonging to slotNames[i]
+      String firstBoundary = slotNamesWithBoundaries[3 * i + 1];
+      String secondBoundary = slotNamesWithBoundaries[3 * i + 2];
+      if (!hasPerfectRules.get(slotNames[i]) && hasPerfectRules.get(firstBoundary)
+              && hasPerfectRules.get(secondBoundary)) {
+        ruleLists.put(slotNames[i], getRulesWithoutUncorrectedNegatives(slotNames[i]));
+      } else {
+        if (!hasPerfectRules.get(firstBoundary)) {
+          ruleLists.put(firstBoundary, getRulesWithoutUncorrectedNegatives(firstBoundary));
+        }
+        if (!hasPerfectRules.get(secondBoundary)) {
+          ruleLists.put(secondBoundary, getRulesWithoutUncorrectedNegatives(secondBoundary));
+        }
+      }
+    }
+  }
+
+  /**
+   * Collects the rules of the given slot whose covered negative examples are all covered by the
+   * slot's correction rules as well.
+   * 
+   * @param slotName
+   *          key into {@code ruleLists} and {@code correctionRules}
+   * @return a new list holding the rules that remain; the stored list is not modified
+   */
+  private List<KEPRule> getRulesWithoutUncorrectedNegatives(String slotName) {
+    List<KEPRule> kept = new ArrayList<KEPRule>();
+    List<TextRulerExample> corrected = getCorrectedExamples(slotName);
+    for (KEPRule kepRule : ruleLists.get(slotName)) {
+      List<TextRulerExample> uncorrected = new ArrayList<TextRulerExample>(kepRule
+              .getCoveringStatistics().getCoveredNegativeExamples());
+      uncorrected.removeAll(corrected);
+      if (uncorrected.isEmpty()) {
+        kept.add(kepRule);
+      }
+    }
+    return kept;
+  }
+
+  /**
+   * Builds correction ("removal") rules for the slot of the given target.
+   * <p>
+   * Returns a new empty list when the slot's rules cover no negative examples. Otherwise candidate
+   * rules are generated from the types found in all positive examples and from the types found in
+   * no positive example, tested, and the conditions of the useful candidates are merged into one
+   * single correction rule.
+   * <p>
+   * Note that the returned list is the very list stored in {@code correctionRules} for the slot; it
+   * is modified in place.
+   * 
+   * @param target
+   *          the target whose single slot type name selects the rule lists
+   * @return the correction rules of the slot after merging
+   */
+  private List<KEPRule> makeRemovalRules(TextRulerTarget target) {
+    sendStatusUpdateToDelegate("Searching for Removal Rules for "
+            + target.getSingleSlotTypeName().substring(
+                    target.getSingleSlotTypeName().lastIndexOf(".") + 1),
+            TextRulerLearnerState.ML_RUNNING, false);
+    // nothing to correct if no learned rule covers a negative example
+    if (!hasFalsePositives(target.getSingleSlotTypeName()))
+      return new ArrayList<KEPRule>();
+    // not a copy: additions and removals below change the stored correction rule list
+    List<KEPRule> result = correctionRules.get(target.getSingleSlotTypeName());
+    Type targetType = exampleDocuments.getDocuments().get(0).getCAS().getTypeSystem().getType(
+            target.getSingleSlotTypeName());
+
+    // types occurring in every positive example (n == number of positive examples)
+    List<Type> containedTypes = getTokensInNExamples(exampleDocuments.getAllPositiveExamples(),
+            exampleDocuments.getAllPositiveExamples().size(), false);
+    // types occurring in none of the positive examples
+    List<Type> notContainedTypes = getTokensInNoExample(exampleDocuments.getAllPositiveExamples());
+    // notContainedTypes.retainAll(getTokensInNExamples(getFalsePositives(target), 1, false));
+    if (!containedTypes.isEmpty()) {
+      KEPRuleItem containsRuleItem = new KEPRuleItem(targetType);
+      for (Type type : containedTypes) {
+        // NOTE(review): the boolean passed to KEPRuleItemCondition presumably negates the
+        // CONTAINS condition - confirm against KEPRuleItemCondition
+        result.add(new KEPRule(this, target).addInFillerItem(
+                containsRuleItem.copy().addAndCondition(
+                        new KEPRuleItemCondition(type, Condition.CONTAINS, true)))
+                .setCorrectionRule(true));
+      }
+    }
+    if (!notContainedTypes.isEmpty()) {
+      KEPRuleItem notContainsRuleItem = new KEPRuleItem(targetType);
+      for (Type type : notContainedTypes) {
+        result.add(new KEPRule(this, target).addInFillerItem(
+                notContainsRuleItem.copy().addAndCondition(
+                        new KEPRuleItemCondition(type, Condition.CONTAINS, false)))
+                .setCorrectionRule(true));
+      }
+    }
+    testCorrectionRules(target);
+    List<KEPRule> toRemove = new ArrayList<KEPRule>();
+    List<KEPRuleItemCondition> toMerge = new ArrayList<KEPRuleItemCondition>();
+    for (KEPRule rule : result) {
+      // every rule ends up in toRemove; only rules with no covered positives and at least one
+      // covered negative contribute their first condition group to the merged rule
+      if (!(rule.getCoveringStatistics().getCoveredPositivesCount() == 0)
+              || !(rule.getCoveringStatistics().getCoveredNegativesCount() > 0)) {
+        toRemove.add(rule);
+      } else {
+        toMerge.addAll(((KEPRuleItem) rule.getInFiller().get(0)).getConditions().get(0));
+        toRemove.add(rule);
+      }
+    }
+    result.removeAll(toRemove);
+    if (!toMerge.isEmpty()) {
+      // a single rule carrying all collected conditions replaces the individual candidates
+      result.add(new KEPRule(this, target).addInFillerItem(
+              new KEPRuleItem(targetType).addConditions(toMerge)).setCorrectionRule(true));
+      testCorrectionRules(target);
+    }
+    sendStatusUpdateToDelegate("Searching for Removal Rules for "
+            + target.getSingleSlotTypeName().substring(
+                    target.getSingleSlotTypeName().lastIndexOf(".") + 1) + " done",
+            TextRulerLearnerState.ML_RUNNING, true);
+    return result;
+  }
+
+  /**
+   * Registers fresh, empty bookkeeping entries for a slot: no rules, no correction rules, no
+   * covered examples and no perfect rules.
+   * 
+   * @param slotName
+   *          the key under which the entries are stored
+   */
+  private void initializeMapEntries(String slotName) {
+    ruleLists.put(slotName, new ArrayList<KEPRule>());
+    correctionRules.put(slotName, new ArrayList<KEPRule>());
+    coveredExamples.put(slotName, new ArrayList<TextRulerExample>());
+    hasPerfectRules.put(slotName, false);
+  }
+
+  /**
+   * Collects the unfiltered annotations that end where the last relevant annotation before the
+   * given offset ends.
+   * <p>
+   * The annotations of type {@code TextRulerToolkit.RUTA_ALL_TYPE_NAME} are used to find the
+   * closest annotation that begins before {@code end} and whose type is not in
+   * {@code filterSetWithSlotNames}. Its end offset then replaces {@code end}, and all annotations of
+   * the default annotation index with exactly that end offset (and an unfiltered type) are
+   * returned.
+   * 
+   * @param end
+   *          offset before which the anchor annotation has to begin
+   * @param cas
+   *          the CAS to search in
+   * @return the matching annotations, or an empty list if no anchor annotation was found
+   */
+  private List<AnnotationFS> getAnnotationsEndingAt(int end, CAS cas) {
+    List<AnnotationFS> result = new ArrayList<AnnotationFS>();
+    FSIterator<AnnotationFS> it = cas.getAnnotationIndex(
+            cas.getTypeSystem().getType(TextRulerToolkit.RUTA_ALL_TYPE_NAME)).iterator();
+    // skip forward past everything that begins before the given offset
+    while (it.isValid() && it.get().getBegin() < end) {
+      it.moveToNext();
+    }
+    // NOTE(review): if the loop above ran off the end of the index the iterator is invalid here;
+    // whether moveToPrevious() recovers from that state should be confirmed against the UIMA API
+    // step back to the closest annotation that begins before the offset and is not filtered
+    do
+      it.moveToPrevious();
+    while (it.isValid()
+            && (it.get().getBegin() >= end || filterSetWithSlotNames.contains(it.get().getType()
+                    .getName())));
+    if (!it.isValid())
+      return result;
+    // from here on "end" is the end offset of the anchor annotation
+    end = it.get().getEnd();
+    it = cas.getAnnotationIndex().iterator();
+    while (it.isValid() && it.get().getBegin() <= end) {
+      if (it.get().getEnd() == end
+              && !filterSetWithSlotNames.contains(it.get().getType().getName()))
+        result.add(it.get());
+      it.moveToNext();
+    }
+    return result;
+  }
+
+  /**
+   * Collects the unfiltered annotations that share the first begin offset found at or after
+   * {@code begin} and that end no later than {@code till}.
+   * 
+   * @param cas
+   *          the CAS to search in
+   * @param begin
+   *          offset at which the search starts; an offset of 0 is treated as 1
+   * @param till
+   *          maximum end offset of the returned annotations
+   * @return the matching annotations; empty if {@code begin} lies beyond the document text or
+   *         nothing matches
+   */
+  private List<AnnotationFS> getAnnotationsStartingAt(CAS cas, int begin, int till) {
+    List<AnnotationFS> result = new ArrayList<AnnotationFS>();
+    if (begin > cas.getDocumentText().length()) {
+      return new ArrayList<AnnotationFS>();
+    }
+    // the probe annotation below spans [begin - 1, begin], so begin must be at least 1
+    if (begin == 0) {
+      begin++;
+    }
+    // probe annotation used to position the iterator
+    AnnotationFS pointer = cas.createAnnotation(cas.getAnnotationType(), begin - 1, begin);
+    FSIterator<AnnotationFS> it = cas.getAnnotationIndex().iterator(pointer);
+    FSMatchConstraint constraint = getConstraint(cas);
+    FSIterator<AnnotationFS> iterator = cas.createFilteredIterator(it, constraint);
+    iterator.moveTo(pointer);
+
+    // begin offset of the first annotation at or after "begin"; -1 while none has been seen
+    int firstBegin = -1;
+    while (iterator.isValid()) {
+      AnnotationFS fs = iterator.get();
+      if (firstBegin == -1 && fs.getBegin() >= begin) {
+        firstBegin = fs.getBegin();
+      }
+      if (firstBegin >= 0) {
+        if (fs.getBegin() > firstBegin) {
+          // annotations starting later are not of interest
+          break;
+        } else if (fs.getBegin() == firstBegin && fs.getEnd() <= till) {
+          if (!filterSetWithSlotNames.contains(fs.getType().getName())) {
+            result.add(fs);
+          }
+        }
+      }
+      iterator.moveToNext();
+    }
+    return result;
+  }
+
+  /**
+   * Selects a combination of rules that covers as many positive examples as possible.
+   * <p>
+   * First only rules are considered that cover at least one positive example and whose covered
+   * negative examples are all handled by correction rules. Whether these rules alone cover every
+   * positive example is recorded in {@code hasPerfectRules} for the slot of the first rule. If
+   * positive examples remain uncovered, further rules are added in the order given by
+   * {@link #getBestRules(List)} as long as they contribute new examples.
+   * 
+   * @param rules
+   *          the candidate rules; sorted in place as a side effect of {@link #getBestRules(List)}
+   * @return the selected rules, empty if no candidates were given
+   */
+  private List<KEPRule> getOptimalRuleCombination(List<KEPRule> rules) {
+    if (rules.isEmpty()) {
+      return new ArrayList<KEPRule>();
+    }
+    String slotName = rules.get(0).getTarget().getSingleSlotTypeName();
+    List<TextRulerExample> allPositives = exampleDocuments.getAllPositiveExamples();
+    List<TextRulerExample> corrected = getCorrectedExamples(slotName);
+
+    // candidates without uncorrected false positives
+    List<KEPRule> cleanRules = new ArrayList<KEPRule>();
+    for (KEPRule candidate : rules) {
+      List<TextRulerExample> remainingNegatives = new ArrayList<TextRulerExample>(candidate
+              .getCoveringStatistics().getCoveredNegativeExamples());
+      remainingNegatives.removeAll(corrected);
+      if (remainingNegatives.size() == 0
+              && candidate.getCoveringStatistics().getCoveredPositivesCount() > 0) {
+        cleanRules.add(candidate);
+      }
+    }
+
+    List<KEPRule> selection = getBestRules(cleanRules);
+    List<TextRulerExample> covered = new ArrayList<TextRulerExample>();
+    for (KEPRule selected : selection) {
+      covered.addAll(selected.getCoveringStatistics().getCoveredPositiveExamples());
+    }
+    boolean perfect = covered.containsAll(exampleDocuments.getAllPositiveExamples());
+    hasPerfectRules.put(slotName, perfect);
+
+    // fill the gaps with the remaining rules, best first
+    List<KEPRule> ranked = getBestRules(rules);
+    while (!covered.containsAll(allPositives) && !ranked.isEmpty()) {
+      KEPRule next = ranked.remove(0);
+      if (!covered.containsAll(next.getCoveringStatistics().getCoveredPositiveExamples())) {
+        // remove before adding so that no example is listed twice
+        covered.removeAll(next.getCoveringStatistics().getCoveredPositiveExamples());
+        covered.addAll(next.getCoveringStatistics().getCoveredPositiveExamples());
+        selection.add(next);
+      }
+    }
+    return selection;
+  }
+
+  private List<KEPRule> getBestRules(List<KEPRule> rules) {
+    if (rules.isEmpty())
+      return new ArrayList<KEPRule>();
+    final class AComparator implements Comparator<KEPRule> {
+      public int compare(KEPRule r1, KEPRule r2) {
+        if (r1.getCoveringStatistics().getCoveredPositivesCount() < r2.getCoveringStatistics()
+                .getCoveredPositivesCount())
+          return 1;
+        else if (r1.getCoveringStatistics().getCoveredPositivesCount() > r2.getCoveringStatistics()
+                .getCoveredPositivesCount())
+          return -1;
+        else if (r1.getCoveringStatistics().getCoveredNegativesCount() > r2.getCoveringStatistics()
+                .getCoveredNegativesCount())
+          return 1;
+        else if (r1.getCoveringStatistics().getCoveredNegativesCount() < r2.getCoveringStatistics()
+                .getCoveredNegativesCount())
+          return -1;
+        else if (r1.getPreFiller().size() + r1.getInFiller().size() + r1.getPostFiller().size() < r2
+                .getPreFiller().size()
+                + r2.getInFiller().size() + r2.getPostFiller().size())
+          return -1;
+        return 0;
+      }
+    }
+
+    Collections.sort(rules, new AComparator());
+    List<KEPRule> result = new ArrayList<KEPRule>();
+    List<TextRulerExample> positiveExamples = exampleDocuments.getAllPositiveExamples();
+    List<TextRulerExample> coveredExamples = new ArrayList<TextRulerExample>();
+    for (int i = 0; i < rules.size(); i++) {
+      KEPRule rule = rules.get(i);
+      if ((3 * rule.getCoveringStatistics().getCoveredPositivesCount() >= rule
+              .getCoveringStatistics().getCoveredNegativesCount())
+              && (rule.getCoveringStatistics().getCoveredPositivesCount() >= positiveExamples
+                      .size() || !coveredExamples.containsAll(rule.getCoveringStatistics()
+                      .getCoveredPositiveExamples()))) {
+        result.add(rule);
+        coveredExamples.addAll(rule.getCoveringStatistics().getCoveredPositiveExamples());
+
+        if (coveredExamples.containsAll(positiveExamples))
+          return result;
+      }
+    }
+    for (int i = 0; i < rules.size(); i++) {
+      KEPRule rule = rules.get(i);
+      if (rule.getCoveringStatistics().getCoveredPositivesCount() >= positiveExamples.size()
+              || !coveredExamples.containsAll(rule.getCoveringStatistics()
+                      .getCoveredPositiveExamples())) {
+        result.add(rule);
+        coveredExamples.addAll(rule.getCoveringStatistics().getCoveredPositiveExamples());
+
+        if (coveredExamples.containsAll(positiveExamples))
+          return result;
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Combines the results of {@link #getBestRules(List)} and
+   * {@link #getOptimalRuleCombination(List)} without listing a rule twice.
+   * 
+   * @param rules
+   *          the candidate rules
+   * @return the best rules followed by those optimal rules that are not among them
+   */
+  private List<KEPRule> getBestAndOptimalRules(List<KEPRule> rules) {
+    List<KEPRule> combined = new ArrayList<KEPRule>(getBestRules(rules));
+    for (KEPRule optimalRule : getOptimalRuleCombination(rules)) {
+      if (!combined.contains(optimalRule)) {
+        combined.add(optimalRule);
+      }
+    }
+    return combined;
+  }
+
+  /**
+   * Determines the annotation types that occur in at least {@code n} of the given examples.
+   * <p>
+   * Types listed in {@code filterSetWithSlotNames} are ignored. Each example counts at most once
+   * per type.
+   * 
+   * @param examples
+   *          the examples to inspect
+   * @param n
+   *          minimum number of examples a type has to occur in
+   * @param countOnlyCoveringTokens
+   *          if {@code true}, only annotations with exactly the example's begin and end offsets are
+   *          counted; otherwise every annotation within the example's offsets is counted
+   * @return the types that reach the threshold; empty if no examples were given
+   */
+  private List<Type> getTokensInNExamples(List<TextRulerExample> examples, int n,
+          boolean countOnlyCoveringTokens) {
+    if (examples.isEmpty()) {
+      return new ArrayList<Type>();
+    }
+    Map<String, List<TextRulerExample>> examplesByType = new HashMap<String, List<TextRulerExample>>();
+    for (TextRulerExample example : examples) {
+      for (AnnotationFS annotation : TextRulerToolkit.getAnnotationsWithinBounds(
+              example.getDocumentCAS(), example.getAnnotation().getBegin(), example
+                      .getAnnotation().getEnd(), filterSetWithSlotNames, null)) {
+        String typeName = annotation.getType().getName();
+        if (filterSetWithSlotNames.contains(typeName)) {
+          continue;
+        }
+        int exampleBegin = example.getAnnotation().getBegin();
+        int exampleEnd = example.getAnnotation().getEnd();
+        boolean inside = annotation.getBegin() >= exampleBegin
+                && annotation.getEnd() <= exampleEnd;
+        boolean sameSpan = annotation.getBegin() == exampleBegin
+                && annotation.getEnd() == exampleEnd;
+        if ((!countOnlyCoveringTokens && inside) || sameSpan) {
+          List<TextRulerExample> containing = examplesByType.get(typeName);
+          if (containing == null) {
+            containing = new ArrayList<TextRulerExample>();
+            examplesByType.put(typeName, containing);
+          }
+          if (!containing.contains(example)) {
+            containing.add(example);
+          }
+        }
+      }
+    }
+    List<Type> frequentTypes = new ArrayList<Type>();
+    for (Map.Entry<String, List<TextRulerExample>> entry : examplesByType.entrySet()) {
+      if (entry.getValue().size() >= n) {
+        frequentTypes.add(examples.get(0).getDocumentCAS().getTypeSystem()
+                .getType(entry.getKey()));
+      }
+    }
+    return frequentTypes;
+  }
+
+  /**
+   * Determines the annotation types that occur somewhere in the example documents but in none of
+   * the given examples.
+   * <p>
+   * Types listed in {@code filterSetWithSlotNames} are ignored. The type names are resolved with
+   * the type system of the first example's CAS; without any example there is nothing to resolve
+   * them with, so an empty list is returned (this used to fail with an
+   * {@code IndexOutOfBoundsException}).
+   * 
+   * @param examples
+   *          the examples whose types are excluded
+   * @return the types not contained in any example; empty if no examples were given
+   */
+  private List<Type> getTokensInNoExample(List<TextRulerExample> examples) {
+    List<Type> result = new ArrayList<Type>();
+    if (examples.isEmpty()) {
+      return result;
+    }
+    // names of all unfiltered types present in the example documents
+    List<String> types = new ArrayList<String>();
+    for (TextRulerExampleDocument doc : exampleDocuments.getDocuments()) {
+      for (AnnotationFS a : doc.getCAS().getAnnotationIndex()) {
+        if (!types.contains(a.getType().getName())
+                && !filterSetWithSlotNames.contains(a.getType().getName()))
+          types.add(a.getType().getName());
+      }
+    }
+    // drop every type that occurs in at least one example
+    List<Type> containedTypes = getTokensInNExamples(examples, 1, false);
+    for (Type type : containedTypes) {
+      types.remove(type.getName());
+    }
+    for (String typeString : types) {
+      result.add(examples.get(0).getDocumentCAS().getTypeSystem().getType(typeString));
+    }
+    return result;
+  }
+
+  public String getResultString() {
+    StringBuffer ruleStrings = new StringBuffer();
+    if (slotNamesWithBoundaries == null || slotNamesWithBoundaries.length == 0)
+      return "No results available yet!";
+
+    for (int i = 0; i < slotNamesWithBoundaries.length; i++) {
+      List<KEPRule> ruleList = this.ruleLists.get(slotNamesWithBoundaries[i]);
+      Type blockType = blocks.get(slotNamesWithBoundaries[i]);
+      if (blockType != null
+              && !(i > 0 && blocks.get(slotNamesWithBoundaries[i - 1]) != null && blocks.get(
+                      slotNamesWithBoundaries[i - 1]).getName().equals(blockType.getName()))) {
+        ruleStrings.append("BLOCK(" + blockType.getShortName() + ") " + blockType.getShortName()
+                + "{} { \n");
+      }
+      if (ruleList == null || ruleList.isEmpty()) {
+        if (blockType != null
+                && !(i < slotNamesWithBoundaries.length - 1
+                        && blocks.get(slotNamesWithBoundaries[i + 1]) != null && blocks.get(
+                        slotNamesWithBoundaries[i + 1]).getName().equals(blockType.getName())))
+          ruleStrings.append("} \n");
+        continue;
+      }
+      ruleStrings.append("// " + slotNamesWithBoundaries[i] + " RULES \n");
+      for (KEPRule rule : new ArrayList<KEPRule>(ruleList)) {
+        ruleStrings.append((blockType != null ? "\t" : "") + rule.getRuleString() + "\t// "
+                + rule.getCoveringStatistics() + "\n");
+      }
+      if (blockType != null
+              && !(i < slotNamesWithBoundaries.length - 1
+                      && blocks.get(slotNamesWithBoundaries[i + 1]) != null && blocks.get(
+                      slotNamesWithBoundaries[i + 1]).getName().equals(blockType.getName())))
+        ruleStrings.append("}");
+      ruleStrings.append("\n");
+    }
+    StringBuffer boundaryCorrectors = new StringBuffer();
+    StringBuffer wholeSlotCorrectors = new StringBuffer();
+    boundaryCorrectors.append("\n // BOUNDARY CORRECTION RULES: \n");
+    wholeSlotCorrectors.append("\n // CORRECTION RULES: \n");
+    for (int i = 0; i < slotNamesWithBoundaries.length; i++) {
+      List<KEPRule> ruleList = this.correctionRules.get(slotNamesWithBoundaries[i]);
+      if (ruleList == null || ruleList.isEmpty())
+        continue;
+      for (KEPRule rule : ruleList) {
+        if (slotNamesWithBoundaries[i].contains(TextRulerToolkit.LEFT_BOUNDARY_EXTENSION)
+                || slotNamesWithBoundaries[i].contains(TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION)) {
+          boundaryCorrectors.append(rule.getRuleString() + "\t// " + rule.getCoveringStatistics()
+                  + "\n");
+        } else {
+          wholeSlotCorrectors.append(rule.getRuleString() + "\t// " + rule.getCoveringStatistics()
+                  + "\n");
+        }
+      }
+    }
+    return getFileHeaderString(true) + ruleStrings + boundaryCorrectors + "\n // CONNECTORS: \n"
+            + getConnectorsRuleString() + wholeSlotCorrectors;
+  }
+
+  /**
+   * Renders the rules of one slot as script text.
+   * <p>
+   * For a boundary slot only its own rules are rendered. For a whole slot the rules of both
+   * boundary slots are appended recursively, followed by the two rules that create the slot
+   * annotation from its boundary annotations. If a block type is registered for the slot, the
+   * rules are wrapped in BLOCK statements.
+   * 
+   * @param slotName
+   *          name of the slot (with or without boundary extension)
+   * @return the script text for the slot
+   */
+  private String getAnnotationRulesString(String slotName) {
+    StringBuffer script = new StringBuffer();
+    script.append(getPackageString());
+    script.append("// " + slotName + " RULES \n");
+    Type blockType = blocks.get(slotName);
+    boolean inBlock = blockType != null;
+    if (inBlock) {
+      script.append("BLOCK(" + blockType.getShortName() + ") " + blockType.getShortName()
+              + "{} { \n");
+    }
+    List<KEPRule> slotRules = this.ruleLists.get(slotName);
+    if (slotRules != null) {
+      String indent = inBlock ? "\t" : "";
+      for (KEPRule rule : slotRules) {
+        String ruleString = rule.getRuleString();
+        script.append(indent + ruleString + "\t// " + rule.getCoveringStatistics() + "\n");
+      }
+    }
+    if (inBlock) {
+      script.append("}");
+    }
+    script.append("\n");
+
+    boolean boundarySlot = slotName.contains(TextRulerToolkit.LEFT_BOUNDARY_EXTENSION)
+            || slotName.contains(TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION);
+    if (boundarySlot) {
+      return script.toString();
+    }
+
+    // whole slot: add the rules of both boundaries and connect them
+    script.append(getAnnotationRulesString(slotName + TextRulerToolkit.LEFT_BOUNDARY_EXTENSION));
+    script.append(getAnnotationRulesString(slotName + TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION));
+    if (inBlock) {
+      script.append("BLOCK(" + blockType.getShortName() + "Correction) "
+              + blockType.getShortName() + "{} { \n");
+    }
+    String shortName = slotName.substring(slotName.lastIndexOf(".") + 1);
+    String leftName = shortName + TextRulerToolkit.LEFT_BOUNDARY_EXTENSION;
+    String rightName = shortName + TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION;
+    script.append(leftName + "{->MARKONCE(" + shortName + ",1,3)} ANY*? " + rightName + ";"
+            + "\n");
+    script.append(leftName + "{IS(" + rightName + ")->MARKONCE(" + shortName + ")} " + ";"
+            + "\n");
+    if (inBlock) {
+      script.append("}");
+    }
+    return script.toString();
+  }
+
+  /**
+   * Renders the connector rules, i.e. the rules that create a slot annotation from its boundary
+   * annotations, for all slots.
+   * <p>
+   * Rules of slots with a registered block type are grouped into one BLOCK statement per block
+   * type; the rules of all other slots follow afterwards.
+   * 
+   * @return the script text of the connector rules
+   */
+  private String getConnectorsRuleString() {
+    Map<Type, StringBuffer> rulesByBlock = new HashMap<Type, StringBuffer>();
+    StringBuffer rulesWithoutBlock = new StringBuffer();
+
+    for (String slotName : this.slotNames) {
+      Type slotBlock = blocks.get(slotName);
+      String indent = slotBlock == null ? "" : "\t";
+      String shortName = slotName.substring(slotName.lastIndexOf(".") + 1);
+      String leftName = shortName + TextRulerToolkit.LEFT_BOUNDARY_EXTENSION;
+      String rightName = shortName + TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION;
+      String connectors = indent + leftName + "{->MARKONCE(" + shortName + ",1,3)} ANY*? "
+              + rightName + ";" + "\n" + indent + leftName + "{IS(" + rightName
+              + ")->MARKONCE(" + shortName + ")} " + ";" + "\n";
+      if (slotBlock == null) {
+        rulesWithoutBlock.append(connectors);
+        continue;
+      }
+      StringBuffer blockRules = rulesByBlock.get(slotBlock);
+      if (blockRules == null) {
+        blockRules = new StringBuffer();
+        rulesByBlock.put(slotBlock, blockRules);
+      }
+      blockRules.append(connectors);
+    }
+
+    StringBuffer script = new StringBuffer();
+    for (Map.Entry<Type, StringBuffer> entry : rulesByBlock.entrySet()) {
+      Type block = entry.getKey();
+      script.append("BLOCK(" + block.getShortName() + "Connectors) " + block.getShortName()
+              + "{} { \n" + entry.getValue() + "} \n");
+    }
+    script.append(rulesWithoutBlock);
+    return script.toString();
+  }
+
+  /**
+   * Collects the positive examples covered by at least one of the given rules.
+   * 
+   * @param rules
+   *          the rules whose covering statistics are read
+   * @return the covered positive examples without duplicates, in order of first occurrence
+   */
+  private List<TextRulerExample> getCoveredExamples(List<KEPRule> rules) {
+    List<TextRulerExample> covered = new ArrayList<TextRulerExample>();
+    for (KEPRule rule : rules) {
+      for (TextRulerExample example : rule.getCoveringStatistics()
+              .getCoveredPositiveExamples()) {
+        if (covered.contains(example)) {
+          continue;
+        }
+        covered.add(example);
+      }
+    }
+    return covered;
+  }
+
+  /**
+   * Collects the negative examples covered by at least one correction rule of the given slot.
+   * 
+   * @param slotName
+   *          key into {@code correctionRules}
+   * @return the covered negative examples without duplicates, in order of first occurrence
+   */
+  private List<TextRulerExample> getCorrectedExamples(String slotName) {
+    List<TextRulerExample> corrected = new ArrayList<TextRulerExample>();
+    for (KEPRule correctionRule : correctionRules.get(slotName)) {
+      for (TextRulerExample example : correctionRule.getCoveringStatistics()
+              .getCoveredNegativeExamples()) {
+        if (corrected.contains(example)) {
+          continue;
+        }
+        corrected.add(example);
+      }
+    }
+    return corrected;
+  }
+
+  /**
+   * Tells whether any rule learned for the given slot covers a negative example.
+   * 
+   * @param singleSlotTypeName
+   *          key into {@code ruleLists}
+   * @return {@code true} if at least one rule of the slot has covered negatives; {@code false}
+   *         otherwise, including when no rules are stored for the slot
+   */
+  private boolean hasFalsePositives(String singleSlotTypeName) {
+    List<KEPRule> slotRules = this.ruleLists.get(singleSlotTypeName);
+    if (slotRules != null) {
+      for (KEPRule rule : slotRules) {
+        if (rule.getCoveringStatistics().getCoveredNegativesCount() > 0) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Evaluates the correction rules of the target's slot on all example documents.
+   * <p>
+   * For each document the annotation rules of the slot are applied first and compared with the
+   * original document. Each correction rule is then tested on a freshly processed CAS. Examples
+   * that the script covered but that are no longer reported after the correction rule ran are
+   * added to the correction rule's covering statistics (negatives and positives separately).
+   * Does nothing if the learner should abort.
+   * 
+   * @param target
+   *          the target whose single slot type name selects the rules to test
+   */
+  public void testCorrectionRules(TextRulerTarget target) {
+    if (shouldAbort())
+      return;
+    String rStr = getAnnotationRulesString(target.getSingleSlotTypeName());
+    for (TextRulerExampleDocument doc : exampleDocuments.getDocuments()) {
+      // baseline: what the annotation rules alone cover in this document
+      CAS processedCAS = applyScriptOnDocument(rStr, doc, target);
+      TextRulerStatisticsCollector scriptStatistics = new TextRulerStatisticsCollector();
+      compareOriginalDocumentWithTestCAS(doc, processedCAS, target, scriptStatistics,
+              collectNegativeCoveredInstancesWhenTesting());
+      for (KEPRule cRule : correctionRules.get(target.getSingleSlotTypeName())) {
+        if (shouldAbort())
+          break;
+        if (cRule.getCoveringStatistics() == null) {
+          cRule.setCoveringStatistics(new TextRulerStatisticsCollector());
+        }
+        // the script is applied again for every correction rule before the rule is tested
+        processedCAS = applyScriptOnDocument(rStr, doc, target);
+        TextRulerStatisticsCollector correctedStats = new TextRulerStatisticsCollector();
+        testRuleOnDocument(cRule, doc, correctedStats, processedCAS);
+        // an example counts as covered by the correction rule if it is in the baseline statistics
+        // but missing from the statistics collected with the correction rule
+        for (TextRulerExample ex : scriptStatistics.getCoveredNegativeExamples()) {
+          if (!correctedStats.getCoveredNegativeExamples().contains(ex)) {
+            cRule.getCoveringStatistics().addCoveredNegative(ex);
+          }
+        }
+        for (TextRulerExample ex : scriptStatistics.getCoveredPositiveExamples()) {
+          if (!correctedStats.getCoveredPositiveExamples().contains(ex)) {
+            cRule.getCoveringStatistics().addCoveredPositive(ex);
+          }
+        }
+        cRule.getCoveringStatistics().reflectCountsFromCoveredExamples();
+      }
+    }
+  }
+
+  /**
+   * Creates the boundary annotations in the given CAS for every slot name that is not itself a
+   * boundary slot name.
+   * 
+   * @param cas
+   *          the CAS to add the boundary annotations to
+   */
+  private void prepareCASWithBoundaries(CAS cas) {
+    for (String slotName : slotNames) {
+      boolean boundarySlot = slotName.contains(TextRulerToolkit.LEFT_BOUNDARY_EXTENSION)
+              || slotName.contains(TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION);
+      if (boundarySlot) {
+        continue;
+      }
+      TextRulerExampleDocument.createBoundaryAnnotationsForCas(cas, slotName, filterSet);
+    }
+  }
+
+  /**
+   * Loads a CAS via the super class and adds the boundary annotations of all slots to it.
+   */
+  @Override
+  public CAS loadCAS(String fileName, CAS reuseCAS) {
+    CAS loadedCas = super.loadCAS(fileName, reuseCAS);
+    prepareCASWithBoundaries(loadedCas);
+    return loadedCas;
+  }
+
+  /**
+   * Adds the boundary annotations of all slots to every CAS cached by the example documents.
+   */
+  private void prepareCachedCASesWithBoundaries() {
+    for (CAS cachedCas : exampleDocuments.getCachedCASes()) {
+      prepareCASWithBoundaries(cachedCas);
+    }
+  }
+
+  /**
+   * Always returns {@code true}. Within this class the value is passed on to
+   * {@code compareOriginalDocumentWithTestCAS} when correction rules are tested.
+   */
+  @Override
+  public boolean collectNegativeCoveredInstancesWhenTesting() {
+    return true;
+  }
+
+  /**
+   * Takes over the learner parameters contained in the given map. Parameters that are missing
+   * from the map keep their current value. In debug mode the parameters are additionally saved to
+   * the temp folder.
+   * 
+   * @param params
+   *          parameter values by parameter id; the values are expected to be {@link Integer}s
+   */
+  public void setParameters(Map<String, Object> params) {
+    if (TextRulerToolkit.DEBUG) {
+      saveParametersToTempFolder(params);
+    }
+
+    // TODO try catch
+    if (params.containsKey(FILLER_WINDOW)) {
+      fillerWindow = (Integer) params.get(FILLER_WINDOW);
+    }
+    if (params.containsKey(MAX_EXPAND_RULES)) {
+      maxExpandRules = (Integer) params.get(MAX_EXPAND_RULES);
+    }
+    if (params.containsKey(MAX_FILLER_LENGTH)) {
+      maxFillerLength = (Integer) params.get(MAX_FILLER_LENGTH);
+    }
+    if (params.containsKey(MAX_INFILLER_RULES)) {
+      maxInfillerRules = (Integer) params.get(MAX_INFILLER_RULES);
+    }
+  }
+
+  /**
+   * Creates a constraint that matches every feature structure whose type is neither in the
+   * learner's filter set nor the Ruta basic type.
+   * 
+   * @param cas
+   *          the CAS providing the constraint factory
+   * @return the negated type constraint
+   */
+  protected FSMatchConstraint getConstraint(CAS cas) {
+    ConstraintFactory cf = cas.getConstraintFactory();
+    final FSTypeConstraint constraint = cf.createTypeConstraint();
+
+    // collect the types that must NOT match
+    for (String each : getFilterSet()) {
+      constraint.add(each);
+    }
+    constraint.add(RutaEngine.BASIC_TYPE);
+    // TODO check if this is a legal alternative to "new NotConstraint(constraint)":
+    // wrapper that inverts the result of the type constraint
+    FSMatchConstraint result = new FSMatchConstraint() {
+      private static final long serialVersionUID = -6744378612440830298L;
+
+      private final FSTypeConstraint c = constraint;
+
+      public boolean match(FeatureStructure fs) {
+        return !c.match(fs);
+      }
+    };
+    return result;
+  }
+
+  /**
+   * Orders rules by an occurrence score computed from the types of their rule items: for each item
+   * the number of positive examples is divided (integer division) by the number of annotations of
+   * the item's type in the given CAS, with a minimum contribution of 1 per item. Rules with a lower
+   * score sort first.
+   * <p>
+   * Comparing works on copies of the item lists, so the compared rules are never modified (the
+   * former implementation called {@code addAll} on the list returned by {@code getInFiller()}).
+   * A type without any annotation in the CAS is treated like a type with one annotation instead
+   * of failing with a division by zero.
+   */
+  public class KEPRuleComparator implements Comparator<KEPRule> {
+
+    private final CAS cas;
+
+    public KEPRuleComparator(CAS cas) {
+      super();
+      this.cas = cas;
+    }
+
+    public int compare(KEPRule o1, KEPRule o2) {
+      return Double.compare(getOccurrenceScore(o1), getOccurrenceScore(o2));
+    }
+
+    /**
+     * Sums the per-item ratios of positive examples to annotations of the item's type over the
+     * in-filler, post-filler and pre-filler items of the rule.
+     */
+    private double getOccurrenceScore(KEPRule rule) {
+      // defensive copy: the getters may expose the rule's own pattern lists
+      List<TextRulerRuleItem> items = new ArrayList<TextRulerRuleItem>(rule.getInFiller());
+      items.addAll(rule.getPostFiller());
+      items.addAll(rule.getPreFiller());
+      int positiveCount = exampleDocuments.getAllPositiveExamples().size();
+      double score = 0;
+      for (TextRulerRuleItem each : items) {
+        KEPRuleItem eachItem = (KEPRuleItem) each;
+        int annotationCount = cas.getAnnotationIndex(eachItem.getType()).size();
+        // guard against division by zero for types that do not occur in the CAS
+        int ratio = positiveCount / Math.max(1, annotationCount);
+        score += (ratio < 1) ? 1 : ratio;
+      }
+      return score;
+    }
+  }
+
+}
\ No newline at end of file

Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPPreferencePage.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPPreferencePage.java?rev=1491365&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPPreferencePage.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPPreferencePage.java Mon Jun 10 08:05:55 2013
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ruta.textruler.learner.kep;
+
+import java.util.ArrayList;
+import java.util.Map;
+
+import org.apache.uima.ruta.textruler.TextRulerPlugin;
+import org.apache.uima.ruta.textruler.extension.TextRulerController;
+import org.apache.uima.ruta.textruler.extension.TextRulerLearnerController;
+import org.apache.uima.ruta.textruler.extension.TextRulerLearnerFactory;
+import org.apache.uima.ruta.textruler.extension.TextRulerLearnerParameter;
+import org.eclipse.jface.preference.BooleanFieldEditor;
+import org.eclipse.jface.preference.FieldEditor;
+import org.eclipse.jface.preference.IPreferenceStore;
+import org.eclipse.jface.preference.PreferencePage;
+import org.eclipse.jface.preference.StringFieldEditor;
+import org.eclipse.swt.SWT;
+import org.eclipse.swt.layout.GridData;
+import org.eclipse.swt.layout.GridLayout;
+import org.eclipse.swt.widgets.Composite;
+import org.eclipse.swt.widgets.Control;
+import org.eclipse.ui.IWorkbench;
+import org.eclipse.ui.IWorkbenchPreferencePage;
+
+/**
+ * Preference page for the parameters of the KEP learner. One field editor is created per learner
+ * parameter: a check box for boolean parameters and a text field for float, int and string
+ * parameters. Select parameters get no editor.
+ * <p>
+ * The page stays empty instead of failing if the learner controller or its factory is not
+ * available, and a parameter without a standard value simply gets no default.
+ */
+public class KEPPreferencePage extends PreferencePage implements IWorkbenchPreferencePage {
+
+  public static String ID = "org.apache.uima.ruta.textruler.preference.kep";
+
+  private TextRulerLearnerController algorithmController;
+
+  private IPreferenceStore store;
+
+  private ArrayList<FieldEditor> fields = new ArrayList<FieldEditor>();
+
+  public KEPPreferencePage() {
+    TextRulerLearnerController ctrl = TextRulerController
+            .getControllerForID("org.apache.uima.ruta.textruler.kep");
+    this.algorithmController = ctrl;
+    store = TextRulerPlugin.getDefault().getPreferenceStore();
+    setPreferenceStore(store);
+  }
+
+  @Override
+  protected Control createContents(Composite parent) {
+    Composite top = new Composite(parent, SWT.LEFT);
+    top.setLayoutData(new GridData(GridData.FILL_HORIZONTAL));
+    top.setLayout(new GridLayout());
+
+    // without controller or factory there are no parameters to show
+    if (algorithmController == null) {
+      return top;
+    }
+    TextRulerLearnerFactory f = algorithmController.getFactory();
+    if (f == null) {
+      return top;
+    }
+    TextRulerLearnerParameter[] params = f.getAlgorithmParameters();
+    Map<String, Object> values = f.getAlgorithmParameterStandardValues();
+    if (params == null) {
+      return top;
+    }
+    for (TextRulerLearnerParameter p : params) {
+      String id = algorithmController.getID() + "." + p.id;
+      Object defaultValue = values == null ? null : values.get(p.id);
+      switch (p.type) {
+        case ML_BOOL_PARAM: {
+          FieldEditor editor = new BooleanFieldEditor(id, p.name, top);
+          if (defaultValue instanceof Boolean) {
+            store.setDefault(id, ((Boolean) defaultValue).booleanValue());
+          }
+          registerField(editor);
+          break;
+        }
+
+        case ML_FLOAT_PARAM:
+        case ML_INT_PARAM:
+        case ML_STRING_PARAM: {
+          FieldEditor editor = new StringFieldEditor(id, p.name, top);
+          if (defaultValue != null) {
+            store.setDefault(id, defaultValue.toString());
+          }
+          registerField(editor);
+          break;
+        }
+        case ML_SELECT_PARAM:
+          break;
+        default:
+          break;
+      }
+    }
+    return top;
+  }
+
+  /**
+   * Remembers the editor, binds it to the preference store and loads its current value. Defaults
+   * have to be set on the store before this is called.
+   */
+  private void registerField(FieldEditor editor) {
+    fields.add(editor);
+    editor.setPreferenceStore(store);
+    editor.load();
+  }
+
+  public void init(IWorkbench workbench) {
+  }
+
+  @Override
+  protected void performDefaults() {
+    // only the field editors are reset; super.performDefaults() is not invoked
+    for (FieldEditor f : fields) {
+      f.loadDefault();
+    }
+  }
+
+  @Override
+  public boolean performOk() {
+    // only the field editors are stored; super.performOk() is not invoked
+    for (FieldEditor f : fields) {
+      f.store();
+    }
+    return true;
+  }
+}

Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPRule.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPRule.java?rev=1491365&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPRule.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPRule.java Mon Jun 10 08:05:55 2013
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.ruta.textruler.learner.kep;
+
+import org.apache.uima.ruta.textruler.core.TextRulerBasicLearner;
+import org.apache.uima.ruta.textruler.core.TextRulerMultiSlotRule;
+import org.apache.uima.ruta.textruler.core.TextRulerRuleItem;
+import org.apache.uima.ruta.textruler.core.TextRulerRulePattern;
+import org.apache.uima.ruta.textruler.core.TextRulerSlotPattern;
+import org.apache.uima.ruta.textruler.core.TextRulerTarget;
+
+public class KEPRule extends TextRulerMultiSlotRule {
+
+  private boolean isCorrectionRule = false;
+
+  public KEPRule(KEPRule copyFrom) {
+    super(copyFrom);
+    this.isCorrectionRule = copyFrom.isCorrectionRule;
+  }
+
+  public KEPRule(TextRulerBasicLearner parentAlgorithm, TextRulerTarget target) {
+    super(parentAlgorithm, target);
+    slotPatterns.add(new TextRulerSlotPattern());
+  }
+
+  @Override
+  public KEPRule copy() {
+    return new KEPRule(this);
+  }
+
+  public double getLaplacian() {
+    int e = 0;
+    int n = 0;
+
+    if (coveringStatistics != null) {
+      e = coveringStatistics.getCoveredNegativesCount();
+      n = coveringStatistics.getCoveredNegativesCount()
+              + coveringStatistics.getCoveredPositivesCount();
+    }
+    return ((double) e + 1) / ((double) n + 1);
+  }
+
+  public boolean containsTerm(KEPRuleItem term) {
+    for (TextRulerSlotPattern sp : slotPatterns) {
+      for (TextRulerRuleItem i : sp.preFillerPattern)
+        if (i.equals(term))
+          return true;
+      for (TextRulerRuleItem i : sp.fillerPattern)
+        if (i.equals(term))
+          return true;
+      for (TextRulerRuleItem i : sp.postFillerPattern)
+        if (i.equals(term))
+          return true;
+    }
+    return false;
+  }
+
+  public KEPRuleItem searchNeighborOfItem(KEPRuleItem item, boolean goToLeft) {
+    int slotIndex = -1;
+    int patternIndex = -1;
+    int slotI = 0;
+    for (TextRulerSlotPattern sp : slotPatterns) {
+      for (TextRulerRuleItem it : sp.preFillerPattern) {
+        if (it == item) {
+          slotIndex = slotI;
+          patternIndex = 0; // 0=preFiller
+          break;
+        }
+      }
+      if (slotIndex < 0) {
+        for (TextRulerRuleItem it : sp.fillerPattern) {
+          if (it == item) {
+            slotIndex = slotI;
+            patternIndex = 1; // 1=filler
+            break;
+          }
+        }
+      }
+      if (slotIndex < 0) {
+        for (TextRulerRuleItem it : sp.postFillerPattern) {
+          if (it == item) {
+            slotIndex = slotI;
+            patternIndex = 2; // 2=postFiller
+            break;
+          }
+        }
+      }
+      if (slotIndex >= 0) {
+        break;
+      }
+    }
+    if (slotIndex < 0) // we didn't even find the item in our rule ?! how
+      // can this happen ?
+      return null;
+
+    TextRulerRulePattern currentPattern = getPattern(slotIndex, patternIndex);
+    while (currentPattern != null) {
+      int startIndex = currentPattern.indexOf(item); // this is only >= 0
+      // for the first
+      // pattern...
+      if (!goToLeft) // walk forward...
+      {
+        int startSearchFromIndex = startIndex + 1;
+        if (startSearchFromIndex < currentPattern.size())
+          return (KEPRuleItem) currentPattern.get(startSearchFromIndex);
+        else // skip to next pattern
+        {
+          patternIndex++;
+          if (patternIndex > 2) {
+            patternIndex = 0;
+            slotIndex++;
+            if (slotIndex >= slotPatterns.size())
+              return null; // not found!
+          }
+          currentPattern = getPattern(slotIndex, patternIndex);
+        }
+      } else {
+        int startSearchFromIndex = startIndex >= 0 ? startIndex - 1 : currentPattern.size() - 1;
+        if (startSearchFromIndex >= 0 && currentPattern.size() > 0)
+          return (KEPRuleItem) currentPattern.get(startSearchFromIndex);
+        else // skip to previous pattern
+        {
+          patternIndex--;
+          if (patternIndex < 0) {
+            patternIndex = 2;
+            slotIndex--;
+            if (slotIndex < 0)
+              return null; // not found!
+          }
+          currentPattern = getPattern(slotIndex, patternIndex);
+        }
+      }
+    }
+    return null;
+  }
+
+  private TextRulerRulePattern getPattern(int slotIndex, int patternIndex) {
+    TextRulerSlotPattern sp = slotPatterns.get(slotIndex);
+    if (patternIndex == 0)
+      return sp.preFillerPattern;
+    else if (patternIndex == 1)
+      return sp.fillerPattern;
+    else if (patternIndex == 2)
+      return sp.postFillerPattern;
+    else
+      return null;
+  }
+
+  public KEPRule addPostFillerItem(KEPRuleItem item) {
+    this.getPostFiller().add(item);
+    setNeedsCompile(true);
+    return this;
+  }
+
+  public KEPRule addInFillerItem(KEPRuleItem item) {
+    this.getInFiller().add(item);
+    setNeedsCompile(true);
+    return this;
+  }
+
+  public KEPRule addPreFillerItem(KEPRuleItem item) {
+    this.getPreFiller().add(0, item);
+    setNeedsCompile(true);
+    return this;
+  }
+
+  public TextRulerRulePattern getPreFiller() {
+    return this.slotPatterns.get(0).preFillerPattern;
+  }
+
+  public void setPreFiller(TextRulerRulePattern preFiller) {
+    this.slotPatterns.get(0).preFillerPattern = preFiller;
+    setNeedsCompile(true);
+  }
+
+  public TextRulerRulePattern getInFiller() {
+    return this.slotPatterns.get(0).fillerPattern;
+  }
+
+  public void setInFiller(TextRulerRulePattern inFiller) {
+    this.slotPatterns.get(0).fillerPattern = inFiller;
+    setNeedsCompile(true);
+  }
+
+  public TextRulerRulePattern getPostFiller() {
+    return this.slotPatterns.get(0).postFillerPattern;
+  }
+
+  public void setPostFiller(TextRulerRulePattern postFiller) {
+    this.slotPatterns.get(0).postFillerPattern = postFiller;
+    setNeedsCompile(true);
+  }
+
+  public KEPRule setCorrectionRule(boolean isCorrectionRule) {
+    this.isCorrectionRule = isCorrectionRule;
+    setNeedsCompile(true);
+    return this;
+  }
+
+  public boolean isCorrectionRule() {
+    return isCorrectionRule;
+  }
+
+  public TextRulerTarget getTarget() {
+    return this.target;
+  }
+
+  public void setTarget(TextRulerTarget target) {
+    this.target = target;
+    setNeedsCompile(true);
+  }
+
+  public boolean coversSameExamples(KEPRule otherRule) {
+    if (otherRule.getCoveringStatistics().getCoveredPositivesCount() != this
+            .getCoveringStatistics().getCoveredPositivesCount()
+            || !otherRule.getCoveringStatistics().getCoveredPositiveExamples().containsAll(
+                    this.getCoveringStatistics().getCoveredPositiveExamples()))
+      return false;
+    return true;
+  }
+}
\ No newline at end of file

Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPRuleItem.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPRuleItem.java?rev=1491365&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPRuleItem.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPRuleItem.java Mon Jun 10 08:05:55 2013
@@ -0,0 +1,242 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.ruta.textruler.learner.kep;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.ruta.textruler.core.TextRulerAnnotation;
+import org.apache.uima.ruta.textruler.core.TextRulerRule;
+import org.apache.uima.ruta.textruler.core.TextRulerRuleItem;
+
+/**
+ * Single item of a {@link KEPRule}: a type (or {@code ANY} if no type is set) with an optional
+ * quantifier and a list of condition groups. Each inner list of {@link #getConditions()} with
+ * exactly one element is rendered as a plain condition, every larger inner list is rendered as
+ * an {@code OR(...)} of its elements; the groups themselves are separated by commas.
+ */
+public class KEPRuleItem implements TextRulerRuleItem {
+
+  private boolean isStarWildCard = false;
+
+  private boolean isReluctant = false;
+
+  private Type type;
+
+  private TextRulerAnnotation annotation;
+
+  private int min = 1;
+
+  private int max = 1;
+
+  private List<List<KEPRuleItemCondition>> conditions = new ArrayList<List<KEPRuleItemCondition>>();
+
+  /**
+   * Copy constructor. The outer and inner condition lists are copied, the condition objects
+   * and the annotation are shared with the original.
+   *
+   * @param copyFrom
+   *          the item to copy
+   */
+  public KEPRuleItem(KEPRuleItem copyFrom) {
+    super();
+    this.annotation = copyFrom.annotation;
+    this.isStarWildCard = copyFrom.isStarWildCard;
+    this.type = copyFrom.type;
+    this.conditions = new ArrayList<List<KEPRuleItemCondition>>();
+    for (List<KEPRuleItemCondition> cList : copyFrom.conditions) {
+      this.conditions.add(new ArrayList<KEPRuleItemCondition>(cList));
+    }
+    this.isReluctant = copyFrom.isReluctant;
+    this.min = copyFrom.min;
+    this.max = copyFrom.max;
+  }
+
+  /** Creates an item for the given type without annotation and without conditions. */
+  public KEPRuleItem(Type type) {
+    super();
+    this.type = type;
+  }
+
+  /** Creates an item for the given annotation; the type is taken from the annotation. */
+  public KEPRuleItem(TextRulerAnnotation a) {
+    super();
+    this.type = a.getType();
+    this.annotation = a;
+  }
+
+  /** Creates an item for the given annotation; the type is taken from the annotation. */
+  public KEPRuleItem(AnnotationFS afs) {
+    super();
+    this.annotation = new TextRulerAnnotation(afs);
+    this.type = this.annotation.getType();
+  }
+
+  /** Creates an item for the given type with a single regular expression condition. */
+  public KEPRuleItem(Type type, String regExpString) {
+    super();
+    this.type = type;
+    List<KEPRuleItemCondition> list = new ArrayList<KEPRuleItemCondition>();
+    list.add(new KEPRuleItemCondition(regExpString));
+    this.conditions.add(list);
+  }
+
+  /** Creates an item without type; it is rendered as {@code ANY}. */
+  public KEPRuleItem() {
+    this.type = null;
+  }
+
+  public KEPRuleItem copy() {
+    return new KEPRuleItem(this);
+  }
+
+  /**
+   * Renders this item as part of a rule string: type name (or {@code ANY}), quantifier, and, in
+   * curly braces, the conditions followed by the marking action. The marking action is only
+   * emitted for the first item of a filler pattern.
+   *
+   * @param rule
+   *          the rule this item belongs to; only accessed (as {@link KEPRule}) when the marking
+   *          action is emitted, so it may be {@code null} otherwise
+   * @param type
+   *          the kind of pattern this item is part of
+   * @param numberInPattern
+   *          index of this item within its pattern
+   * @param patternSize
+   *          number of items of the pattern
+   * @param numberInRule
+   *          index of this item within the whole rule
+   * @param ruleSize
+   *          number of items of the whole rule (unused)
+   * @param slotIndex
+   *          index of the slot, used to obtain the mark name
+   * @return the string representation of this item
+   */
+  public String getStringForRuleString(TextRulerRule rule, MLRuleItemType type,
+          int numberInPattern, int patternSize, int numberInRule, int ruleSize, int slotIndex) {
+
+    KEPRule kepRule = (KEPRule) rule;
+    boolean isMarkingItem = type == MLRuleItemType.FILLER && numberInPattern == 0;
+    boolean needsBraces = isMarkingItem || !this.conditions.isEmpty();
+
+    String anchor = (this.type == null ? "ANY" : this.type.getShortName())
+            + (isStarWildCard ? "*" : "")
+            + ((min == 1 && max == 1) ? "" : ("[" + min + "," + max + "]"))
+            + (isReluctant ? "?" : "") + (needsBraces ? "{" : "");
+
+    // Separators are written explicitly between the elements instead of appending ", " and
+    // cutting at the last comma afterwards: an empty condition group must neither truncate the
+    // text rendered so far nor suppress the groups that follow it.
+    StringBuilder cStr = new StringBuilder();
+    for (List<KEPRuleItemCondition> cList : this.conditions) {
+      if (cList == null || cList.isEmpty()) {
+        continue;
+      }
+      if (cStr.length() > 0) {
+        cStr.append(", ");
+      }
+      if (cList.size() == 1) {
+        cStr.append(cList.get(0));
+      } else {
+        cStr.append("OR(");
+        for (int i = 0; i < cList.size(); i++) {
+          if (i > 0) {
+            cStr.append(", ");
+          }
+          cStr.append(cList.get(i));
+        }
+        cStr.append(")");
+      }
+    }
+
+    String mark = "";
+    if (isMarkingItem) {
+      if (kepRule.isCorrectionRule())
+        mark += "->UNMARK(" + kepRule.getMarkName(slotIndex);
+      else
+        mark += "->MARKONCE(" + kepRule.getMarkName(slotIndex);
+      if (patternSize > 1)
+        mark += ", " + (numberInRule + 1) + ", " + (numberInRule + patternSize);
+      mark += ")";
+    }
+    return anchor + cStr + mark + (needsBraces ? "}" : "");
+  }
+
+  /** Renders the item without marking action, i.e. type, quantifier and conditions only. */
+  @Override
+  public String toString() {
+    return getStringForRuleString(null, null, 0, 0, 0, 0, 0);
+  }
+
+  public boolean isStarWildCard() {
+    return isStarWildCard;
+  }
+
+  /**
+   * Sets the star wildcard flag; enabling it resets min and max to 1.
+   *
+   * @return this item
+   */
+  public KEPRuleItem setStarWildCard(boolean isStarWildCard) {
+    this.isStarWildCard = isStarWildCard;
+    if (isStarWildCard) {
+      this.min = 1;
+      this.max = 1;
+    }
+    return this;
+  }
+
+  public Type getType() {
+    return type;
+  }
+
+  public void setType(Type type) {
+    this.type = type;
+  }
+
+  /**
+   * Two items are considered equal if their string representations are equal.
+   *
+   * @param o
+   *          the item to compare with, may be {@code null}
+   * @return {@code true} if {@code o} is not {@code null} and renders to the same string
+   */
+  public boolean equals(TextRulerRuleItem o) {
+    return o != null && o.toString().equals(this.toString());
+  }
+
+  /** @return begin offset of the annotation; requires that an annotation has been set */
+  public int getBegin() {
+    return this.annotation.getBegin();
+  }
+
+  /** @return end offset of the annotation; requires that an annotation has been set */
+  public int getEnd() {
+    return this.annotation.getEnd();
+  }
+
+  public int getMin() {
+    return min;
+  }
+
+  /**
+   * Sets the minimum of the quantifier, raises the maximum if it is smaller, and clears the star
+   * wildcard flag.
+   *
+   * @return this item
+   */
+  public KEPRuleItem setMin(int min) {
+    this.min = min;
+    if (min > this.max)
+      this.max = min;
+    isStarWildCard = false;
+    return this;
+  }
+
+  public int getMax() {
+    return max;
+  }
+
+  /**
+   * Sets the maximum of the quantifier, lowers the minimum if it is greater, and clears the star
+   * wildcard flag.
+   *
+   * @return this item
+   */
+  public KEPRuleItem setMax(int max) {
+    this.max = max;
+    if (max < this.min)
+      this.min = max;
+    isStarWildCard = false;
+    return this;
+  }
+
+  public boolean isReluctant() {
+    return isReluctant;
+  }
+
+  public KEPRuleItem setReluctant(boolean isReluctant) {
+    this.isReluctant = isReluctant;
+    return this;
+  }
+
+  /**
+   * Adds the condition as a new group consisting of this single condition.
+   *
+   * @return this item
+   */
+  public KEPRuleItem addAndCondition(KEPRuleItemCondition condition) {
+    List<KEPRuleItemCondition> list = new ArrayList<KEPRuleItemCondition>();
+    list.add(condition);
+    this.conditions.add(list);
+    return this;
+  }
+
+  /** @return the internal list of condition groups (not a copy) */
+  public List<List<KEPRuleItemCondition>> getConditions() {
+    return this.conditions;
+  }
+
+  /** Replaces the annotation; the type is taken from the new annotation. */
+  public void setAnnotation(AnnotationFS afs) {
+    this.annotation = new TextRulerAnnotation(afs);
+    this.type = this.annotation.getType();
+  }
+
+  /**
+   * Replaces the condition groups with the given list (stored as is, not copied).
+   *
+   * @return this item
+   */
+  public KEPRuleItem setConditions(List<List<KEPRuleItemCondition>> conditions) {
+    this.conditions = conditions;
+    return this;
+  }
+
+  /**
+   * Adds the given list as one new condition group (stored as is, not copied).
+   *
+   * @return this item
+   */
+  public KEPRuleItem addConditions(List<KEPRuleItemCondition> toMerge) {
+    this.conditions.add(toMerge);
+    return this;
+  }
+
+  /**
+   * Checks whether one of the single-element condition groups holds a condition for which
+   * {@link KEPRuleItemCondition#equals(Type)} accepts the given type.
+   *
+   * @param type2
+   *          the type to look for
+   * @return {@code true} if such a condition exists
+   */
+  public boolean containsAndCondition(Type type2) {
+    for (List<KEPRuleItemCondition> list : this.conditions) {
+      if (list.size() == 1 && list.get(0).equals(type2)) {
+        return true;
+      }
+    }
+    return false;
+  }
+}
\ No newline at end of file

Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPRuleItemCondition.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPRuleItemCondition.java?rev=1491365&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPRuleItemCondition.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPRuleItemCondition.java Mon Jun 10 08:05:55 2013
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.ruta.textruler.learner.kep;
+
+import org.apache.uima.cas.Type;
+
+/**
+ * Condition attached to a KEP rule item. A condition either refers to a type (built with
+ * {@link #KEPRuleItemCondition(Type, Condition, boolean)}) or to a regular expression (built
+ * with {@link #KEPRuleItemCondition(String)}, in which case no type is set).
+ * <p>
+ * Note that both {@code equals} methods are overloads and do not override
+ * {@link Object#equals(Object)}.
+ */
+public class KEPRuleItemCondition {
+
+  /** The kinds of conditions that can be rendered. */
+  public enum Condition {
+    IS, PARTOF, CONTAINS, REGEXP
+  }
+
+  /** Type argument of the condition; {@code null} for conditions built from a regular expression. */
+  private Type type;
+
+  private Condition condition;
+
+  /** Whether the condition is negated (rendered with a leading {@code -}). */
+  private boolean isNot;
+
+  /** Regular expression argument; the empty string for conditions built from a type. */
+  private String regExp;
+
+  /**
+   * Creates a condition on a type.
+   *
+   * @param type
+   *          the type argument of the condition
+   * @param condition
+   *          the kind of condition
+   * @param isNot
+   *          {@code true} to negate the condition
+   */
+  public KEPRuleItemCondition(Type type, Condition condition, boolean isNot) {
+    this.type = type;
+    this.condition = condition;
+    this.isNot = isNot;
+    this.regExp = "";
+  }
+
+  /**
+   * Creates a non-negated {@link Condition#REGEXP} condition without type.
+   *
+   * @param regExp
+   *          the regular expression argument, rendered verbatim
+   */
+  public KEPRuleItemCondition(String regExp) {
+    this.regExp = regExp;
+    this.condition = Condition.REGEXP;
+    this.isNot = false;
+  }
+
+  /**
+   * Renders the condition as {@code [-]NAME(argument)}, where the argument is the regular
+   * expression followed by the short name of the type, if a type is set.
+   */
+  @Override
+  public String toString() {
+    // The conditional expression has to be parenthesized: without the parentheses the
+    // concatenation "regExp + type" is compared to null (which is never true), so the regular
+    // expression is dropped and a missing type leads to a NullPointerException.
+    return (isNot ? "-" : "") + this.condition.toString() + "("
+            + regExp + (type == null ? "" : type.getShortName()) + ")";
+  }
+
+  /**
+   * Compares two conditions. They are equal if negation and kind agree and either both are
+   * {@link Condition#REGEXP} conditions with the same regular expression, or both have a type
+   * with the same string representation.
+   *
+   * @param other
+   *          the condition to compare with, may be {@code null}
+   * @return {@code true} if the conditions are equal in the sense described above
+   */
+  public boolean equals(KEPRuleItemCondition other) {
+    if (other == null || this.isNot != other.isNot || this.condition != other.condition) {
+      return false;
+    }
+    if (this.condition == Condition.REGEXP
+            && (this.regExp == null ? other.regExp == null : this.regExp.equals(other.regExp))) {
+      return true;
+    }
+    // conditions without type (regular expressions) never match by type
+    return this.type != null && other.type != null
+            && this.type.toString().equals(other.type.toString());
+  }
+
+  /**
+   * Checks whether this condition refers to the given type, ignoring kind and negation.
+   *
+   * @param type
+   *          the type to compare with, may be {@code null}
+   * @return {@code true} if this condition has a type whose string representation equals the one
+   *         of the given type; {@code false} if either type is missing
+   */
+  public boolean equals(Type type) {
+    return this.type != null && type != null && this.type.toString().equals(type.toString());
+  }
+}