You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by jo...@apache.org on 2011/08/12 12:32:54 UTC
svn commit: r1157037 [4/10] - in /uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler: ./ .settings/ META-INF/ icons/ schema/ src/ src/main/ src/main/java/ src/main/java/org/ src/main/java/org/apache/ src/main/java/org/apache/uima/ src/main/jav...

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/BasicLP2.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/BasicLP2.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/BasicLP2.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/BasicLP2.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,579 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.textmarker.textruler.learner.lp2;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.textmarker.textruler.core.TextRulerAnnotation;
+import org.apache.uima.textmarker.textruler.core.TextRulerBasicLearner;
+import org.apache.uima.textmarker.textruler.core.TextRulerExample;
+import org.apache.uima.textmarker.textruler.core.TextRulerExampleDocument;
+import org.apache.uima.textmarker.textruler.core.TextRulerRule;
+import org.apache.uima.textmarker.textruler.core.TextRulerRuleList;
+import org.apache.uima.textmarker.textruler.core.TextRulerShiftExample;
+import org.apache.uima.textmarker.textruler.core.TextRulerStatisticsCollector;
+import org.apache.uima.textmarker.textruler.core.TextRulerTarget;
+import org.apache.uima.textmarker.textruler.core.TextRulerTarget.MLTargetType;
+import org.apache.uima.textmarker.textruler.core.TextRulerToolkit;
+import org.apache.uima.textmarker.textruler.extension.TextRulerLearnerDelegate;
+import org.apache.uima.util.FileUtils;
+
+public abstract class BasicLP2 extends TextRulerBasicLearner {
+
+  // Keys looked up in the parameter map handed to setParameters(Map).
+  public static final String WINDOW_SIZE_KEY = "windowSize";
+
+  public static final String CURRENT_BEST_RULES_SIZE_KEY = "currentBestRulesSize";
+
+  public static final String CURRENT_CONTEXTUAL_RULES_SIZE_KEY = "currentContextualRulesSize";
+
+  public static final String MIN_COVERED_POSITIVES_PER_RULE_KEY = "minCoveredPositivesPerRule";
+
+  public static final String MAX_ERROR_THRESHOLD_KEY = "maxErrorThreshold";
+
+  // Default values used to initialize the corresponding instance fields below.
+  public static final int STANDARD_WINDOW_SIZE = 2;
+
+  public static final int STANDARD_MAX_CURRENT_BEST_RULES_COUNT = 4;
+
+  public static final int STANDARD_MAX_CONTEXTUAL_RULES_COUNT = 4;
+
+  public static final int STANDARD_MIN_COVERED_POSITIVES_PER_RULE = 1;
+
+  public static final float STANDARD_MAX_ERROR_THRESHOLD = 0.1f;
+
+  // NOTE(review): not referenced anywhere in this class; presumably used by collaborators.
+  public static final String CORRECTION_ANNOTATION_NAME = "lp2shift";
+
+  // Default for shiftSize; there is no parameter key for it in setParameters(Map).
+  private static final int STANDARD_SHIFT_SIZE = 2;
+
+  // Capacity passed to the per-seed "best rules" queue created in learnTaggingRules.
+  protected int maxCurrentBestRulesCount = STANDARD_MAX_CURRENT_BEST_RULES_COUNT;
+
+  // Capacity passed to the per-seed "contextual rules" queue created in learnTaggingRules.
+  protected int maxCurrentContextualRulesCount = STANDARD_MAX_CONTEXTUAL_RULES_COUNT;
+
+  // Set via setParameters; not read in this class (presumably used by subclasses).
+  protected int windowSize = STANDARD_WINDOW_SIZE;
+
+  // Maximum shift distance given to the left-correction target in runForSlotName.
+  protected int shiftSize = STANDARD_SHIFT_SIZE;
+
+  // Set via setParameters; not read in this class (presumably used by subclasses).
+  protected int minCoveredPositives = STANDARD_MIN_COVERED_POSITIVES_PER_RULE;
+
+  // Set via setParameters; not read in this class (presumably used by subclasses).
+  protected float maxErrorThreshold = STANDARD_MAX_ERROR_THRESHOLD;
+
+  // Positive examples of the target currently being learned.
+  protected List<TextRulerExample> examples;
+
+  // Positive examples covered by rules already accepted into bestRulesPool.
+  protected Set<TextRulerExample> coveredExamples;
+
+  // Size of the slot token-count histogram minus one; set in runForSlotName.
+  protected int slotMaximumTokenCount = 0;
+
+  // Queues for the seed example currently processed; recreated for every seed.
+  protected LP2CurrentBestRulesQueue currentBestRules;
+
+  protected LP2CurrentBestRulesQueue currentContextualRules;
+
+  // Rules accumulated over all seeds of the current learnTaggingRules call.
+  protected TextRulerRuleList bestRulesPool;
+
+  protected TextRulerRuleList contextRulesPool;
+
+  // Rule strings of the finished boundary passes; assigned in runForSlotName and
+  // read by getResultString.
+  protected String leftBoundaryBestRulesString = null;
+
+  protected String rightBoundaryBestRulesString = null;
+
+  protected String leftBoundaryContextualRulesString = null;
+
+  protected String rightBoundaryContextualRulesString = null;
+
+  /**
+   * Creates the learner; all arguments are forwarded unchanged to the
+   * {@link TextRulerBasicLearner} constructor.
+   */
+  public BasicLP2(String inputDir, String prePropTMFile, String tmpDir, String[] slotNames,
+          Set<String> filterSet, TextRulerLearnerDelegate delegate) {
+    super(inputDir, prePropTMFile, tmpDir, slotNames, filterSet, delegate);
+  }
+
+  protected TextRulerRuleList learnTaggingRules(TextRulerTarget target,
+          TextRulerRuleList contextualRules) {
+    if (target.type == MLTargetType.SINGLE_LEFT_BOUNDARY)
+      sendStatusUpdateToDelegate("Creating Left-Boundary Examples...",
+              TextRulerLearnerState.ML_RUNNING, false);
+    else if (target.type == MLTargetType.SINGLE_RIGHT_BOUNDARY)
+      sendStatusUpdateToDelegate("Creating Right-Boundary Examples...",
+              TextRulerLearnerState.ML_RUNNING, false);
+    else if (target.type == MLTargetType.SINGLE_LEFT_CORRECTION)
+      sendStatusUpdateToDelegate("Creating Left Correction Examples...",
+              TextRulerLearnerState.ML_RUNNING, false);
+    else
+      // if (target.type == MLTargetType.SINGLE_RIGHT_CORRECTION)
+      sendStatusUpdateToDelegate("Creating Right Correction Examples...",
+              TextRulerLearnerState.ML_RUNNING, false);
+    exampleDocuments.clearCurrentExamples();
+    exampleDocuments.createExamplesForTarget(target);
+    examples = exampleDocuments.getAllPositiveExamples();
+
+    if (shouldAbort())
+      return null;
+    bestRulesPool = new TextRulerRuleList();
+    contextRulesPool = new TextRulerRuleList();
+    coveredExamples = new HashSet<TextRulerExample>();
+    int roundNumber = 0;
+    for (TextRulerExample e : examples)
+      if (!coveredExamples.contains(e)) {
+        if (shouldAbort())
+          break;
+        roundNumber++;
+        currentBestRules = new LP2CurrentBestRulesQueue(maxCurrentBestRulesCount);
+        currentContextualRules = new LP2CurrentBestRulesQueue(maxCurrentContextualRulesCount);
+        // TextRulerToolkit.log("Example: "+e.getAnnotation().getBegin()+" : "+e.getAnnotation().getEnd());
+
+        induceRulesFromExample(e, roundNumber);
+
+        // TextRulerToolkit.log("Best Rules from this Seed: "+currentBestRules.size());
+        // if (TextRulerToolkit.DEBUG && currentBestRules.size()>1)
+        // {
+        // for (TextRulerRule r : currentBestRules)
+        // {
+        // TextRulerToolkit.log("\tp="+r.getCoveringStatistics().getCoveredPositivesCount()+"; n="+r.getCoveringStatistics().getCoveredNegativesCount()+";  "+r.getRuleString());
+        // for (TextRulerExample ex :
+        // r.getCoveringStatistics().getCoveredPositiveExamples())
+        // {
+        // TextRulerToolkit.log("\t\te="+ex.getAnnotation().getBegin());
+        //
+        // }
+        // }
+        // }
+        for (LP2Rule bestRule : currentBestRules) {
+          addToFinalBestRulesPool(bestRule);
+        }
+        for (LP2Rule ctxRule : currentContextualRules) {
+          addToFinalContextRulesPool(ctxRule);
+        }
+        sendStatusUpdateToDelegate("New Rules added.", TextRulerLearnerState.ML_RUNNING, true);
+      }
+    if (TextRulerToolkit.DEBUG) {
+      bestRulesPool.saveToRulesFile(getIntermediateRulesFileName(), getTMFileHeaderString());
+      // for (TextRulerRule r : bestRulesPool)
+      // {
+      // TextRulerToolkit.log("p="+r.getCoveringStatistics().getCoveredPositivesCount()+"; n="+r.getCoveringStatistics().getCoveredNegativesCount()+";  "+r.getRuleString());
+      // }
+    }
+
+    TextRulerRuleList result = bestRulesPool;
+    if (contextualRules != null)
+      for (TextRulerRule r : contextRulesPool)
+        contextualRules.add(r);
+    bestRulesPool = null;
+    contextRulesPool = null;
+    return result;
+  }
+
+  /**
+   * Loads a CAS through the superclass and creates the boundary annotations on it before it
+   * is handed to the caller.
+   */
+  @Override
+  public CAS loadCAS(String fileName, CAS reuseCAS) {
+    final CAS loadedCas = super.loadCAS(fileName, reuseCAS);
+    prepareCASWithBoundaries(loadedCas);
+    return loadedCas;
+  }
+
+  /**
+   * Creates the boundary annotations in the given CAS, once for every configured slot name.
+   */
+  public void prepareCASWithBoundaries(CAS cas) {
+    final String[] names = slotNames;
+    for (int i = 0; i < names.length; i++) {
+      TextRulerExampleDocument.createBoundaryAnnotationsForCas(cas, names[i], filterSet);
+    }
+  }
+
+  /**
+   * Creates the boundary annotations in every CAS the example documents currently hold in
+   * their cache.
+   */
+  public void prepareCachedCASesWithBoundaries() {
+    for (CAS cachedCas : exampleDocuments.getCachedCASes()) {
+      prepareCASWithBoundaries(cachedCas);
+    }
+  }
+
+  /**
+   * Runs the superclass clean-up and then drops all references to examples, queues and pools
+   * held by this learner.
+   */
+  @Override
+  protected void cleanUp() {
+    super.cleanUp();
+    // example bookkeeping
+    examples = null;
+    coveredExamples = null;
+    // per-seed queues
+    currentBestRules = null;
+    currentContextualRules = null;
+    // per-target pools
+    bestRulesPool = null;
+    contextRulesPool = null;
+  }
+
+  /**
+   * Entry point of the learner: prepares the already cached CASes, runs the learning for each
+   * slot name in order and finally reports the done state to the delegate.
+   */
+  @Override
+  protected void doRun() {
+    TextRulerToolkit.logIfDebug("--- LP2 START");
+
+    // CASes that are already loaded are prepared here; all others get prepared when they are
+    // loaded (see loadCAS)
+    prepareCachedCASesWithBoundaries();
+
+    for (String slotName : slotNames) {
+      runForSlotName(slotName);
+    }
+
+    sendStatusUpdateToDelegate("Done", TextRulerLearnerState.ML_DONE, true);
+    TextRulerToolkit.logIfDebug("--- LP2 END");
+  }
+
+  /**
+   * Runs all learning passes for one slot: determines the maximum slot length, learns the left
+   * and the right boundary rules (keeping their rule strings for {@link #getResultString()}),
+   * writes a fixed script header to <code>rules.tm</code> in the temp directory and finally
+   * runs the left correction pass. The method returns early whenever an abort is requested
+   * between the passes.
+   * 
+   * @param slotName
+   *          name of the slot to learn rules for
+   */
+  protected void runForSlotName(String slotName) {
+    // 1. get slot length histogram in order to find maximum slot length (counted in tokens)
+    sendStatusUpdateToDelegate("Creating slot length histogram...",
+            TextRulerLearnerState.ML_RUNNING, false);
+    List<Integer> histogram = exampleDocuments.getTokenCountHistogrammForSlotName(slotName,
+            TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet));
+    if (shouldAbort())
+      return;
+    // -1 since the zero-histogram point also needs a place!
+    slotMaximumTokenCount = histogram.size() - 1;
+
+    // 2. learn left boundary best rules
+    TextRulerRuleList ctxRules = new TextRulerRuleList();
+    TextRulerRuleList bestRules = learnTaggingRules(new TextRulerTarget(slotName,
+            MLTargetType.SINGLE_LEFT_BOUNDARY, this), ctxRules);
+    if (bestRules != null) {
+      leftBoundaryBestRulesString = bestRules.getRulesString("");
+      leftBoundaryContextualRulesString = ctxRules.getRulesString("\t");
+      bestRules.clear(); // free some memory/references
+    }
+    if (shouldAbort())
+      return;
+
+    // 3. learn right boundary best rules
+    ctxRules.clear();
+    bestRules = learnTaggingRules(new TextRulerTarget(slotName, MLTargetType.SINGLE_RIGHT_BOUNDARY,
+            this), ctxRules);
+    if (bestRules != null) {
+      rightBoundaryBestRulesString = bestRules.getRulesString("");
+      rightBoundaryContextualRulesString = ctxRules.getRulesString("\t");
+    }
+
+    // TODO add correction rule learn stuff
+    // testTaggingRulesAndCreateCorrectionRulesExamples(null, STANDARD_MAX_CONTEXTUAL_RULES_COUNT)
+
+    // 4. write the fixed script header; the learned rules are not written here yet
+    File file = new File(tempDirectory() + "rules.tm");
+    String resultString = "PACKAGE org.apache.uima.ml;\n\nDocument{->FILTERTYPE(SPACE, BREAK, NBSP, MARKUP)};\n";
+    try {
+      FileUtils.saveString2File(resultString, file);
+    } catch (IOException e) {
+      // TODO send text to ui
+      // at least leave a trace instead of silently dropping the failure
+      TextRulerToolkit.log("Could not write " + file.getPath() + ": " + e.getMessage());
+    }
+
+    // 5. correct left start; the returned rules are not used yet
+    TextRulerTarget lsTarget = new TextRulerTarget(slotName, MLTargetType.SINGLE_LEFT_CORRECTION,
+            this);
+    lsTarget.setMaxShiftDistance(shiftSize);
+    learnTaggingRules(lsTarget, null);
+
+    // TODO the analogous right correction pass (SINGLE_RIGHT_CORRECTION) is not enabled yet
+
+    sendStatusUpdateToDelegate("SLOT Done", TextRulerLearnerState.ML_RUNNING, true);
+    TextRulerToolkit.logIfDebug("--- LP2 END FOR SLOT:" + slotName);
+  }
+
+  /**
+   * Induces rules from one seed example. It is called by learnTaggingRules for every positive
+   * example that is not yet covered, right after fresh {@link #currentBestRules} and
+   * {@link #currentContextualRules} queues have been created; whatever those queues contain
+   * when this method returns is merged into the rule pools.
+   * 
+   * @param e
+   *          the seed example
+   * @param roundNumber
+   *          running number of the seed, starting at 1
+   */
+  protected abstract void induceRulesFromExample(TextRulerExample e, int roundNumber);
+
+  /**
+   * Adds the rule to the contextual rules pool unless the pool already contains it.
+   * 
+   * @param rule
+   *          the contextual rule to add
+   */
+  protected void addToFinalContextRulesPool(LP2Rule rule) {
+    // the former "DEBUG && false" file dumps could never execute and were removed
+    if (!contextRulesPool.contains(rule)) {
+      contextRulesPool.add(rule);
+    }
+  }
+
+  /**
+   * Adds the rule to the best rules pool unless the pool already contains it. For a newly
+   * added rule, all positive examples it covers are marked as covered so that they are no
+   * longer used as seeds; in debug mode the pool is additionally saved to the intermediate
+   * rules file.
+   * 
+   * @param rule
+   *          the best rule to add
+   */
+  protected void addToFinalBestRulesPool(LP2Rule rule) {
+    // the former "DEBUG && false" file dumps could never execute and were removed
+    if (bestRulesPool.contains(rule)) {
+      return;
+    }
+    bestRulesPool.add(rule);
+    // add all covered positives to covering set
+    coveredExamples.addAll(rule.getCoveringStatistics().getCoveredPositiveExamples());
+    if (TextRulerToolkit.DEBUG) {
+      bestRulesPool.saveToRulesFile(getIntermediateRulesFileName(), getTMFileHeaderString());
+    }
+  }
+
+  public String getResultString() {
+    String result = getTMFileHeaderString();
+    result += "// LEFT BOUNDARY RULES:\n";
+    if (leftBoundaryBestRulesString != null) {
+      result += leftBoundaryBestRulesString;
+      result += "\n// RIGHT BOUNDARY RULES:\n";
+      if (rightBoundaryBestRulesString != null)
+        result += rightBoundaryBestRulesString;
+      else if (bestRulesPool != null)
+        result += bestRulesPool.getRulesString("");
+
+      result += "\nBLOCK(contextualRules) Document{}\n" + "{\n"
+              + "\tDocument{->ASSIGN(redoContextualRules, false)}; // reset flag\n";
+      result += "\n\t// LEFT BOUNDARY CONTEXTUAL RULES:\n";
+      result += leftBoundaryContextualRulesString;
+
+      result += "\n\t// RIGHT BOUNDARY CONTEXTUAL RULES:\n";
+      if (rightBoundaryBestRulesString != null)
+        result += rightBoundaryContextualRulesString;
+      else if (contextRulesPool != null)
+        result += contextRulesPool.getRulesString("\t");
+
+      result += "\n\tDocument{IF(redoContextualRules)->CALL(thisFile.contextualRules)};\n}\n";
+    } else if (bestRulesPool != null) {
+      result += bestRulesPool.getRulesString("");
+      result += "\n\t// LEFT BOUNDARY CONTEXTUAL RULES:\n";
+      if (contextRulesPool != null)
+        result += contextRulesPool.getRulesString("");
+    }
+    String leftBoundary = TextRulerToolkit.getTypeShortName((new TextRulerTarget(slotNames[0],
+            MLTargetType.SINGLE_LEFT_BOUNDARY, this)).getSingleSlotTypeName());
+    String rightBoundary = TextRulerToolkit.getTypeShortName((new TextRulerTarget(slotNames[0],
+            MLTargetType.SINGLE_RIGHT_BOUNDARY, this)).getSingleSlotTypeName());
+    String slotMarkName = TextRulerToolkit.getTypeShortName(slotNames[0]);
+    int maxInnerLength = (slotMaximumTokenCount * 3) - 2;
+    result += "\n//slot-building rules:\n";
+    result += leftBoundary + "{IS(" + rightBoundary + ")->UNMARK(" + leftBoundary + "), UNMARK("
+            + rightBoundary + "), MARKONCE(" + slotMarkName + ")};\n";
+    result += leftBoundary + "{->UNMARK(" + leftBoundary + ")} ";
+    if (maxInnerLength > 0) {
+      result += "ANY[0, " + maxInnerLength + "]? ";
+      result += rightBoundary + "{->UNMARK(" + rightBoundary + "), MARKONCE(" + slotMarkName
+              + ", 1, 3)};\n";
+    } else
+      result += rightBoundary + "{->UNMARK(" + rightBoundary + "), MARKONCE(" + slotMarkName
+              + ", 1, 2)};\n";
+
+    result += "\n//cleaning up:\n" + leftBoundary + "{->UNMARK(" + leftBoundary + ")};\n"
+            + rightBoundary + "{->UNMARK(" + rightBoundary + ")};\n";
+    return result;
+  }
+
+  public void setParameters(Map<String, Object> params) {
+    if (TextRulerToolkit.DEBUG)
+      saveParametersToTempFolder(params);
+
+    // TODO try catch
+    if (params.containsKey(WINDOW_SIZE_KEY))
+      windowSize = (Integer) params.get(WINDOW_SIZE_KEY);
+
+    if (params.containsKey(CURRENT_BEST_RULES_SIZE_KEY))
+      maxCurrentBestRulesCount = (Integer) params.get(CURRENT_BEST_RULES_SIZE_KEY);
+
+    if (params.containsKey(CURRENT_CONTEXTUAL_RULES_SIZE_KEY))
+      maxCurrentContextualRulesCount = (Integer) params.get(CURRENT_CONTEXTUAL_RULES_SIZE_KEY);
+
+    if (params.containsKey(MIN_COVERED_POSITIVES_PER_RULE_KEY))
+      minCoveredPositives = (Integer) params.get(MIN_COVERED_POSITIVES_PER_RULE_KEY);
+
+    if (params.containsKey(MAX_ERROR_THRESHOLD_KEY))
+      maxErrorThreshold = (Float) params.get(MAX_ERROR_THRESHOLD_KEY);
+  }
+
+  protected String correctionRulesInputDirectory(TextRulerTarget target) {
+    if (target.isLeftBoundary())
+      return tempDirectory() + "leftCorrectionDocs";
+    else
+      return tempDirectory() + "rightCorrectionDocs";
+  }
+
+  /**
+   * Applies the rule set to every example document and, for each wrongly inserted boundary
+   * (covered negative example), looks for a correct boundary within <code>maxDistance</code>
+   * tokens to the left or to the right. For every such pair a shift example is created, and
+   * the processed test CAS of each document is written as XMI into the directory returned by
+   * {@link #correctionRulesInputDirectory(TextRulerTarget)}.
+   * 
+   * @param target
+   *          the boundary target to test
+   * @param maxDistance
+   *          maximum number of tokens searched on either side of a wrong tag
+   * @return <code>true</code> on success, <code>false</code> if any exception occurred (its
+   *         stack trace is printed)
+   */
+  protected boolean testTaggingRulesAndCreateCorrectionRulesExamples(TextRulerTarget target,
+          int maxDistance) {
+    try {
+      File dir = new File(correctionRulesInputDirectory(target));
+      if (!dir.exists())
+        dir.mkdir();
+      exampleDocuments.clearCurrentExamples();
+      exampleDocuments.createExamplesForTarget(target);
+      examples = exampleDocuments.getAllPositiveExamples();
+
+      TextRulerExampleDocument[] sortedDocs = exampleDocuments
+              .getSortedDocumentsInCacheOptimizedOrder();
+      TypeSystem ts = sortedDocs[0].getCAS().getTypeSystem();
+      Type tokensRootType = ts.getType(TextRulerToolkit.TM_ANY_TYPE_NAME);
+
+      // NOTE(review): the rules are read from a hard-coded absolute path instead of
+      // getResultString(); this looks like a leftover of a test setup - confirm before use.
+      String allRulesContent = FileUtils.file2String(new File("/testinput/testrules/rules.tm"));
+      FileUtils.saveString2File(allRulesContent, new File(getTempRulesFileName()));
+
+      CAS testCAS = getTestCAS();
+      for (TextRulerExampleDocument doc : sortedDocs) {
+        TextRulerStatisticsCollector c = new TextRulerStatisticsCollector();
+        doc.resetAndFillTestCAS(testCAS, target);
+        CAS docCAS = doc.getCAS();
+        ae.process(testCAS);
+        // test whole ruleset and collect negative examples
+        compareOriginalDocumentWithTestCAS(doc, testCAS, target, c, true);
+
+        // now we have some covered positive examples that are good, and maybe some negative
+        // examples for that we might create Correction Rules... in order to do that we have to
+        // create ShiftExamples and map negative examples (incorrect inserted boundaries) with
+        // a specific distance to an original positive example...
+
+        // TODO should that be done in both directions ? left and right ?! what happens if we
+        // find two potential examples, one left, one right ? --> for now: use the nearer one.
+        // if exactly the same distance, use the one where the wrong tag would be IN the slot
+        // filler!
+        List<TextRulerExample> correctTags = doc.getPositiveExamples();
+        List<TextRulerExample> wrongTags = new ArrayList<TextRulerExample>(
+                c.getCoveredNegativeExamples());
+        // NOTE(review): the shift examples are collected here but not handed on anywhere yet
+        List<TextRulerShiftExample> newExamples = new ArrayList<TextRulerShiftExample>();
+        for (TextRulerExample wrongTag : wrongTags) {
+          // test, if there's a corresponding positive example somewhere around (within
+          // maxDistance)
+          List<AnnotationFS> left = TextRulerToolkit.getAnnotationsBeforePosition(docCAS, wrongTag
+                  .getAnnotation().getBegin(), maxDistance, TextRulerToolkit
+                  .getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);
+          List<AnnotationFS> right = TextRulerToolkit.getAnnotationsAfterPosition(docCAS, wrongTag
+                  .getAnnotation().getEnd(), maxDistance, TextRulerToolkit
+                  .getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);
+
+          // TODO stop after the first found match or create one bad example for each found
+          // occurrence ??!! for now: stop after one ! so create only ONE bad example...
+
+          // walk leftwards, starting with the token nearest to the wrong tag
+          int leftDistance = 0;
+          TextRulerExample leftCorrectTag = null;
+          for (int i = left.size() - 1; i >= 0; i--) {
+            leftDistance++;
+            TextRulerAnnotation needle = TextRulerToolkit.convertToTargetAnnotation(left.get(i),
+                    doc, target, docCAS.getTypeSystem());
+            leftCorrectTag = TextRulerToolkit.exampleListContainsAnnotation(correctTags, needle);
+            if (leftCorrectTag != null)
+              break;
+          }
+
+          int rightDistance = 0;
+          TextRulerExample rightCorrectTag = null;
+          for (AnnotationFS fs : right) {
+            rightDistance++;
+            TextRulerAnnotation needle = TextRulerToolkit.convertToTargetAnnotation(fs, doc,
+                    target, docCAS.getTypeSystem());
+            rightCorrectTag = TextRulerToolkit.exampleListContainsAnnotation(correctTags, needle);
+            if (rightCorrectTag != null)
+              break;
+          }
+
+          // Choose the tag to shift to. The distances are only meaningful for a side on which
+          // a tag was actually found, so a side without a match must never win the comparison
+          // (previously a right-hand match could get lost that way).
+          TextRulerExample theCorrectTag;
+          if (leftCorrectTag == null) {
+            theCorrectTag = rightCorrectTag; // may be null as well: nothing found at all
+          } else if (rightCorrectTag == null) {
+            theCorrectTag = leftCorrectTag;
+          } else if (rightDistance < leftDistance) {
+            theCorrectTag = rightCorrectTag;
+          } else if (rightDistance > leftDistance) {
+            theCorrectTag = leftCorrectTag;
+          } else {
+            // same distance: use the one that would lie in the slot filler
+            theCorrectTag = target.type == MLTargetType.SINGLE_LEFT_BOUNDARY ? rightCorrectTag
+                    : leftCorrectTag;
+          }
+
+          if (theCorrectTag != null) {
+            TextRulerToolkit.log("FOUND BAD EXAMPLE FOR SHIFTING !!");
+            TextRulerShiftExample shiftExample = new TextRulerShiftExample(doc,
+                    wrongTag.getAnnotation(), theCorrectTag.getAnnotation(), true, target);
+            newExamples.add(shiftExample);
+          }
+        }
+        // File.pathSeparator separates the entries of a path list (':' or ';'); the file has
+        // to be placed inside dir, so let File join directory and file name
+        TextRulerToolkit.writeCAStoXMIFile(testCAS, new File(dir, doc.getCasFileName())
+                .getPath());
+      }
+      testCAS.reset();
+    } catch (Exception e) {
+      e.printStackTrace();
+      return false;
+    }
+
+    return true;
+  }
+
+  /**
+   * Returns the inherited file header followed by the declaration of the
+   * <code>redoContextualRules</code> variable used by the generated contextual rules block.
+   */
+  @Override
+  public String getTMFileHeaderString() {
+    final String inheritedHeader = super.getTMFileHeaderString();
+    return inheritedHeader + "BOOLEAN redoContextualRules;\n\n";
+  }
+
+  @Override
+  protected boolean checkForMandatoryTypes() {
+    if (!super.checkForMandatoryTypes())
+      return false;
+
+    CAS someCas = getTestCAS();
+    TypeSystem ts = someCas.getTypeSystem();
+    // check if all helper types are present:
+    List<String> list = new ArrayList<String>();
+
+    // only the first slot is important for now...
+    list.add(new TextRulerTarget(slotNames[0], MLTargetType.SINGLE_LEFT_BOUNDARY, this)
+            .getSingleSlotTypeName());
+    list.add(new TextRulerTarget(slotNames[0], MLTargetType.SINGLE_RIGHT_BOUNDARY, this)
+            .getSingleSlotTypeName());
+
+    // TODO add correction types here!
+    for (String s : list) {
+      if (ts.getType(s) == null)
+        return false;
+    }
+    return true;
+  }
+
+}

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/BasicLP2.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/BasicLP2.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2CurrentBestRulesQueue.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2CurrentBestRulesQueue.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2CurrentBestRulesQueue.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2CurrentBestRulesQueue.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.textmarker.textruler.learner.lp2;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.PriorityQueue;
+
+import org.apache.uima.textmarker.textruler.core.TextRulerToolkit;
+
+/**
+ * Size-limited collection of the currently best {@link LP2Rule}s. The rules are kept in two
+ * priority queues that always hold the same elements: one ordered best-first and one ordered
+ * worst-first, so that both the best and the worst rule are directly accessible.
+ * <p>
+ * The ordering prefers, in this sequence: more covered positives, a lower error rate, fewer
+ * constraints, and finally the lexicographically smaller rule string.
+ * <p>
+ * Note that {@link #iterator()} (and therefore for-each loops and {@link #printDebug()})
+ * traverses the rules in the internal heap order of the priority queue, which is not the
+ * sorted order. Use {@link #toArray()} for a best-first traversal.
+ */
+public class LP2CurrentBestRulesQueue implements Iterable<LP2Rule> {
+
+  // best rule at the head
+  private final PriorityQueue<LP2Rule> ruleList;
+
+  // same elements as ruleList, worst rule at the head
+  private final PriorityQueue<LP2Rule> reverseRuleList;
+
+  private final int maxSize;
+
+  /**
+   * Compares two rules; the better rule sorts first (negative result if o1 is better).
+   */
+  private static int cmpRules(LP2Rule o1, LP2Rule o2) {
+    // 1st criterion: sort by decreasing number of positive matches:
+    if (o1.getCoveringStatistics().getCoveredPositivesCount() > o2.getCoveringStatistics()
+            .getCoveredPositivesCount())
+      return -1;
+    if (o1.getCoveringStatistics().getCoveredPositivesCount() < o2.getCoveringStatistics()
+            .getCoveredPositivesCount())
+      return 1;
+
+    // 2nd criterion: sort by increasing error rate:
+    if (o1.getErrorRate() < o2.getErrorRate())
+      return -1;
+    if (o1.getErrorRate() > o2.getErrorRate())
+      return 1;
+
+    // 3rd criterion:
+    // TODO: if one rule has more positive matches than a threshold then prefer the one with
+    // more generic conditions; else prefer the other one.
+    // test for now: prefer more general rules !
+    int c1 = o1.totalConstraintCount();
+    int c2 = o2.totalConstraintCount();
+    if (c1 < c2)
+      return -1;
+    if (c1 > c2)
+      return 1;
+
+    // last resort: make the order deterministic
+    return o1.getRuleString().compareTo(o2.getRuleString());
+  }
+
+  /**
+   * Creates an empty queue.
+   * 
+   * @param maxSize
+   *          number of rules kept by {@link #cutToMaxSize()}; values below 1 are treated as 1
+   */
+  public LP2CurrentBestRulesQueue(int maxSize) {
+    this.maxSize = Math.max(1, maxSize);
+
+    ruleList = new PriorityQueue<LP2Rule>(this.maxSize, new Comparator<LP2Rule>() {
+      public int compare(LP2Rule o1, LP2Rule o2) {
+        return cmpRules(o1, o2);
+      }
+    });
+    reverseRuleList = new PriorityQueue<LP2Rule>(this.maxSize, new Comparator<LP2Rule>() {
+      public int compare(LP2Rule o1, LP2Rule o2) {
+        return -cmpRules(o1, o2);
+      }
+    });
+  }
+
+  /**
+   * Returns an iterator over the rules in internal heap order (NOT sorted best-first).
+   */
+  public Iterator<LP2Rule> iterator() {
+    return ruleList.iterator();
+  }
+
+  /** Removes all rules. */
+  public void clear() {
+    ruleList.clear();
+    reverseRuleList.clear();
+  }
+
+  /** Adds all given rules; the size limit is not enforced here. */
+  public void addAll(Collection<LP2Rule> rules) {
+    for (LP2Rule r : rules)
+      add(r);
+  }
+
+  /** Adds one rule; the size limit is not enforced until {@link #cutToMaxSize()} is called. */
+  public void add(LP2Rule rule) {
+    ruleList.add(rule);
+    reverseRuleList.add(rule);
+  }
+
+  /** Returns whether a rule equal to the given one is contained. */
+  public boolean contains(LP2Rule rule) {
+    return ruleList.contains(rule);
+  }
+
+  /**
+   * Removes the worst rules until at most <code>maxSize</code> rules are left.
+   * 
+   * @return the removed rules, worst first
+   */
+  public Collection<LP2Rule> cutToMaxSize() {
+    ArrayList<LP2Rule> result = new ArrayList<LP2Rule>();
+    while (ruleList.size() > maxSize) {
+      LP2Rule tail = reverseRuleList.poll();
+      if (tail == null || !ruleList.remove(tail))
+        break; // both queues out of sync; do not spin forever
+      result.add(tail);
+    }
+    return result;
+  }
+
+  /** Returns the best rule without removing it, or <code>null</code> if there is none. */
+  public LP2Rule peek() {
+    return ruleList.peek();
+  }
+
+  /** Removes the given rule, if present. */
+  public void remove(LP2Rule r) {
+    ruleList.remove(r);
+    reverseRuleList.remove(r);
+  }
+
+  /**
+   * Returns the rules as an array sorted best-first. The queue itself is not modified.
+   */
+  public LP2Rule[] toArray() {
+    // Iterating a PriorityQueue yields heap order, not sorted order, so drain a copy instead
+    // (the copy constructor keeps the comparator of the source queue).
+    PriorityQueue<LP2Rule> drain = new PriorityQueue<LP2Rule>(ruleList);
+    LP2Rule[] result = new LP2Rule[drain.size()];
+    for (int i = 0; i < result.length; i++)
+      result[i] = drain.poll();
+    return result;
+  }
+
+  /**
+   * Removes every rule whose covered positive examples are completely covered by a better
+   * rule of this queue.
+   */
+  protected void removeSubsumedRules() {
+    LP2Rule[] rulesArray = toArray(); // best first
+    boolean[] subsumed = new boolean[rulesArray.length];
+    for (int index1 = 0; index1 < rulesArray.length - 1; index1++) {
+      // a subsumed rule can be skipped as subsumer: everything it covers is also covered by
+      // the better rule that subsumed it, and that rule has been checked already
+      if (subsumed[index1])
+        continue;
+      LP2Rule rule1 = rulesArray[index1];
+      for (int index2 = index1 + 1; index2 < rulesArray.length; index2++) {
+        if (subsumed[index2])
+          continue;
+        LP2Rule rule2 = rulesArray[index2];
+        if (rule1.getCoveringStatistics().getCoveredPositiveExamples().containsAll(
+                rule2.getCoveringStatistics().getCoveredPositiveExamples()))
+          subsumed[index2] = true;
+      }
+    }
+    for (int i = 0; i < rulesArray.length; i++)
+      if (subsumed[i])
+        remove(rulesArray[i]);
+  }
+
+  /** Logs all rules with their statistics (in internal heap order). */
+  public void printDebug() {
+    TextRulerToolkit.log("-------CURRENT BEST RULES:");
+    for (LP2Rule r : ruleList) {
+      TextRulerToolkit.log(r.getRuleString() + " " + r.getCoveringStatistics() + "  error="
+              + r.getErrorRate() + "  constraints=" + r.totalConstraintCount());
+    }
+  }
+
+  /** Returns the number of rules currently held. */
+  public int size() {
+    return ruleList.size();
+  }
+}

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2CurrentBestRulesQueue.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2CurrentBestRulesQueue.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2Rule.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2Rule.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2Rule.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2Rule.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.textmarker.textruler.learner.lp2;
+
+import org.apache.uima.textmarker.textruler.core.TextRulerBasicLearner;
+import org.apache.uima.textmarker.textruler.core.TextRulerRuleItem;
+import org.apache.uima.textmarker.textruler.core.TextRulerSingleSlotRule;
+import org.apache.uima.textmarker.textruler.core.TextRulerStatisticsCollector;
+import org.apache.uima.textmarker.textruler.core.TextRulerTarget;
+import org.apache.uima.textmarker.textruler.core.TextRulerTarget.MLTargetType;
+import org.apache.uima.textmarker.textruler.core.TextRulerToolkit;
+
+/**
+ * Single-slot rule used by the LP2 learners. On top of the inherited rule state it keeps an
+ * error rate derived from the covering statistics, a "pre-filler start rule" flag and a
+ * "contextual rule" flag.
+ */
+public class LP2Rule extends TextRulerSingleSlotRule {
+
+  /** Covered negatives divided by covered positives; see {@link #setCoveringStatistics}. */
+  protected float errorRate;
+
+  /** Backing field of {@link #isPreFillerStartRule()}. */
+  protected boolean setIsPreFillerStartRule = false;
+
+  /** Backing field of {@link #isContextualRule()}. */
+  protected boolean isContextualRule = false;
+
+  /**
+   * Creates an empty rule for the given learner and target.
+   *
+   * @param parentAlgorithm the learner that owns this rule
+   * @param target the target this rule is learned for
+   */
+  public LP2Rule(TextRulerBasicLearner parentAlgorithm, TextRulerTarget target) {
+    super(parentAlgorithm, target);
+  }
+
+  /**
+   * Copy constructor: delegates to the superclass copy constructor and then takes over the
+   * error rate and both flags of {@code copyFrom}.
+   *
+   * @param copyFrom the rule to duplicate
+   */
+  protected LP2Rule(LP2Rule copyFrom) {
+    super(copyFrom);
+    this.errorRate = copyFrom.errorRate;
+    this.setIsPreFillerStartRule = copyFrom.setIsPreFillerStartRule;
+    this.isContextualRule = copyFrom.isContextualRule;
+  }
+
+  /**
+   * @return a new rule created through the copy constructor
+   */
+  @Override
+  public LP2Rule copy() {
+    return new LP2Rule(this);
+  }
+
+  /**
+   * Stores the statistics in the superclass and recomputes {@link #getErrorRate()} as
+   * covered negatives / covered positives. If fewer than one positive is covered, an error
+   * is logged and the error rate is set to {@link Float#MAX_VALUE}.
+   *
+   * @param c the freshly collected covering statistics
+   */
+  @Override
+  public void setCoveringStatistics(TextRulerStatisticsCollector c) {
+    super.setCoveringStatistics(c);
+    final int positives = c.getCoveredPositivesCount();
+    final int negatives = c.getCoveredNegativesCount();
+    if (positives >= 1) {
+      errorRate = ((float) negatives) / ((float) positives);
+      return;
+    }
+    TextRulerToolkit.log("ERROR, A RULE MAY NOT COVER ZERO POSITIVE EXAMPLES! WHAT'S WRONG ?");
+    TextRulerToolkit.log("\tRULE: " + getRuleString());
+    // rate the rule as bad as possible
+    errorRate = Float.MAX_VALUE;
+  }
+
+  /**
+   * @return the error rate computed by the last call to {@link #setCoveringStatistics}
+   */
+  public float getErrorRate() {
+    return errorRate;
+  }
+
+  /**
+   * Counts all constraints of the pre- and post-filler patterns. Every item counts as one
+   * constraint by itself (even a wildcard "ANY" item demands that a token is present), plus
+   * one for each constraint placed on that item.
+   *
+   * @return the total constraint count including the items themselves
+   */
+  public int totalConstraintCount() {
+    return sumItemConstraints(slotPattern.preFillerPattern, 1)
+            + sumItemConstraints(slotPattern.postFillerPattern, 1);
+  }
+
+  /**
+   * Counts only the constraints inside the items of the pre- and post-filler patterns; the
+   * items themselves are not counted.
+   *
+   * @return the sum of the items' own constraint counts
+   */
+  public int totalInnerConstraintCount() {
+    return sumItemConstraints(slotPattern.preFillerPattern, 0)
+            + sumItemConstraints(slotPattern.postFillerPattern, 0);
+  }
+
+  /**
+   * Adds up {@code perItemBase + item.totalConstraintCount()} over all items.
+   *
+   * @param items the rule items to visit; each one is cast to {@link LP2RuleItem}
+   * @param perItemBase the amount every item contributes on its own
+   * @return the accumulated count
+   */
+  private static int sumItemConstraints(Iterable<? extends TextRulerRuleItem> items,
+          int perItemBase) {
+    int sum = 0;
+    for (TextRulerRuleItem element : items) {
+      sum += perItemBase + ((LP2RuleItem) element).totalConstraintCount();
+    }
+    return sum;
+  }
+
+  /**
+   * @return the flag last stored with {@link #setIsPreFillerStartRule(boolean)}
+   */
+  public boolean isPreFillerStartRule() {
+    return setIsPreFillerStartRule;
+  }
+
+  /**
+   * @param flag the new value of the pre-filler start flag
+   */
+  public void setIsPreFillerStartRule(boolean flag) {
+    this.setIsPreFillerStartRule = flag;
+  }
+
+  /**
+   * Returns the item that carries the boundary mark: the first post-filler item for a
+   * left-boundary target, otherwise the last pre-filler item.
+   *
+   * @return the marking rule item
+   */
+  public LP2RuleItem getMarkingRuleItem() {
+    if (target.type == MLTargetType.SINGLE_LEFT_BOUNDARY) {
+      return (LP2RuleItem) slotPattern.postFillerPattern.get(0);
+    }
+    final int lastIndex = slotPattern.preFillerPattern.size() - 1;
+    return (LP2RuleItem) slotPattern.preFillerPattern.get(lastIndex);
+  }
+
+  /**
+   * @return the flag last stored with {@link #setIsContextualRule(boolean)}
+   */
+  public boolean isContextualRule() {
+    return isContextualRule;
+  }
+
+  /**
+   * Updates the contextual flag. The rule is marked as needing a recompile only when the
+   * value actually changes.
+   *
+   * @param flag the new value of the contextual flag
+   */
+  public void setIsContextualRule(boolean flag) {
+    if (flag == isContextualRule) {
+      return;
+    }
+    isContextualRule = flag;
+    setNeedsCompile(true);
+  }
+
+}

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2Rule.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2Rule.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2RuleItem.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2RuleItem.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2RuleItem.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2RuleItem.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,302 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.textmarker.textruler.learner.lp2;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.uima.cas.Type;
+import org.apache.uima.textmarker.textruler.core.TextRulerAnnotation;
+import org.apache.uima.textmarker.textruler.core.TextRulerRule;
+import org.apache.uima.textmarker.textruler.core.TextRulerRuleItem;
+import org.apache.uima.textmarker.textruler.core.TextRulerSingleSlotRule;
+import org.apache.uima.textmarker.textruler.core.TextRulerTarget.MLTargetType;
+import org.apache.uima.textmarker.textruler.core.TextRulerToolkit;
+import org.apache.uima.textmarker.textruler.core.TextRulerWordConstraint;
+
+/**
+ * One item (token position) of an {@link LP2Rule}. An item may carry a word constraint, a
+ * context ("NEAR") constraint and any number of further type constraints; with none of them
+ * it renders as the wildcard {@code ANY}.
+ * <p>
+ * NOTE(review): {@link #hashCode()} is overridden but {@code equals(Object)} is not; only the
+ * {@link #equals(TextRulerRuleItem)} overload compares by rule string. This is left unchanged
+ * because collection lookups elsewhere may rely on identity semantics - confirm before
+ * changing.
+ */
+public class LP2RuleItem implements TextRulerRuleItem {
+
+  /** Constraint on the token text/type; {@code null} if the item has none. */
+  protected TextRulerWordConstraint wordConstraint;
+
+  /** Optional NEAR constraint; {@code null} if the item has none. */
+  protected MLLP2ContextConstraint contextConstraint;
+
+  /** Additional type constraints, kept free of duplicates by {@link #addOtherConstraint}. */
+  protected List<MLLP2OtherConstraint> otherConstraints = new ArrayList<MLLP2OtherConstraint>();
+
+  /**
+   * Context constraint that renders as a {@code NEAR(...)} condition referring to the
+   * counterpart boundary of the parent rule's target.
+   */
+  public static class MLLP2ContextConstraint {
+    private String contextBoundaryName;
+
+    private int contextSize;
+
+    private boolean direction; // false = left; true = right;
+
+    /**
+     * @param contextSize the maximum distance rendered into the NEAR condition
+     * @param parentRule the rule whose target determines the boundary name and direction;
+     *          the direction is "right" ({@code true}) for a left-boundary target
+     */
+    public MLLP2ContextConstraint(int contextSize, LP2Rule parentRule) {
+      this.contextSize = contextSize;
+      contextBoundaryName = TextRulerToolkit.getTypeShortName(parentRule.getTarget()
+              .getCounterPartBoundaryTarget().getSingleSlotTypeName());
+      direction = parentRule.getTarget().type == MLTargetType.SINGLE_LEFT_BOUNDARY;
+    }
+
+    /**
+     * Copy constructor.
+     *
+     * @param copyFrom the constraint to duplicate
+     */
+    public MLLP2ContextConstraint(MLLP2ContextConstraint copyFrom) {
+      contextBoundaryName = copyFrom.contextBoundaryName;
+      contextSize = copyFrom.contextSize;
+      direction = copyFrom.direction;
+    }
+
+    /**
+     * @return a new constraint with the same boundary name, size and direction
+     */
+    public MLLP2ContextConstraint copy() {
+      return new MLLP2ContextConstraint(this);
+    }
+
+    /**
+     * @return the constraint rendered as a NEAR condition string
+     */
+    @Override
+    public String toString() {
+      return "NEAR(" + contextBoundaryName + ", 0," + contextSize + ","
+              + (direction ? "true" : "false") + ",true)";
+    }
+
+    /**
+     * Two context constraints are equal when their rendered strings are equal. Returns
+     * {@code false} (instead of throwing) for {@code null} or objects of another type.
+     */
+    @Override
+    public boolean equals(Object o) {
+      if (this == o)
+        return true;
+      if (!(o instanceof MLLP2ContextConstraint))
+        return false;
+      return toString().equals(o.toString());
+    }
+
+    @Override
+    public int hashCode() {
+      return toString().hashCode();
+    }
+
+  }
+
+  /**
+   * Type constraint derived from an annotation that relates to a token annotation. It
+   * renders as the short name of the constraint annotation's type.
+   */
+  public static class MLLP2OtherConstraint {
+
+    TextRulerAnnotation tokenAnnotation;
+
+    TextRulerAnnotation constraintAnnotation;
+
+    /** True when token and constraint annotation share the same begin and end offsets. */
+    boolean canBeAnchor;
+
+    Type type;
+
+    /**
+     * @param tokenAnnotation the token the constraint is attached to
+     * @param constraintAnnotation the annotation supplying the constraint type
+     */
+    public MLLP2OtherConstraint(TextRulerAnnotation tokenAnnotation,
+            TextRulerAnnotation constraintAnnotation) {
+      this.tokenAnnotation = tokenAnnotation;
+      this.constraintAnnotation = constraintAnnotation;
+      this.type = constraintAnnotation.getType();
+      canBeAnchor = (tokenAnnotation.getBegin() == constraintAnnotation.getBegin())
+              && (tokenAnnotation.getEnd() == constraintAnnotation.getEnd());
+      // TODO is the matching END also a requirement ?
+    }
+
+    /**
+     * @return {@code true} if the constraint annotation is the very same object as the token
+     *         annotation
+     */
+    public boolean isTMBasicTypeTokenConstraint() {
+      return tokenAnnotation == constraintAnnotation;
+    }
+
+    /**
+     * @return {@code true} if the constraint annotation spans exactly the token
+     */
+    public boolean canBeAnchorConstraint() {
+      return canBeAnchor;
+    }
+
+    /**
+     * Two constraints are equal when they render the same type name and agree on
+     * {@link #canBeAnchorConstraint()}. Returns {@code false} (instead of throwing) for
+     * {@code null} or objects of another type.
+     */
+    @Override
+    public boolean equals(Object o) {
+      if (this == o)
+        return true;
+      if (!(o instanceof MLLP2OtherConstraint))
+        return false;
+      MLLP2OtherConstraint co = (MLLP2OtherConstraint) o;
+      return toString().equals(co.toString()) && (canBeAnchor == co.canBeAnchor);
+    }
+
+    @Override
+    public int hashCode() {
+      return toString().hashCode() * (canBeAnchor ? 2 : 1);
+    }
+
+    /**
+     * @return the short name of the constraint type
+     */
+    @Override
+    public String toString() {
+      return type.getShortName();
+    }
+
+    /**
+     * @return a new constraint built from the same two annotations
+     */
+    public MLLP2OtherConstraint copy() {
+      return new MLLP2OtherConstraint(tokenAnnotation, constraintAnnotation);
+    }
+
+  }
+
+  /**
+   * Copy constructor: copies the word constraint, the context constraint and every other
+   * constraint of {@code copyFrom}.
+   *
+   * @param copyFrom the item to duplicate
+   */
+  public LP2RuleItem(LP2RuleItem copyFrom) {
+    super();
+    if (copyFrom.wordConstraint != null)
+      wordConstraint = copyFrom.wordConstraint.copy();
+    if (copyFrom.contextConstraint != null)
+      contextConstraint = copyFrom.contextConstraint.copy();
+    for (MLLP2OtherConstraint c : copyFrom.otherConstraints)
+      otherConstraints.add(c.copy());
+  }
+
+  /** Creates an item without any constraints (a wildcard). */
+  public LP2RuleItem() {
+    super();
+  }
+
+  /**
+   * @return a copy of this item created through the copy constructor
+   */
+  public LP2RuleItem copy() {
+    return new LP2RuleItem(this);
+  }
+
+  @Override
+  public int hashCode() {
+    return toString().hashCode();
+  }
+
+  /**
+   * Compares two items by their rule-independent string form.
+   *
+   * @param o the item to compare with
+   * @return {@code true} if {@code o} is an {@code LP2RuleItem} with the same string form;
+   *         {@code false} for {@code null} or any other item type
+   */
+  public boolean equals(TextRulerRuleItem o) {
+    return (o instanceof LP2RuleItem) && toString().equals(o.toString());
+  }
+
+  /**
+   * @return the item rendered without rule context (no marking action is emitted)
+   */
+  @Override
+  public String toString() {
+    return getStringForRuleString(null, null, 0, 0, 0, 0, 0);
+  }
+
+  /**
+   * @return the first other-constraint whose constraint annotation is the token annotation
+   *         itself, or {@code null} if there is none
+   */
+  public MLLP2OtherConstraint getTMBasicTypeTokenConstraint() {
+    for (MLLP2OtherConstraint c : otherConstraints)
+      if (c.isTMBasicTypeTokenConstraint())
+        return c;
+    return null;
+  }
+
+  /**
+   * Renders this item as a rule element: an anchor followed by an optional brace block
+   * holding the conditions and, for the marking item, the {@code MARKONCE} action.
+   * <p>
+   * The item is the marking item when {@code rule} is non-null and it is either the last
+   * pre-filler item of a right-boundary rule or the first post-filler item of a
+   * left-boundary rule.
+   *
+   * @param rule the rule the item is rendered for; may be {@code null}, in which case no
+   *          marking action is emitted. A non-null rule must be an {@link LP2Rule}.
+   * @param type the pattern the item belongs to
+   * @param numberInPattern index of the item within its pattern
+   * @param patternSize number of items in that pattern
+   * @param numberInRule not used by this implementation
+   * @param ruleSize not used by this implementation
+   * @param slotIndex not used by this implementation
+   * @return the rule string fragment for this item
+   */
+  public String getStringForRuleString(TextRulerRule rule, MLRuleItemType type,
+          int numberInPattern, int patternSize, int numberInRule, int ruleSize, int slotIndex) {
+
+    LP2Rule lp2Rule = (LP2Rule) rule;
+    // evaluated once and reused for both the "-IS" guard and the MARKONCE action
+    boolean isMarkingItem = (rule != null)
+            && (((rule.getTarget().type == MLTargetType.SINGLE_RIGHT_BOUNDARY)
+                    && (type == MLRuleItemType.PREFILLER) && (numberInPattern == patternSize - 1)) || ((rule
+                    .getTarget().type == MLTargetType.SINGLE_LEFT_BOUNDARY)
+                    && (type == MLRuleItemType.POSTFILLER) && (numberInPattern == 0)));
+
+    List<String> constraints = new ArrayList<String>();
+
+    String anchor;
+    if (wordConstraint == null) {
+      anchor = "ANY";
+    } else if (wordConstraint.isRegExpConstraint()) {
+      anchor = wordConstraint.typeShortName();
+      constraints.add("REGEXP(\"" + wordConstraint + "\")");
+    } else {
+      anchor = wordConstraint.toString();
+    }
+
+    // a contextual rule must not match a position that already carries the mark
+    if (isMarkingItem && lp2Rule.isContextualRule())
+      constraints.add("-IS(" + ((TextRulerSingleSlotRule) rule).getMarkName() + ")");
+
+    if (contextConstraint != null)
+      constraints.add(contextConstraint.toString());
+
+    if (wordConstraint == null) {
+      // prefer the basic TextMarker token constraint as the anchor (null if we have none)
+      MLLP2OtherConstraint anchorConstraint = getTMBasicTypeTokenConstraint();
+      if (anchorConstraint == null) {
+        for (MLLP2OtherConstraint c : otherConstraints)
+          if (c.canBeAnchorConstraint()) {
+            anchorConstraint = c;
+            break;
+          }
+      }
+      // everything that did not become the anchor is rendered as a condition
+      for (MLLP2OtherConstraint oc : otherConstraints) {
+        if (oc != anchorConstraint) {
+          if (oc.canBeAnchorConstraint())
+            constraints.add("IS(" + oc + ")");
+          else
+            constraints.add("PARTOF(" + oc + ")");
+        }
+      }
+      if (anchorConstraint != null)
+        anchor = anchorConstraint.toString();
+    }
+
+    StringBuilder result = new StringBuilder();
+    if (constraints.size() > 0) {
+      StringBuilder joined = new StringBuilder();
+      for (String constraintStr : constraints) {
+        if (joined.length() > 0)
+          joined.append(", ");
+        joined.append(constraintStr);
+      }
+      result.append("{").append(joined);
+    }
+
+    if (isMarkingItem) {
+      if (constraints.size() == 0)
+        result.append("{");
+      result.append("->MARKONCE(").append(((TextRulerSingleSlotRule) rule).getMarkName())
+              .append(")");
+      if (lp2Rule.isContextualRule())
+        result.append(", ASSIGN(redoContextualRules, true)");
+      result.append("}");
+    } else if (constraints.size() != 0) {
+      result.append("}");
+    }
+    return anchor + result.toString();
+  }
+
+  /**
+   * Adds the constraint unless an equal one is already present.
+   *
+   * @param c the constraint to add
+   */
+  public void addOtherConstraint(MLLP2OtherConstraint c) {
+    if (!otherConstraints.contains(c))
+      otherConstraints.add(c);
+  }
+
+  /**
+   * @return the live list of other constraints (not a copy)
+   */
+  public List<MLLP2OtherConstraint> getOtherConstraints() {
+    return otherConstraints;
+  }
+
+  /**
+   * Convenience overload that wraps the annotation in a new {@link TextRulerWordConstraint}.
+   *
+   * @param tokenAnnotation the token annotation to constrain on
+   */
+  public void setWordConstraint(TextRulerAnnotation tokenAnnotation) {
+    setWordConstraint(new TextRulerWordConstraint(tokenAnnotation));
+  }
+
+  /**
+   * @param c the new context constraint, or {@code null} to clear it
+   */
+  public void setContextConstraint(MLLP2ContextConstraint c) {
+    contextConstraint = c;
+  }
+
+  /**
+   * @return the context constraint, or {@code null} if none is set
+   */
+  public MLLP2ContextConstraint getContextConstraint() {
+    return contextConstraint;
+  }
+
+  /**
+   * @param c the new word constraint, or {@code null} to clear it
+   */
+  public void setWordConstraint(TextRulerWordConstraint c) {
+    wordConstraint = c;
+  }
+
+  /**
+   * @return the word constraint, or {@code null} if none is set
+   */
+  public TextRulerWordConstraint getWordConstraint() {
+    return wordConstraint;
+  }
+
+  /**
+   * Removes every other-constraint whose string form (the short type name) equals
+   * {@code name}. The previous implementation passed the string straight to
+   * {@code List.remove(Object)}, which can never match a constraint object and therefore
+   * removed nothing.
+   *
+   * @param name the short type name of the constraint(s) to remove; {@code null} matches
+   *          nothing
+   */
+  public void removeConstraintWithName(String name) {
+    // iterate backwards so that removing by index does not skip elements
+    for (int i = otherConstraints.size() - 1; i >= 0; i--) {
+      if (otherConstraints.get(i).toString().equals(name))
+        otherConstraints.remove(i);
+    }
+  }
+
+  /**
+   * @return the number of other constraints, plus one each for a present word constraint and
+   *         a present context constraint
+   */
+  public int totalConstraintCount() {
+    return otherConstraints.size() + (wordConstraint != null ? 1 : 0)
+            + (contextConstraint != null ? 1 : 0);
+  }
+
+}

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2RuleItem.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/LP2RuleItem.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,356 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.textmarker.textruler.learner.lp2;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.textmarker.textruler.core.TextRulerAnnotation;
+import org.apache.uima.textmarker.textruler.core.TextRulerExample;
+import org.apache.uima.textmarker.textruler.core.TextRulerRule;
+import org.apache.uima.textmarker.textruler.core.TextRulerRuleItem;
+import org.apache.uima.textmarker.textruler.core.TextRulerRulePattern;
+import org.apache.uima.textmarker.textruler.core.TextRulerTarget;
+import org.apache.uima.textmarker.textruler.core.TextRulerTarget.MLTargetType;
+import org.apache.uima.textmarker.textruler.core.TextRulerToolkit;
+import org.apache.uima.textmarker.textruler.extension.TextRulerLearnerDelegate;
+import org.apache.uima.textmarker.textruler.learner.lp2.LP2RuleItem.MLLP2ContextConstraint;
+import org.apache.uima.textmarker.textruler.learner.lp2.LP2RuleItem.MLLP2OtherConstraint;
+import org.apache.uima.util.FileUtils;
+
+/**
+ * Naive LP2 variant: for each positive example it builds an initial rule from the token
+ * window around the boundary, enumerates all generalizations of that rule, tests them on
+ * the example documents and keeps the ones that satisfy the coverage and error thresholds.
+ */
+public class NaiveLP2 extends BasicLP2 {
+
+  /**
+   * When {@code true} (and {@code TextRulerToolkit.DEBUG} is set), candidate rules and
+   * generalizations are additionally written to files in the temp directory.
+   */
+  public static final boolean SAVE_DEBUG_INFO_IN_TEMPFOLDER = false;
+
+  /**
+   * Passes all arguments through to the {@link BasicLP2} constructor.
+   */
+  public NaiveLP2(String inputDir, String prePropTMFile, String tmpDir, String[] slotNames,
+          Set<String> filterSet, TextRulerLearnerDelegate delegate) {
+    super(inputDir, prePropTMFile, tmpDir, slotNames, filterSet, delegate);
+  }
+
+  /**
+   * Induces rules from one positive example: creates the initial rule, generalizes it, tests
+   * all generalizations in one batch on the example documents and hands each tested rule to
+   * {@link #checkAndHandleNewRule(LP2Rule)}.
+   *
+   * @param e the positive example to start from
+   * @param roundNumber the current learning round, used for status and debug output only
+   */
+  @Override
+  protected void induceRulesFromExample(TextRulerExample e, int roundNumber) {
+    LP2Rule baseRule = createInitialRuleForPositiveExample(e);
+    List<LP2Rule> genRules = generalizeRule(baseRule);
+
+    if (shouldAbort())
+      return;
+
+    // only filled in DEBUG mode; used for the optional debug dump below
+    List<LP2Rule> test = new ArrayList<LP2Rule>();
+
+    // all generalizations are tested in one batch (cache and testCAS optimized rule testing)
+    sendStatusUpdateToDelegate("Round " + roundNumber + " - Testing " + (genRules.size())
+            + " generalizations... - uncovered examples: "
+            + (examples.size() - coveredExamples.size() + " / " + examples.size()),
+            TextRulerLearnerState.ML_RUNNING, false);
+    testRulesOnDocumentSet(new ArrayList<TextRulerRule>(genRules), exampleDocuments);
+
+    for (LP2Rule newRule : genRules) {
+      checkAndHandleNewRule(newRule);
+      if (TextRulerToolkit.DEBUG)
+        test.add(newRule);
+    }
+
+    if (TextRulerToolkit.DEBUG && SAVE_DEBUG_INFO_IN_TEMPFOLDER) {
+      Collections.sort(test, new Comparator<LP2Rule>() {
+
+        public int compare(LP2Rule o1, LP2Rule o2) {
+          return o1.getRuleString().compareTo(o2.getRuleString());
+        }
+
+      });
+
+      String startend = e.getTarget().type == MLTargetType.SINGLE_LEFT_BOUNDARY ? "left_"
+              : "right_";
+      File file = new File(tempDirectory() + startend + "generalizations" + roundNumber + ".tm");
+      StringBuilder str = new StringBuilder();
+      for (TextRulerRule rule : test) {
+        str.append(rule.getCoveringStatistics()).append("\t\t").append(rule.getRuleString())
+                .append("\n");
+      }
+      try {
+        FileUtils.saveString2File(str.toString(), file);
+      } catch (Exception ex) {
+        // debug output only: report the failure and carry on learning
+        ex.printStackTrace();
+      }
+    }
+
+  }
+
+  /**
+   * Decides what to do with a tested rule. A rule that covers at least
+   * {@code minCoveredPositives} positives and whose error rate does not exceed
+   * {@code maxErrorThreshold} is added to the best rules. A rule that covers enough
+   * positives but has too many errors is copied, given a context constraint on its marking
+   * item, retested, and added to the contextual rules if the copy passes both thresholds.
+   *
+   * @param rule a rule whose covering statistics have already been collected
+   */
+  protected void checkAndHandleNewRule(LP2Rule rule) {
+    boolean tooFewPositives = rule.getCoveringStatistics().getCoveredPositivesCount() < minCoveredPositives;
+    boolean tooManyErrors = rule.getErrorRate() > maxErrorThreshold;
+
+    boolean isBestRule = !(tooFewPositives || tooManyErrors);
+
+    if (TextRulerToolkit.DEBUG && SAVE_DEBUG_INFO_IN_TEMPFOLDER)
+      TextRulerToolkit.appendStringToFile(tempDirectory() + "bestcandidates.tm", rule
+              .getRuleString()
+              + "\n");
+
+    if (isBestRule) {
+      currentBestRules.add(rule);
+      currentBestRules.removeSubsumedRules();
+      currentBestRules.cutToMaxSize();
+    } else if (!tooFewPositives) {
+
+      // test in context
+      // in our TM representation, we simply can add a NEAR condition in
+      // the MARKing rule item and retest it on the
+      // corpus. we should do that for all kinds of tags we have, but
+      // currently we only do it for the corresponding opening/closing
+      // tag, since we do not have any information about other slots yet!
+      // // TODO use all other slot tags! (see optimized version as well)
+
+      LP2Rule ctxRule = rule.copy();
+      LP2RuleItem item = ctxRule.getMarkingRuleItem();
+      item.setContextConstraint(new MLLP2ContextConstraint(slotMaximumTokenCount, ctxRule));
+      ctxRule.setIsContextualRule(true);
+
+      ctxRule.setNeedsCompile(true);
+
+      // one candidate per line, matching the bestcandidates.tm output above
+      if (TextRulerToolkit.DEBUG && SAVE_DEBUG_INFO_IN_TEMPFOLDER)
+        TextRulerToolkit.appendStringToFile(tempDirectory() + "ctxcandidates.tm", ctxRule
+                .getRuleString()
+                + "\n");
+
+      testRuleOnDocumentSet(ctxRule, exampleDocuments); // not very fast... but works!
+      boolean ctxTooFewPositives = ctxRule.getCoveringStatistics().getCoveredPositivesCount() < minCoveredPositives;
+      boolean ctxTooManyErrors = ctxRule.getErrorRate() > maxErrorThreshold;
+      boolean isGoodContextRule = !(ctxTooFewPositives || ctxTooManyErrors);
+      if (isGoodContextRule) {
+        currentContextualRules.add(ctxRule);
+        currentContextualRules.removeSubsumedRules();
+        currentContextualRules.cutToMaxSize();
+      }
+
+    }
+  }
+
+  /**
+   * Enumerates all generalizations of {@code baseRule} and strips constraint-free items from
+   * the outer ends of each resulting rule.
+   *
+   * @param baseRule the fully constrained initial rule
+   * @return the generalized rules; rules without any inner constraint are not included
+   */
+  protected List<LP2Rule> generalizeRule(LP2Rule baseRule) {
+    List<LP2Rule> result = new ArrayList<LP2Rule>();
+    TextRulerRulePattern rulePattern = new TextRulerRulePattern();
+    TextRulerRulePattern prePattern = baseRule.getPreFillerPattern();
+
+    // we have to reverse the order again!
+    for (int i = prePattern.size() - 1; i >= 0; i--) {
+      rulePattern.add(prePattern.get(i));
+    }
+    rulePattern.addAll(baseRule.getPostFillerPattern());
+
+    recursiveGeneralizeRule(baseRule, rulePattern, new TextRulerRulePattern(), result);
+    TextRulerToolkit.log("GENERALIZATIONS: " + result.size());
+
+    for (LP2Rule r : result)
+      removeOutermostWildCardItemsFromRule(r);
+
+    return result;
+  }
+
+  /**
+   * Builds the most specific rule for an example. The boundary position is the begin of the
+   * example annotation for a left-boundary target and its end otherwise. Up to
+   * {@code windowSize} annotations before and after that position each become one rule item
+   * with a word constraint; items whose word constraint is a regular-expression constraint
+   * additionally get the token itself as an other-constraint.
+   *
+   * @param example the positive example
+   * @return the initial rule
+   */
+  protected LP2Rule createInitialRuleForPositiveExample(TextRulerExample example) {
+    TextRulerTarget target = example.getTarget();
+    LP2Rule rule = new LP2Rule(this, example.getTarget());
+    CAS docCas = example.getDocumentCAS();
+    TextRulerAnnotation exampleAnnotation = example.getAnnotation();
+    TypeSystem ts = docCas.getTypeSystem();
+    Type tokensRootType = ts.getType(TextRulerToolkit.TM_ANY_TYPE_NAME);
+    int thePosition = target.type == MLTargetType.SINGLE_LEFT_BOUNDARY ? exampleAnnotation
+            .getBegin() : exampleAnnotation.getEnd();
+
+    List<AnnotationFS> leftContext = TextRulerToolkit.getAnnotationsBeforePosition(docCas,
+            thePosition, windowSize, TextRulerToolkit.getFilterSetWithSlotNames(slotNames,
+                    filterSet), tokensRootType);
+    List<AnnotationFS> rightContext = TextRulerToolkit.getAnnotationsAfterPosition(docCas,
+            thePosition, windowSize, TextRulerToolkit.getFilterSetWithSlotNames(slotNames,
+                    filterSet), tokensRootType);
+
+    // the left context has to be reversed since we get the arrayList from
+    // the slot's point of view!
+    for (int i = leftContext.size() - 1; i >= 0; i--) {
+      TextRulerAnnotation annot = new TextRulerAnnotation(leftContext.get(i), example.getDocument());
+      LP2RuleItem item = new LP2RuleItem();
+      item.setWordConstraint(annot);
+      if (item.getWordConstraint().isRegExpConstraint())
+        item.addOtherConstraint(new MLLP2OtherConstraint(annot, annot));
+      rule.addPreFillerItem(item);
+    }
+
+    for (AnnotationFS afs : rightContext) {
+      TextRulerAnnotation annot = new TextRulerAnnotation(afs, example.getDocument());
+      LP2RuleItem item = new LP2RuleItem();
+      item.setWordConstraint(annot);
+      if (item.getWordConstraint().isRegExpConstraint())
+        item.addOtherConstraint(new MLLP2OtherConstraint(annot, annot));
+
+      rule.addPostFillerItem(item);
+    }
+    TextRulerToolkit.log("INITIAL RULE: " + rule.getRuleString());
+    return rule;
+  }
+
+  /**
+   * Depth-first enumeration of the cross product of all item generalizations. Once
+   * {@code currentPattern} holds one generalization per item of {@code allItems}, a new rule
+   * is built from it (the first {@code preCount} items become pre-filler items, the rest
+   * post-filler items) and added to {@code resultList} if it has at least one inner
+   * constraint.
+   *
+   * @param baseRule the rule being generalized; supplies target and pre-filler size
+   * @param allItems all items of the base rule, as assembled by {@link #generalizeRule}
+   * @param currentPattern the partial pattern built so far; restored before returning
+   * @param resultList receives the generated rules
+   */
+  protected void recursiveGeneralizeRule(LP2Rule baseRule, TextRulerRulePattern allItems,
+          TextRulerRulePattern currentPattern, List<LP2Rule> resultList) {
+    if (currentPattern.size() == allItems.size()) {
+      // create new Rule
+      LP2Rule newRule = new LP2Rule(this, baseRule.getTarget());
+      int preCount = baseRule.getPreFillerPattern().size();
+      for (int i = 0; i < currentPattern.size(); i++) {
+        if (i < preCount)
+          newRule.addPreFillerItem(currentPattern.get(i));
+        else
+          newRule.addPostFillerItem(currentPattern.get(i));
+      }
+      // skip the ANY ANY ANY ANY... rule ! this makes no sense in any application!!
+      if (newRule.totalInnerConstraintCount() > 0)
+        resultList.add(newRule);
+    } else {
+      int index = currentPattern.size();
+      TextRulerRuleItem baseItem = allItems.get(index);
+      List<TextRulerRuleItem> itemGeneralizations = generalizeRuleItem((LP2RuleItem) baseItem);
+      for (TextRulerRuleItem newItem : itemGeneralizations) {
+        currentPattern.add(newItem);
+        recursiveGeneralizeRule(baseRule, allItems, currentPattern, resultList);
+        currentPattern.remove(currentPattern.size() - 1);
+      }
+    }
+  }
+
+  /**
+   * Enumerates every subset of {@code otherConstraints} and adds one new item per subset to
+   * {@code result}. None of the generated items carries a word constraint.
+   *
+   * @param baseItem the item being generalized (not read by this implementation)
+   * @param otherConstraints the constraints to choose from
+   * @param currentConstraintIndex index of the constraint decided at this recursion level
+   * @param currentConstraintTuple the subset chosen so far; restored before returning
+   * @param result receives the generated items
+   */
+  protected void recursiveGeneralizeRuleItem(LP2RuleItem baseItem,
+          List<MLLP2OtherConstraint> otherConstraints, int currentConstraintIndex,
+          List<MLLP2OtherConstraint> currentConstraintTuple, List<TextRulerRuleItem> result) {
+    if (currentConstraintIndex > otherConstraints.size() - 1) {
+      LP2RuleItem newItem;
+      newItem = new LP2RuleItem();
+      for (MLLP2OtherConstraint c : currentConstraintTuple)
+        newItem.addOtherConstraint(c.copy());
+      result.add(newItem);
+    } else {
+      MLLP2OtherConstraint currentConstraint = otherConstraints.get(currentConstraintIndex);
+      // recurse WITHOUT and WITH this constraint:
+      recursiveGeneralizeRuleItem(baseItem, otherConstraints, currentConstraintIndex + 1,
+              currentConstraintTuple, result);
+      currentConstraintTuple.add(currentConstraint);
+      recursiveGeneralizeRuleItem(baseItem, otherConstraints, currentConstraintIndex + 1,
+              currentConstraintTuple, result);
+      currentConstraintTuple.remove(currentConstraintTuple.size() - 1);
+    }
+  }
+
+  /**
+   * Returns all generalizations of one item: one item carrying only the word constraint (if
+   * the base item has one), followed by one item for every subset of the base item's other
+   * constraints, including the empty subset, which is the wildcard item.
+   *
+   * @param baseItem the item to generalize
+   * @return the list of generalized items
+   */
+  protected List<TextRulerRuleItem> generalizeRuleItem(LP2RuleItem baseItem) {
+    List<TextRulerRuleItem> result = new ArrayList<TextRulerRuleItem>();
+
+    // one with word constraint
+    if (baseItem.getWordConstraint() != null) {
+      LP2RuleItem newItem = new LP2RuleItem();
+      newItem.setWordConstraint(baseItem.getWordConstraint().copy());
+      result.add(newItem);
+    }
+
+    // all other combinations without word constraint
+    List<MLLP2OtherConstraint> constraints = baseItem.getOtherConstraints();
+    recursiveGeneralizeRuleItem(baseItem, constraints, 0, new ArrayList<MLLP2OtherConstraint>(),
+            result);
+    return result;
+  }
+
+  /**
+   * Removes constraint-free (wildcard) items from the outer end of the pre-filler pattern and
+   * then of the post-filler pattern, stopping at the first item that has a constraint. The
+   * last remaining item on the marking side is never removed.
+   *
+   * @param rule the rule to trim in place
+   */
+  protected void removeOutermostWildCardItemsFromRule(LP2Rule rule) {
+    while (true) {
+      LP2RuleItem item = (LP2RuleItem) rule.getOutermostPreFillerItem();
+      if (item == null) // no more items left
+        break;
+
+      // if this rule is a RIGHT BOUNDARY rule, we must not remove the
+      // last remaining pre filler item,
+      // since this is used for marking the SLOT END BOUNDARY (= RIGHT
+      // BOUNDARY)
+      if ((rule.getTarget().type == MLTargetType.SINGLE_RIGHT_BOUNDARY)
+              && (rule.getPreFillerPattern().size() == 1))
+        break;
+
+      if (item.totalConstraintCount() == 0)
+        rule.removeOutermostPreFillerItem();
+      else
+        break;
+    }
+    while (true) {
+      LP2RuleItem item = (LP2RuleItem) rule.getOutermostPostFillerItem();
+      if (item == null) // no more items left
+        break;
+
+      // if this rule is a LEFT BOUNDARY rule, we must not remove the last
+      // remaining post filler item,
+      // since this is used for marking the SLOT START BOUNDARY (= LEFT
+      // BOUNDARY)
+      if ((rule.getTarget().type == MLTargetType.SINGLE_LEFT_BOUNDARY)
+              && (rule.getPostFillerPattern().size() == 1))
+        break;
+
+      if (item.totalConstraintCount() == 0)
+        rule.removeOutermostPostFillerItem();
+      else
+        break;
+    }
+  }
+
+  /**
+   * @return always {@code false}. NOTE(review): presumably tells the base learner not to
+   *         collect covered negative instances during rule testing - confirm against the
+   *         superclass.
+   */
+  @Override
+  public boolean collectNegativeCoveredInstancesWhenTesting() {
+    return false;
+  }
+
+}

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2Factory.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2Factory.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2Factory.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2Factory.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.textmarker.textruler.learner.lp2;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.uima.textmarker.textruler.extension.TextRulerLearner;
+import org.apache.uima.textmarker.textruler.extension.TextRulerLearnerDelegate;
+import org.apache.uima.textmarker.textruler.extension.TextRulerLearnerFactory;
+import org.apache.uima.textmarker.textruler.extension.TextRulerLearnerParameter;
+import org.apache.uima.textmarker.textruler.extension.TextRulerLearnerParameter.MLAlgorithmParamType;
+
+/** Creates {@link NaiveLP2} learners and describes their five configurable parameters. */
+public class NaiveLP2Factory implements TextRulerLearnerFactory {
+
+  /** Builds a {@link NaiveLP2}; {@code additionalFolderPath} is not forwarded to it. */
+  public TextRulerLearner createAlgorithm(String inputFolderPath, String additionalFolderPath,
+          String prePropTMFile, String tempFolderPath, String[] fullSlotTypeNames,
+          Set<String> filterSet, TextRulerLearnerDelegate delegate) {
+    return new NaiveLP2(inputFolderPath, prePropTMFile, tempFolderPath, fullSlotTypeNames,
+            filterSet, delegate);
+  }
+
+  /** Returns the key, display label and value type of each of the five parameters. */
+  public TextRulerLearnerParameter[] getAlgorithmParameters() {
+    return new TextRulerLearnerParameter[] {
+        new TextRulerLearnerParameter(BasicLP2.WINDOW_SIZE_KEY,
+            "Context Window Size (to the left and right)", MLAlgorithmParamType.ML_INT_PARAM),
+        new TextRulerLearnerParameter(BasicLP2.CURRENT_BEST_RULES_SIZE_KEY,
+            "Best Rules List Size", MLAlgorithmParamType.ML_INT_PARAM),
+        new TextRulerLearnerParameter(BasicLP2.MIN_COVERED_POSITIVES_PER_RULE_KEY,
+            "Minimum Covered Positives per Rule", MLAlgorithmParamType.ML_INT_PARAM),
+        new TextRulerLearnerParameter(BasicLP2.MAX_ERROR_THRESHOLD_KEY,
+            "Maximum Error Threshold", MLAlgorithmParamType.ML_FLOAT_PARAM),
+        new TextRulerLearnerParameter(BasicLP2.CURRENT_CONTEXTUAL_RULES_SIZE_KEY,
+            "Contextual Rules List Size", MLAlgorithmParamType.ML_INT_PARAM)
+    };
+  }
+
+  /** Returns the BasicLP2 standard value of every parameter, stored under the parameter key. */
+  public Map<String, Object> getAlgorithmParameterStandardValues() {
+    Map<String, Object> defaults = new HashMap<String, Object>();
+    defaults.put(BasicLP2.WINDOW_SIZE_KEY, BasicLP2.STANDARD_WINDOW_SIZE);
+    defaults.put(BasicLP2.CURRENT_BEST_RULES_SIZE_KEY,
+            BasicLP2.STANDARD_MAX_CURRENT_BEST_RULES_COUNT);
+    defaults.put(BasicLP2.MIN_COVERED_POSITIVES_PER_RULE_KEY,
+            BasicLP2.STANDARD_MIN_COVERED_POSITIVES_PER_RULE);
+    defaults.put(BasicLP2.MAX_ERROR_THRESHOLD_KEY, BasicLP2.STANDARD_MAX_ERROR_THRESHOLD);
+    defaults.put(BasicLP2.CURRENT_CONTEXTUAL_RULES_SIZE_KEY,
+            BasicLP2.STANDARD_MAX_CONTEXTUAL_RULES_COUNT);
+    return defaults;
+  }
+}

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2Factory.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2Factory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2PreferencePage.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2PreferencePage.java?rev=1157037&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2PreferencePage.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2PreferencePage.java Fri Aug 12 10:32:50 2011
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.textmarker.textruler.learner.lp2;
+
+import java.util.ArrayList;
+import java.util.Map;
+
+import org.apache.uima.textmarker.textruler.TextRulerPlugin;
+import org.apache.uima.textmarker.textruler.extension.TextRulerController;
+import org.apache.uima.textmarker.textruler.extension.TextRulerLearnerController;
+import org.apache.uima.textmarker.textruler.extension.TextRulerLearnerFactory;
+import org.apache.uima.textmarker.textruler.extension.TextRulerLearnerParameter;
+import org.eclipse.jface.preference.BooleanFieldEditor;
+import org.eclipse.jface.preference.FieldEditor;
+import org.eclipse.jface.preference.IPreferenceStore;
+import org.eclipse.jface.preference.PreferencePage;
+import org.eclipse.jface.preference.StringFieldEditor;
+import org.eclipse.swt.SWT;
+import org.eclipse.swt.layout.GridData;
+import org.eclipse.swt.layout.GridLayout;
+import org.eclipse.swt.widgets.Composite;
+import org.eclipse.swt.widgets.Control;
+import org.eclipse.ui.IWorkbench;
+import org.eclipse.ui.IWorkbenchPreferencePage;
+
+/** Preference page with one editor per parameter of the learner with the "...lp2naive" ID. */
+public class NaiveLP2PreferencePage extends PreferencePage implements IWorkbenchPreferencePage {
+  public static String ID = "org.apache.uima.textmarker.textruler.algorithmPages";
+  private TextRulerLearnerController algorithmController;
+  private IPreferenceStore store;
+  /** Editors built in createContents; performOk and performDefaults iterate over them. */
+  private ArrayList<FieldEditor> fields = new ArrayList<FieldEditor>();
+
+  /** Looks up the learner controller by its ID and uses the plugin's preference store. */
+  public NaiveLP2PreferencePage() {
+    algorithmController = TextRulerController
+            .getControllerForID("org.apache.uima.textmarker.textruler.lp2naive");
+    store = TextRulerPlugin.getDefault().getPreferenceStore();
+    setPreferenceStore(store);
+  }
+
+  /**
+   * Creates one editor per learner parameter: a check box for boolean parameters and a text
+   * field for float, int and string parameters; select parameters get no editor. A parameter
+   * without a standard value keeps its editor but gets no default (no NullPointerException).
+   */
+  @Override
+  protected Control createContents(Composite parent) {
+    Composite top = new Composite(parent, SWT.LEFT);
+    top.setLayoutData(new GridData(GridData.FILL_HORIZONTAL));
+    top.setLayout(new GridLayout());
+    TextRulerLearnerFactory f = algorithmController.getFactory();
+    TextRulerLearnerParameter[] params = f.getAlgorithmParameters();
+    Map<String, Object> values = f.getAlgorithmParameterStandardValues();
+    if (params == null)
+      return top;
+    for (TextRulerLearnerParameter p : params) {
+      String id = algorithmController.getID() + "." + p.id;
+      FieldEditor l = null;
+      switch (p.type) {
+        case ML_BOOL_PARAM:
+          l = new BooleanFieldEditor(id, p.name, top);
+          break;
+        case ML_FLOAT_PARAM:
+        case ML_INT_PARAM:
+        case ML_STRING_PARAM:
+          l = new StringFieldEditor(id, p.name, top);
+          break;
+        case ML_SELECT_PARAM:
+          break; // no editor is offered for select parameters
+      }
+      if (l == null)
+        continue;
+      fields.add(l);
+      // the standard-value map, or this parameter's entry in it, may be missing
+      Object standard = (values == null) ? null : values.get(p.id);
+      if (standard != null && l instanceof BooleanFieldEditor)
+        store.setDefault(id, (Boolean) standard);
+      else if (standard != null)
+        store.setDefault(id, standard.toString());
+      l.setPreferenceStore(store);
+      l.load();
+    }
+    return top;
+  }
+
+  @Override
+  public void init(IWorkbench workbench) {
+  }
+
+  /** Resets every editor to its default value; the superclass implementation is not called. */
+  @Override
+  protected void performDefaults() {
+    for (FieldEditor f : fields)
+      f.loadDefault();
+  }
+
+  /** Stores every editor's value and always returns true; the superclass is not called. */
+  @Override
+  public boolean performOk() {
+    for (FieldEditor f : fields)
+      f.store();
+    return true;
+  }
+}

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2PreferencePage.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-textruler/src/main/java/org/apache/uima/textmarker/textruler/learner/lp2/NaiveLP2PreferencePage.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain