You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2013/06/07 22:48:36 UTC
svn commit: r1490815 [1/3] - in /uima/sandbox/ruta/trunk/ruta-ep-textruler:
./ src/main/java/org/apache/uima/ruta/textruler/extension/
src/main/java/org/apache/uima/ruta/textruler/learner/trabal/
Author: pkluegl
Date: Fri Jun 7 20:48:36 2013
New Revision: 1490815
URL: http://svn.apache.org/r1490815
Log:
UIMA-2858
- initial import of trabal
- fixed preprocessing of the additional documents
Added:
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/AnnotationError.java
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/AnnotationErrorType.java
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/AnnotationRule.java
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/Condition.java
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ConditionType.java
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/CorrectionRule.java
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/DeletionRule.java
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ExpansionRule.java
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/RankedList.java
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ShiftAllRule.java
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ShiftingRule.java
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalAnnotation.java
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalFactory.java
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalLearner.java
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalPreferencePage.java
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalRule.java
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalRuleItem.java
Modified:
uima/sandbox/ruta/trunk/ruta-ep-textruler/plugin.xml
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerController.java
uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerPreprocessor.java
Modified: uima/sandbox/ruta/trunk/ruta-ep-textruler/plugin.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/plugin.xml?rev=1490815&r1=1490814&r2=1490815&view=diff
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/plugin.xml (original)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/plugin.xml Fri Jun 7 20:48:36 2013
@@ -216,6 +216,23 @@ under the License.
</extension>
-->
<extension
+ point="org.apache.uima.ruta.textruler.learners">
+ <learner
+ class="org.apache.uima.ruta.textruler.learner.trabal.TrabalFactory"
+ id="org.apache.uima.ruta.textruler.trabal"
+ name="TraBaL">
+ </learner>
+ </extension>
+ <extension
+ point="org.eclipse.ui.preferencePages">
+ <page
+ category="org.apache.uima.ruta.textruler"
+ class="org.apache.uima.ruta.textruler.learner.trabal.TrabalPreferencePage"
+ id="org.apache.uima.ruta.textruler.trabal"
+ name="TraBaL">
+ </page>
+ </extension>
+ <extension
point="org.eclipse.core.runtime.preferences">
<initializer
class="org.apache.uima.ruta.textruler.preferences.TextRulerPreferenceInitializer">
Modified: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerController.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerController.java?rev=1490815&r1=1490814&r2=1490815&view=diff
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerController.java (original)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerController.java Fri Jun 7 20:48:36 2013
@@ -184,7 +184,20 @@ public class TextRulerController {
}
});
- preprocessorDidEnd(algorithmsInputFolder, additionalFolder);
+ String algorithmsAdditionalFolder = p.run(additionalFolder, "additional", currentPreprocessorTMFile, currentTempDir,
+ currentSlotNames, new TextRulerPreprocessorDelegate() {
+
+ public void preprocessorStatusUpdate(TextRulerPreprocessor p,
+ String statusString) {
+ currentDelegate.preprocessorStatusUpdate(p, statusString);
+ }
+
+ public boolean shouldAbort() {
+ return TextRulerController.shouldAbort();
+ }
+
+ });
+ preprocessorDidEnd(algorithmsInputFolder, algorithmsAdditionalFolder);
}
});
if (currentDelegate != null)
Modified: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerPreprocessor.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerPreprocessor.java?rev=1490815&r1=1490814&r2=1490815&view=diff
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerPreprocessor.java (original)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerPreprocessor.java Fri Jun 7 20:48:36 2013
@@ -32,23 +32,30 @@ import org.eclipse.core.runtime.Path;
/**
* This "algorithm" gets executed right before the real algorithms get started. It preprocesses the
- * input XMI files with the given Ruta preprocessing file and stores the results in a
- * temporary folder. Those new XMI files are then passed as input documents to the MLAlgorithms.
+ * input XMI files with the given Ruta preprocessing file and stores the results in a temporary
+ * folder. Those new XMI files are then passed as input documents to the MLAlgorithms.
*
*/
public class TextRulerPreprocessor {
public String run(String inFolder, String tmFile, String tmpDir, String[] currentSlotNames,
TextRulerPreprocessorDelegate delegate) {
- AnalysisEngineDescription analysisEngineDescription = TextRulerToolkit.getAnalysisEngineDescription(TextRulerToolkit
- .getEngineDescriptorFromTMSourceFile(new Path(tmFile)));
- // we want to reuse these cases, so extend the type system in case a boundary-based learner is called
+ return run(inFolder, "input", tmFile, tmpDir, currentSlotNames, delegate);
+ }
+
+ public String run(String inFolder, String docType, String tmFile, String tmpDir,
+ String[] currentSlotNames, TextRulerPreprocessorDelegate delegate) {
+ AnalysisEngineDescription analysisEngineDescription = TextRulerToolkit
+ .getAnalysisEngineDescription(TextRulerToolkit
+ .getEngineDescriptorFromTMSourceFile(new Path(tmFile)));
+ // we want to reuse these cases, so extend the type system in case a boundary-based learner is
+ // called
TextRulerToolkit.addBoundaryTypes(analysisEngineDescription, currentSlotNames);
AnalysisEngine ae = TextRulerToolkit.loadAnalysisEngine(analysisEngineDescription);
// preprocess input XMIs
File inputFolder = new File(inFolder);
- File outputFolder = new File(tmpDir + "input");
+ File outputFolder = new File(tmpDir + docType);
File[] files = inputFolder.listFiles(new FilenameFilter() {
public boolean accept(File dir, String name) {
return (name.endsWith(".xmi"));
@@ -70,7 +77,8 @@ public class TextRulerPreprocessor {
}
TextRulerToolkit.log("Load INPUT XMI file: " + file.getName());
if (delegate != null)
- delegate.preprocessorStatusUpdate(this, "Loading input XMI file: " + file.getName());
+ delegate.preprocessorStatusUpdate(this,
+ "Loading input XMI file (" + docType + "): " + file.getName());
cas = TextRulerToolkit.readCASfromXMIFile(file, ae, cas);
System.out.print("Processing...");
try {
@@ -89,7 +97,7 @@ public class TextRulerPreprocessor {
cas.reset();
GlobalCASSource.releaseCAS(cas);
}
- return tmpDir + "input";
+ return tmpDir + docType;
}
}
Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/AnnotationError.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/AnnotationError.java?rev=1490815&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/AnnotationError.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/AnnotationError.java Fri Jun 7 20:48:36 2013
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.ruta.textruler.learner.trabal;
+
+import org.apache.uima.ruta.textruler.core.TextRulerExample;
+
+public class AnnotationError implements Comparable<AnnotationError> {
+ private TextRulerExample error;
+
+ private TextRulerExample truth;
+
+ private AnnotationErrorType type;
+
+ public AnnotationError(TextRulerExample error, TextRulerExample truth, AnnotationErrorType type) {
+ this.error = error;
+ this.truth = truth;
+ this.type = type;
+ }
+
+ public TextRulerExample getError() {
+ return error;
+ }
+
+ public TextRulerExample getTruth() {
+ return truth;
+ }
+
+ public AnnotationErrorType getType() {
+ return type;
+ }
+
+ public TrabalAnnotation getAnnotation() {
+ if (error != null)
+ return (TrabalAnnotation) error.getAnnotation();
+ return null;
+ }
+
+ public TrabalAnnotation getTargetAnnotation() {
+ if (truth != null)
+ return (TrabalAnnotation) truth.getAnnotation();
+ return null;
+ }
+
+ @Override
+ public String toString() {
+ if (error != null && truth != null)
+ return type + ": " + error.getAnnotation().getType().getShortName() + "("
+ + error.getAnnotation().getBegin() + ", " + error.getAnnotation().getEnd() + ") -> "
+ + truth.getAnnotation().getType().getShortName() + "("
+ + truth.getAnnotation().getBegin() + ", " + truth.getAnnotation().getEnd() + ")";
+ if (truth != null)
+ return type + ": " + truth.getAnnotation().getType().getShortName() + "("
+ + truth.getAnnotation().getBegin() + ", " + truth.getAnnotation().getEnd() + ")";
+ return type + ": " + error.getAnnotation().getType().getShortName() + "("
+ + error.getAnnotation().getBegin() + ", " + error.getAnnotation().getEnd() + ")";
+ }
+
+ public int compareTo(AnnotationError o) {
+ if (error != null) {
+ if (o.getError() != null) {
+ int comp = error.getAnnotation().getType().toString()
+ .compareTo(o.getError().getAnnotation().getType().toString());
+ if (comp != 0)
+ return comp;
+ } else
+ return 1;
+ } else if (o.getError() != null)
+ return -1;
+ if (truth != null) {
+ if (o.getTruth() != null) {
+ int comp = truth.getAnnotation().getType().toString()
+ .compareTo(o.getTruth().getAnnotation().getType().toString());
+ if (comp != 0)
+ return comp;
+ } else
+ return 1;
+ } else if (o.getTruth() != null)
+ return -1;
+ return type.compareTo(o.getType());
+ }
+
+}
Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/AnnotationErrorType.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/AnnotationErrorType.java?rev=1490815&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/AnnotationErrorType.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/AnnotationErrorType.java Fri Jun 7 20:48:36 2013
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.ruta.textruler.learner.trabal;
+
+public enum AnnotationErrorType { // error categories; the rule classes of this package store one as their errorType
+ CORRECTION, SHIFTING_LEFT, SHIFTING_RIGHT, EXPANSION, DELETION, ANNOTATION
+}
Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/AnnotationRule.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/AnnotationRule.java?rev=1490815&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/AnnotationRule.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/AnnotationRule.java Fri Jun 7 20:48:36 2013
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.ruta.textruler.learner.trabal;
+
+import java.util.ArrayList;
+
+import org.apache.uima.ruta.textruler.core.TextRulerTarget;
+
+public class AnnotationRule extends TrabalRule { // rule that adds a target annotation between two boundary items
+
+ private TrabalRuleItem frontBoundaryItem; // set via setFrontBoundary or the copy constructor; null until then
+
+ private TrabalRuleItem rearBoundaryItem; // set via setRearBoundary or the copy constructor; null until then
+
+ public AnnotationRule(TrabalLearner parentAlgorithm, TextRulerTarget target,
+ TrabalAnnotation targetAnnotation) {
+ super(parentAlgorithm, target);
+ this.targetAnnotation = targetAnnotation;
+ this.errorType = AnnotationErrorType.ANNOTATION;
+ addConditionTypes();
+ }
+
+ public AnnotationRule(AnnotationRule copyFrom) { // copy: boundary items are shared, conditions are cloned
+ super((TrabalLearner) copyFrom.algorithm, copyFrom.target);
+ this.targetAnnotation = copyFrom.targetAnnotation;
+ this.errorType = AnnotationErrorType.ANNOTATION;
+ this.frontBoundaryItem = copyFrom.frontBoundaryItem;
+ this.rearBoundaryItem = copyFrom.rearBoundaryItem;
+ this.conditions = new ArrayList<Condition>();
+ for (Condition c : copyFrom.conditions)
+ conditions.add(c.clone());
+ addConditionTypes();
+ }
+
+ private void addConditionTypes() { // condition types rendered inside the action block; AFTER/BEFORE are handled separately
+ conditionTypes = new ArrayList<ConditionType>();
+ conditionTypes.add(ConditionType.STARTSWITH);
+ conditionTypes.add(ConditionType.ENDSWITH);
+ conditionTypes.add(ConditionType.CONTAINS);
+ conditionTypes.add(ConditionType.PARTOF);
+ }
+
+ public void compileRuleString() { // assembles ruleString; both boundary items must have been set before
+ boolean frontItemInBorders = frontBoundaryItem.getAnnotation().getBegin() == targetAnnotation
+ .getBegin();
+ boolean rearItemInBorders = rearBoundaryItem.getAnnotation().getEnd() == targetAnnotation
+ .getEnd();
+
+ // this string will be attached to the front boundary item
+ String after = parseConditions(ConditionType.AFTER);
+ if (after.length() > 0)
+ after = "{" + after + "}";
+
+ // this string will be attached to the rear boundary item
+ String before = parseConditions(ConditionType.BEFORE);
+ if (before.length() > 0)
+ before = "{" + before + "}";
+
+ // these strings will be attached to the brackets
+ String conditions = parseConditions(conditionTypes); // local string; shadows the conditions field used in the copy constructor
+ if (conditions.length() > 0)
+ conditions += ", ";
+ String nPartof = "-PARTOF(" + targetAnnotation.getType().getShortName() + ")"; // excludes positions already inside the target type
+ String mark;
+ if (((TrabalLearner) algorithm).getEnableFeatures())
+ mark = "-> CREATE(" + targetAnnotation.getType().getShortName() + parseFeatures() + ")";
+ else
+ mark = "-> MARK(" + targetAnnotation.getType().getShortName() + ")";
+ String action = "{" + conditions + nPartof + " " + mark + "}";
+
+ // These are the four possible patterns:
+ // ( frontBoundaryItem after ANY*? rearBoundaryItem before ) action ;
+ // frontBoundaryItem after ( ANY*? rearBoundaryItem before ) action ;
+ // ( frontBoundaryItem after ANY*? ) action rearBoundaryItem before ;
+ // frontBoundaryItem after ( ANY*? ) action rearBoundaryItem before ;
+
+ ruleString = "";
+
+ // If the front item is part of the future annotation, it has to be included in the brackets.
+ if (frontItemInBorders)
+ ruleString += "(" + frontBoundaryItem + after + " ";
+ else
+ ruleString += frontBoundaryItem + after + " (";
+
+ // We include all tokens between the boundaries.
+ ruleString += "ANY*{-PARTOF(" + rearBoundaryItem + ")}"; // like ANY*? but faster
+
+ // Check, if the rear item should be included and mark all tokens between the brackets as the
+ // new annotation.
+ if (rearItemInBorders)
+ ruleString += " " + rearBoundaryItem + before + ")" + action + ";";
+ else
+ ruleString += ")" + action + " " + rearBoundaryItem + before + ";";
+
+ setNeedsCompile(false);
+ }
+
+ public boolean contains(TrabalAnnotation target) { // true if target's short type name occurs anywhere in this rule
+ if (target != null) {
+ if (targetAnnotation.getType().getShortName().equals(target.getType().getShortName())) {
+ return true;
+ }
+ if (frontBoundaryItem.getName().equals(target.getType().getShortName()))
+ return true;
+ if (rearBoundaryItem.getName().equals(target.getType().getShortName()))
+ return true;
+ for (Condition each : getConditions())
+ if (each.getItem().getName().equals(target.getType().getShortName()))
+ return true;
+ }
+ return false;
+ }
+
+ public boolean hasSameBasicRule(TrabalRule rule) { // same target type name and no source annotation on the other rule
+ if (rule.getAnnotation() != null)
+ return false;
+ if (rule.getTargetAnnotation() == null)
+ return false;
+ if (!rule.getTargetAnnotation().getType().getShortName()
+ .equals(targetAnnotation.getType().getShortName()))
+ return false;
+ return true;
+ }
+
+ @Override
+ public TrabalRuleItem getFrontBoundary() {
+ return frontBoundaryItem;
+ }
+
+ public void setFrontBoundary(TrabalRuleItem item) { // does not call setNeedsCompile itself
+ frontBoundaryItem = item;
+ }
+
+ @Override
+ public TrabalRuleItem getRearBoundary() {
+ return rearBoundaryItem;
+ }
+
+ public void setRearBoundary(TrabalRuleItem item) { // does not call setNeedsCompile itself
+ rearBoundaryItem = item;
+ }
+
+ @Override
+ public TrabalRule copy() {
+ return new AnnotationRule(this);
+ }
+
+}
Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/Condition.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/Condition.java?rev=1490815&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/Condition.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/Condition.java Fri Jun 7 20:48:36 2013
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ruta.textruler.learner.trabal;
+
+
+/**
+ * Conditions can be attached to TraBaL rules to reduce the chance of overfitting.
+ * Two conditions are equal when their type, item name and negation flag match.
+ */
+public class Condition {
+
+ private final ConditionType type; // selects the condition string that is rendered
+
+ private final TrabalRuleItem item; // the item the condition refers to
+
+ private boolean isNegative = false; // negated conditions are rendered with a leading "-"
+
+ private boolean isExpansionCondition = false; // not considered by equals/hashCode
+
+ public Condition(ConditionType type, TrabalRuleItem item) {
+ this.type = type;
+ this.item = item;
+ }
+
+ public Condition(ConditionType type, TrabalRuleItem item, boolean isNegative,
+ boolean isExpansionCondition) {
+ this.type = type;
+ this.item = item;
+ this.isNegative = isNegative;
+ this.isExpansionCondition = isExpansionCondition;
+ }
+
+ public ConditionType getType() {
+ return type;
+ }
+
+ public TrabalRuleItem getItem() {
+ return item;
+ }
+
+ public boolean isNegative() {
+ return isNegative;
+ }
+
+ public void setNegative() {
+ isNegative = true;
+ }
+
+ public void setPositive() {
+ isNegative = false;
+ }
+
+ public void changePosNegValue() { // toggles the negation flag
+ isNegative = !isNegative;
+ }
+
+ public boolean isExpansionCondition() {
+ return isExpansionCondition;
+ }
+
+ public void setExpansionCondition() { // one-way switch; there is no method to clear the flag
+ isExpansionCondition = true;
+ }
+
+ @Override
+ public boolean equals(Object obj) { // compares type, item name and negation flag only
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ Condition other = (Condition) obj;
+ if (type != other.getType())
+ return false;
+ if (!item.getName().equals(other.getItem().getName()))
+ return false;
+ if (isNegative != other.isNegative)
+ return false;
+ return true;
+ }
+
+ @Override
+ public int hashCode() { // uses exactly the fields equals() compares, independent of item.toString()
+ return 31 * (31 * type.name().hashCode() + item.getName().hashCode()) + (isNegative ? 1 : 0);
+ }
+
+ @Override
+ public String toString() { // condition string of the type applied to the item, prefixed with "-" when negated
+ if (isNegative)
+ return "-" + type.getConditionString(item.toString());
+ return type.getConditionString(item.toString());
+ }
+
+ @Override
+ public Condition clone() { // new instance with the same flags; the item reference is shared
+ Condition result = new Condition(type, item, isNegative, isExpansionCondition);
+ return result;
+ }
+}
Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ConditionType.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ConditionType.java?rev=1490815&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ConditionType.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ConditionType.java Fri Jun 7 20:48:36 2013
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ruta.textruler.learner.trabal;
+
+/**
+ * The type of a TraBaL condition. The attached comments show the corresponding RUTA-Conditions -
+ * see RUTA rules for details.
+ *
+ */
+public enum ConditionType {
+
+ STARTSWITH("STARTSWITH(", ")"), // STARTSWITH(X)
+
+ ENDSWITH("ENDSWITH(", ")"), // ENDSWITH(X)
+
+ AFTER("NEAR(", ", 1, 1, false, true)"), // NEAR(X, 1, 1, false, true) - presumably (type, min, max, forward, filtered); confirm
+
+ BEFORE("NEAR(", ", 1, 1, true, true)"), // NEAR(X, 1, 1, true, true) - differs from AFTER only in the fourth argument
+
+ PARTOF("PARTOF(", ")"), // PARTOF(X)
+
+ CONTAINS("CONTAINS(", ")"); // CONTAINS(X)
+
+ String prefix, suffix; // NOTE(review): package-visible and non-final although only the constructor below assigns them
+
+ private ConditionType(String prefix, String suffix) {
+ this.prefix = prefix;
+ this.suffix = suffix;
+ }
+
+ public String getConditionString(String annotation) { // wraps the given annotation name as prefix + name + suffix
+ return prefix + annotation + suffix;
+ }
+
+}
Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/CorrectionRule.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/CorrectionRule.java?rev=1490815&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/CorrectionRule.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/CorrectionRule.java Fri Jun 7 20:48:36 2013
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.ruta.textruler.learner.trabal;
+
+import java.util.ArrayList;
+
+import org.apache.uima.ruta.textruler.core.TextRulerTarget;
+
+public class CorrectionRule extends TrabalRule { // rule that replaces an annotation by one of the target type on the same span
+
+ public CorrectionRule(TrabalLearner parentAlgorithm, TextRulerTarget target,
+ TrabalAnnotation annotation, TrabalAnnotation targetAnnotation) {
+ super(parentAlgorithm, target);
+ this.annotation = annotation;
+ this.targetAnnotation = targetAnnotation;
+ this.errorType = AnnotationErrorType.CORRECTION;
+ addConditionTypes();
+ }
+
+ public CorrectionRule(CorrectionRule copyFrom) { // copy: annotations are shared, conditions are cloned
+ super((TrabalLearner) copyFrom.algorithm, copyFrom.target);
+ this.annotation = copyFrom.annotation;
+ this.targetAnnotation = copyFrom.targetAnnotation;
+ this.errorType = AnnotationErrorType.CORRECTION;
+ this.conditions = new ArrayList<Condition>();
+ for (Condition c : copyFrom.conditions)
+ conditions.add(c.clone());
+ addConditionTypes();
+ }
+
+ private void addConditionTypes() { // all six condition types are rendered in the single rule element
+ conditionTypes = new ArrayList<ConditionType>();
+ conditionTypes.add(ConditionType.BEFORE);
+ conditionTypes.add(ConditionType.AFTER);
+ conditionTypes.add(ConditionType.STARTSWITH);
+ conditionTypes.add(ConditionType.ENDSWITH);
+ conditionTypes.add(ConditionType.CONTAINS);
+ conditionTypes.add(ConditionType.PARTOF);
+ }
+
+ public void compileRuleString() { // builds "Old{conditions -> UNMARK(Old, true), MARK|CREATE(New ...)};"
+ // parse conditions and add space if needed
+ String conditions = parseConditions(conditionTypes); // local string; shadows the conditions field used in the copy constructor
+ if (conditions.length() > 0)
+ conditions += " ";
+ // assemble rule string
+ if (((TrabalLearner) algorithm).getEnableFeatures()) { // with features enabled CREATE plus parseFeatures() is used instead of MARK
+ ruleString = annotation.getType().getShortName() + "{" + conditions + "-> UNMARK("
+ + annotation.getType().getShortName() + ", true), CREATE("
+ + targetAnnotation.getType().getShortName() + parseFeatures() + ")};";
+ } else {
+ ruleString = annotation.getType().getShortName() + "{" + conditions + "-> UNMARK("
+ + annotation.getType().getShortName() + ", true), MARK("
+ + targetAnnotation.getType().getShortName() + ")};";
+ }
+ setNeedsCompile(false);
+ }
+
+ public boolean contains(TrabalAnnotation target) { // true if target's short type name occurs anywhere in this rule
+ if (target == null)
+ return false;
+ if (annotation.getType().getShortName().equals(target.getType().getShortName()))
+ return true;
+ if (targetAnnotation.getType().getShortName().equals(target.getType().getShortName()))
+ return true;
+ for (Condition each : getConditions())
+ if (each.getItem().getName().equals(target.getType().getShortName()))
+ return true;
+ return false;
+ }
+
+ public boolean hasSameBasicRule(TrabalRule rule) { // same source and target type; compares type objects, not short names
+ if (rule.getAnnotation() == null)
+ return false;
+ if (!rule.getAnnotation().getType().equals(annotation.getType()))
+ return false;
+ if (rule.getTargetAnnotation() == null)
+ return false;
+ if (!rule.getTargetAnnotation().getType().equals(targetAnnotation.getType()))
+ return false;
+ return true;
+ }
+
+ @Override
+ public TrabalRuleItem getFrontBoundary() { // a new item wrapping the source annotation on every call
+ return new TrabalRuleItem(annotation);
+ }
+
+ @Override
+ public TrabalRuleItem getRearBoundary() { // same annotation as the front boundary
+ return new TrabalRuleItem(annotation);
+ }
+
+ @Override
+ public TrabalRule copy() {
+ return new CorrectionRule(this);
+ }
+
+}
Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/DeletionRule.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/DeletionRule.java?rev=1490815&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/DeletionRule.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/DeletionRule.java Fri Jun 7 20:48:36 2013
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.ruta.textruler.learner.trabal;
+
+import java.util.ArrayList;
+
+import org.apache.uima.ruta.textruler.core.TextRulerTarget;
+
+public class DeletionRule extends TrabalRule {
+
+ public DeletionRule(TrabalLearner parentAlgorithm, TextRulerTarget target,
+ TrabalAnnotation annotation) {
+ super(parentAlgorithm, target);
+ this.annotation = annotation;
+ this.errorType = AnnotationErrorType.DELETION;
+ addConditionTypes();
+ }
+
+ public DeletionRule(DeletionRule copyFrom) {
+ super((TrabalLearner) copyFrom.algorithm, copyFrom.target);
+ this.annotation = copyFrom.annotation;
+ this.errorType = AnnotationErrorType.DELETION;
+ this.conditions = new ArrayList<Condition>();
+ for (Condition c : copyFrom.conditions)
+ conditions.add(c.clone());
+ addConditionTypes();
+ }
+
+ private void addConditionTypes() {
+ conditionTypes = new ArrayList<ConditionType>();
+ conditionTypes.add(ConditionType.BEFORE);
+ conditionTypes.add(ConditionType.AFTER);
+ conditionTypes.add(ConditionType.STARTSWITH);
+ conditionTypes.add(ConditionType.ENDSWITH);
+ conditionTypes.add(ConditionType.CONTAINS);
+ conditionTypes.add(ConditionType.PARTOF);
+ }
+
+ public void compileRuleString() {
+ ruleString = annotation.getType().getShortName() + "{" + parseConditions(conditionTypes)
+ + "-> UNMARK(" + annotation.getType().getShortName() + ", true)};";
+ setNeedsCompile(false);
+ }
+
+ public boolean contains(TrabalAnnotation target) {
+ if (target != null) {
+ if (annotation.getType().getShortName().equals(target.getType().getShortName()))
+ return true;
+ for (Condition each : getConditions())
+ if (each.getItem().getName().equals(target.getType().getShortName()))
+ return true;
+ }
+ return false;
+ }
+
+ public boolean hasSameBasicRule(TrabalRule rule) {
+ if (rule.getTargetAnnotation() != null)
+ return false;
+ if (rule.getAnnotation() == null)
+ return false;
+ if (!rule.getAnnotation().getType().getShortName().equals(annotation.getType().getShortName()))
+ return false;
+ return true;
+ }
+
+ @Override
+ public TrabalRuleItem getFrontBoundary() {
+ return new TrabalRuleItem(annotation);
+ }
+
+ @Override
+ public TrabalRuleItem getRearBoundary() {
+ return new TrabalRuleItem(annotation);
+ }
+
+ @Override
+ public TrabalRule copy() {
+ return new DeletionRule(this);
+ }
+
+}
Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ExpansionRule.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ExpansionRule.java?rev=1490815&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ExpansionRule.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ExpansionRule.java Fri Jun 7 20:48:36 2013
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.ruta.textruler.learner.trabal;
+
+import java.util.List;
+
+import org.apache.uima.ruta.textruler.core.TextRulerTarget;
+
+ /**
+  * Shifting rule that enlarges an annotation. If the target annotation begins before the
+  * rule's annotation, the rule expands to the left, otherwise it expands to the right. The
+  * compiled rule string removes the original annotation with UNMARK and creates the target
+  * annotation with MARK, or with CREATE if the learner has features enabled.
+  */
+ public class ExpansionRule extends ShiftingRule {
+
+   /**
+    * Creates an expansion rule; all arguments are handed on to the super class.
+    */
+   public ExpansionRule(TrabalLearner parentAlgorithm, TextRulerTarget target,
+           TrabalAnnotation annotation, TrabalAnnotation targetAnnotation,
+           AnnotationErrorType errorType) {
+     super(parentAlgorithm, target, annotation, targetAnnotation, errorType);
+   }
+
+   /**
+    * Copy constructor, delegates to the copy constructor of the super class.
+    */
+   public ExpansionRule(ExpansionRule copyFrom) {
+     super(copyFrom);
+   }
+
+   /**
+    * Compiles the rule string. Compiling does not modify the configured condition types, so
+    * repeated calls on an unchanged rule yield the same rule string.
+    */
+   @Override
+   public void compileRuleString() {
+     ruleString = "";
+
+     int errorBegin = annotation.getBegin();
+     int truthBegin = targetAnnotation.getBegin();
+     int truthEnd = targetAnnotation.getEnd();
+     boolean expandToLeft = truthBegin < errorBegin;
+
+     if (expandToLeft) {
+       // (frontBoundaryItem ANY*? annotation){-> MARK(targetAnnotation)};
+       expandToLeft(truthBegin);
+     } else {
+       // (annotation ANY*? rearBoundaryItem){-> MARK(targetAnnotation)};
+       expandToRight(truthEnd);
+     }
+     setNeedsCompile(false);
+   }
+
+   /**
+    * Builds the rule string for an expansion to the left. The original annotation acts as the
+    * rear boundary of the new annotation.
+    *
+    * @param truthBegin
+    *          begin offset of the target annotation
+    */
+   private void expandToLeft(int truthBegin) {
+     // should the boundary item be part of the new annotation?
+     boolean frontItemInBorders = frontBoundaryItem.getAnnotation().getBegin() == truthBegin;
+
+     // positive conditions, attached to the brackets together with the mark action
+     String positiveConditions = parseConditions(conditionTypes, true);
+     if (positiveConditions.length() > 0) {
+       positiveConditions += " ";
+     }
+
+     // this string will be attached to the original annotation
+     String unmark = buildUnmarkAction(ConditionType.BEFORE);
+
+     // this string will be attached to the front boundary item
+     String after = parseConditions(ConditionType.AFTER);
+     if (after.length() > 0) {
+       after = "{" + after + "}";
+     }
+
+     // If the front item is part of the future annotation, it has to be included in the brackets.
+     if (frontItemInBorders) {
+       ruleString += "(" + frontBoundaryItem + after + " ";
+     } else {
+       ruleString += frontBoundaryItem + after + " (";
+     }
+
+     // We include all tokens between the boundaries (like ANY*? but faster).
+     ruleString += "ANY*{-PARTOF(" + annotation.getType().getShortName() + ")} ";
+
+     String action = buildMarkAction(positiveConditions);
+
+     // The original annotation represents the rear boundary item
+     ruleString += annotation.getType().getShortName() + unmark + ")" + action + ";";
+   }
+
+   /**
+    * Builds the rule string for an expansion to the right. The original annotation acts as the
+    * front boundary of the new annotation.
+    *
+    * @param truthEnd
+    *          end offset of the target annotation
+    */
+   private void expandToRight(int truthEnd) {
+     // should the boundary item be part of the new annotation?
+     boolean rearItemInBorders = rearBoundaryItem.getAnnotation().getEnd() == truthEnd;
+
+     // positive conditions, attached to the brackets together with the mark action
+     String positiveConditions = parseConditions(conditionTypes, true);
+     if (positiveConditions.length() > 0) {
+       positiveConditions += " ";
+     }
+
+     // this string will be attached to the original annotation
+     String unmark = buildUnmarkAction(ConditionType.AFTER);
+
+     // The original annotation represents the front boundary item
+     ruleString += "(" + annotation.getType().getShortName() + unmark + " ";
+
+     // We include all tokens between the boundaries (like ANY*? but faster).
+     ruleString += "ANY*{-PARTOF(" + rearBoundaryItem + ")}";
+
+     String action = buildMarkAction(positiveConditions);
+
+     // this string will be attached to the rear boundary item
+     String before = parseConditions(ConditionType.BEFORE);
+     if (before.length() > 0) {
+       before = "{" + before + "}";
+     }
+
+     // If the rear item is part of the future annotation, it has to be included in the brackets
+     if (rearItemInBorders) {
+       ruleString += " " + rearBoundaryItem + before + ")" + action + ";";
+     } else {
+       ruleString += ")" + action + " " + rearBoundaryItem + before + ";";
+     }
+   }
+
+   /**
+    * Builds the <code>{conditions-> UNMARK(Type)}</code> block that is attached to the original
+    * annotation. It contains the positive conditions of the given boundary type followed by the
+    * negative conditions (printed without negation mark) of the configured condition types and
+    * of the boundary type.
+    *
+    * @param boundaryType
+    *          {@link ConditionType#BEFORE} for an expansion to the left,
+    *          {@link ConditionType#AFTER} for an expansion to the right
+    * @return the block including the surrounding braces
+    */
+   private String buildUnmarkAction(ConditionType boundaryType) {
+     // Work on a copy: adding the boundary type to the conditionTypes field itself would make
+     // the list grow with every compilation and would leak the boundary conditions into the
+     // positive conditions of the mark action from the second compilation on.
+     List<ConditionType> unmarkTypes = new java.util.ArrayList<ConditionType>(conditionTypes);
+     unmarkTypes.add(boundaryType);
+
+     String negativeConditions = parseConditions(unmarkTypes, false);
+     if (negativeConditions.length() > 0) {
+       negativeConditions += " ";
+     }
+
+     String boundaryConditions = parseConditions(boundaryType, true);
+     if (boundaryConditions.length() > 0 && negativeConditions.length() > 0) {
+       negativeConditions = boundaryConditions + ", " + negativeConditions;
+     } else if (boundaryConditions.length() > 0) {
+       negativeConditions = boundaryConditions + " ";
+     }
+
+     return "{" + negativeConditions + "-> UNMARK(" + annotation.getType().getShortName() + ")}";
+   }
+
+   /**
+    * Builds the action block that is attached to the brackets: a CREATE action with the parsed
+    * features if the learner has features enabled, a plain MARK action otherwise.
+    *
+    * @param positiveConditions
+    *          the already formatted positive conditions, empty or ending with a blank
+    * @return the block including the surrounding braces
+    */
+   private String buildMarkAction(String positiveConditions) {
+     String targetName = targetAnnotation.getType().getShortName();
+     String mark;
+     if (((TrabalLearner) algorithm).getEnableFeatures()) {
+       mark = "-> CREATE(" + targetName + parseFeatures() + ")";
+     } else {
+       mark = "-> MARK(" + targetName + ")";
+     }
+     return "{" + positiveConditions + mark + "}";
+   }
+
+   /**
+    * Parse either the positive or the negative conditions without negation mark.
+    *
+    * @param types
+    *          the types, that should be parsed
+    * @param positive
+    *          true for positive, false for negative conditions
+    * @return String with the parsed conditions separated by ", ", or an empty string
+    */
+   private String parseConditions(List<ConditionType> types, boolean positive) {
+     StringBuilder result = new StringBuilder();
+     boolean first = true;
+     for (Condition each : conditions) {
+       if (!types.contains(each.getType()) || each.isNegative() == positive) {
+         continue;
+       }
+       if (!first) {
+         result.append(", ");
+       }
+       first = false;
+       if (each.isNegative()) {
+         // Flip the condition temporarily so that it is printed without negation mark; the
+         // finally block guarantees that the original polarity is restored.
+         each.changePosNegValue();
+         try {
+           result.append(each);
+         } finally {
+           each.changePosNegValue();
+         }
+       } else {
+         result.append(each);
+       }
+     }
+     return result.toString();
+   }
+
+   /**
+    * Parse either the positive or the negative conditions of a single type without negation
+    * mark.
+    *
+    * @param type
+    *          the type, that should be parsed
+    * @param positive
+    *          true for positive, false for negative conditions
+    * @return String with the parsed conditions separated by ", ", or an empty string
+    */
+   private String parseConditions(ConditionType type, boolean positive) {
+     return parseConditions(java.util.Collections.singletonList(type), positive);
+   }
+
+   /**
+    * Creates a copy of this rule by means of the copy constructor.
+    */
+   @Override
+   public TrabalRule copy() {
+     return new ExpansionRule(this);
+   }
+
+ }
Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/RankedList.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/RankedList.java?rev=1490815&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/RankedList.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/RankedList.java Fri Jun 7 20:48:36 2013
@@ -0,0 +1,316 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.ruta.textruler.learner.trabal;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+ /**
+  * List of conditions in which every element carries a ranking value. Adding an element that
+  * is already contained does not insert it a second time but increases its ranking. The
+  * ranking values are kept in a map next to the list, so elements are identified by
+  * {@code equals}/{@code hashCode} of {@link Condition}.
+  */
+ public class RankedList extends ArrayList<Condition> {
+
+   private static final long serialVersionUID = -5978107285325156323L;
+
+   /** Ranking value of every element of the list. */
+   Map<Condition, Double> ranking;
+
+   /**
+    * Weights by short type name, used as ranking increment in {@link #add(Condition)}.
+    * NOTE(review): presumably inverse document frequencies - confirm with the caller.
+    */
+   Map<String, Double> idf;
+
+   /**
+    * Creates an empty list.
+    *
+    * @param i
+    *          the weights by short type name
+    */
+   public RankedList(Map<String, Double> i) {
+     super();
+     ranking = new HashMap<Condition, Double>();
+     idf = i;
+   }
+
+   /**
+    * Creates a list and adds all given conditions with {@link #add(Condition)}.
+    *
+    * @param c
+    *          the initial conditions
+    * @param i
+    *          the weights by short type name
+    */
+   public RankedList(Collection<? extends Condition> c, Map<String, Double> i) {
+     super();
+     ranking = new HashMap<Condition, Double>();
+     idf = i;
+     addAll(c);
+   }
+
+   /**
+    * Adds the item to the list if it is not contained yet and increases its ranking by the
+    * weight that is registered for the short type name of the item's annotation, or by 1.0 if
+    * no weight is registered.
+    *
+    * @return true, if the item was newly inserted into the list.
+    */
+   @Override
+   public boolean add(Condition e) {
+     String typeName = e.getItem().getAnnotation().getType().getShortName();
+     Double weight = idf.get(typeName);
+     double value = weight != null ? weight.doubleValue() : 1.0;
+     Double current = ranking.get(e);
+     if (current != null) {
+       ranking.put(e, current.doubleValue() + value);
+       return false;
+     }
+     super.add(e);
+     ranking.put(e, value);
+     return true;
+   }
+
+   /**
+    * Adds the item to the list if it is not contained yet and increases its ranking by the
+    * given number.
+    *
+    * @param index
+    *          the ranking increment (despite its name not a list position)
+    * @param e
+    *          the item to add
+    */
+   public void add(double index, Condition e) {
+     Double current = ranking.get(e);
+     if (current != null) {
+       ranking.put(e, current.doubleValue() + index);
+     } else {
+       super.add(e);
+       ranking.put(e, index);
+     }
+   }
+
+   /**
+    * Adds the items to the ranked list and increases their ranking by their value in the given
+    * list.
+    */
+   public void addAll(RankedList list) {
+     for (Condition each : list) {
+       add(list.rankingOf(each), each);
+     }
+   }
+
+   /**
+    * Adds the items to the list with {@link #add(Condition)}.
+    *
+    * @return always true
+    */
+   @Override
+   public boolean addAll(Collection<? extends Condition> c) {
+     for (Condition each : c) {
+       add(each);
+     }
+     return true;
+   }
+
+   /**
+    * Adds the items to the list and increases their ranking by the given number.
+    *
+    * @param index
+    *          the ranking increment (despite its name not a list position)
+    * @return always true
+    */
+   public boolean addAll(double index, Collection<? extends Condition> c) {
+     for (Condition each : c) {
+       add(index, each);
+     }
+     return true;
+   }
+
+   /**
+    * Removes the element at the given position together with its ranking.
+    *
+    * @return the removed element, or null if the element is null or has no ranking (in that
+    *         case nothing is removed)
+    */
+   @Override
+   public Condition remove(int index) {
+     Condition element = super.get(index);
+     if (element != null && ranking.containsKey(element)) {
+       ranking.remove(element);
+       super.remove(index);
+       return element;
+     }
+     return null;
+   }
+
+   /**
+    * Removes the given element together with its ranking.
+    *
+    * @return true if the element was contained and has been removed
+    */
+   @Override
+   public boolean remove(Object o) {
+     if (contains(o) && ranking.containsKey(o)) {
+       super.remove(o);
+       ranking.remove(o);
+       return true;
+     }
+     return false;
+   }
+
+   @Override
+   public List<Condition> subList(int start, int end) {
+     return super.subList(start, end);
+   }
+
+   @Override
+   public boolean contains(Object o) {
+     return super.contains(o);
+   }
+
+   /**
+    * Removes all elements and all rankings.
+    */
+   @Override
+   public void clear() {
+     super.clear();
+     ranking.clear();
+   }
+
+   @Override
+   public int size() {
+     return super.size();
+   }
+
+   /**
+    * Creates a new list that shares the weights, contains clones of all elements and assigns
+    * them the same rankings.
+    */
+   @Override
+   public RankedList clone() {
+     RankedList clone = new RankedList(idf);
+     for (Condition element : this) {
+       clone.add(rankingOf(element), element.clone());
+     }
+     return clone;
+   }
+
+   @Override
+   public Condition get(int i) {
+     return super.get(i);
+   }
+
+   /**
+    * Returns the ranking of the given object.
+    *
+    * @param each
+    *          the element to look up
+    * @return the ranking of the element, or 0 if it is not part of the list
+    */
+   public double rankingOf(Condition each) {
+     if (contains(each)) {
+       return storedRanking(each);
+     }
+     return 0;
+   }
+
+   /**
+    * Returns the internal ranking map (no copy).
+    */
+   public Map<Condition, Double> getRanking() {
+     return ranking;
+   }
+
+   /**
+    * Sorts the elements of the list by descending ranking. Of two elements with the same
+    * ranking, the one that was located further back in the list comes first.
+    */
+   public void sort() {
+     List<Condition> sorted = new ArrayList<Condition>(size());
+     for (int i = 0; i < size(); i++) {
+       Condition current = get(i);
+       double currentRank = ranking.get(current).doubleValue();
+       // insert in front of the first element whose ranking is not higher
+       int position = 0;
+       while (position < sorted.size()
+               && !(currentRank >= ranking.get(sorted.get(position)).doubleValue())) {
+         position++;
+       }
+       sorted.add(position, current);
+     }
+     // only the order changes, the rankings stay untouched
+     super.clear();
+     super.addAll(sorted);
+   }
+
+   /**
+    * Unites the lists and adds their values for common elements.
+    *
+    * @param list
+    *          A list of the same type.
+    * @return a new, sorted list; this list and the given list are not modified
+    */
+   public RankedList unite(RankedList list) {
+     RankedList clone = clone();
+     for (Condition element : list) {
+       clone.add(list.rankingOf(element), element.clone());
+     }
+     clone.sort();
+     return clone;
+   }
+
+   /**
+    * Removes all elements that are not part of both lists. Also adds the values of elements that
+    * occur in both lists.
+    *
+    * @param list
+    *          A list of the same type.
+    * @return a new, sorted list; this list and the given list are not modified
+    */
+   public RankedList cut(RankedList list) {
+     RankedList clone = clone();
+     for (Condition element : this) {
+       if (list.contains(element)) {
+         clone.add(list.rankingOf(element), element.clone());
+       } else {
+         clone.remove(element);
+       }
+     }
+     clone.sort();
+     return clone;
+   }
+
+   /**
+    * Removes all elements of the given list that occur in this list. The rankings of the
+    * remaining elements are kept.
+    *
+    * @param list
+    *          A list of the same type.
+    * @return a new, sorted list; this list and the given list are not modified
+    */
+   public RankedList subtract(RankedList list) {
+     RankedList clone = clone();
+     for (Condition element : this) {
+       if (list.contains(element)) {
+         clone.remove(element);
+       }
+     }
+     clone.sort();
+     return clone;
+   }
+
+   /**
+    * Replaces the element at the given position. The new element takes over the ranking of the
+    * replaced element. If the new element is already contained at another position, the ranking
+    * of the replaced element is added to its own ranking and the other occurrence is removed,
+    * so the list never contains an element twice.
+    *
+    * @return the element previously located at the position
+    * @throws IndexOutOfBoundsException
+    *           if the index is out of range
+    */
+   @Override
+   public Condition set(int index, Condition element) {
+     Condition previous = super.get(index);
+     if (previous == element || (previous != null && previous.equals(element))) {
+       // same element, its ranking stays as it is
+       return super.set(index, element);
+     }
+     double inherited = storedRanking(previous);
+     int existing = indexOf(element);
+     ranking.remove(previous);
+     if (existing >= 0) {
+       ranking.put(element, storedRanking(element) + inherited);
+     } else {
+       ranking.put(element, inherited);
+     }
+     super.set(index, element);
+     if (existing >= 0) {
+       // drop the old occurrence; 'existing' differs from 'index' at this point
+       super.remove(existing);
+     }
+     return previous;
+   }
+
+   /**
+    * Inserts the element at the given position and assigns it the ranking of the element that is
+    * currently located there. An index equal to the size of the list appends the element like
+    * {@link #add(Condition)}. Nothing happens if the element is already contained or if the
+    * index is greater than the size of the list.
+    */
+   @Override
+   public void add(int index, Condition element) {
+     if (index > size()) {
+       return;
+     }
+     if (index == size()) {
+       // there is no element to take the ranking from
+       if (!contains(element)) {
+         add(element);
+       }
+       return;
+     }
+     double value = storedRanking(get(index));
+     if (!contains(element)) {
+       ranking.put(element, value);
+       super.add(index, element);
+     }
+   }
+
+   /**
+    * Inserts all elements that are not contained yet at the given position, keeping their
+    * order, and assigns them the ranking of the element that is currently located there. An
+    * index equal to the size of the list appends the elements like {@link #add(Condition)}.
+    * Nothing happens if the index is greater than the size of the list.
+    *
+    * @return true if at least one element has been inserted
+    */
+   @Override
+   public boolean addAll(int index, Collection<? extends Condition> c) {
+     if (index > size()) {
+       return false;
+     }
+     boolean changed = false;
+     if (index == size()) {
+       for (Condition element : c) {
+         if (!contains(element)) {
+           add(element);
+           changed = true;
+         }
+       }
+       return changed;
+     }
+     double value = storedRanking(get(index));
+     int insertAt = index;
+     for (Condition element : c) {
+       if (!contains(element)) {
+         ranking.put(element, value);
+         super.add(insertAt, element);
+         insertAt++;
+         changed = true;
+       }
+     }
+     return changed;
+   }
+
+   /**
+    * Removes the elements of the range together with their rankings.
+    */
+   @Override
+   protected void removeRange(int fromIndex, int toIndex) {
+     for (int i = fromIndex; i < toIndex; i++) {
+       ranking.remove(get(i));
+     }
+     super.removeRange(fromIndex, toIndex);
+   }
+
+   /**
+    * Looks up the ranking map directly; returns 0 if no ranking is stored for the element.
+    */
+   private double storedRanking(Condition element) {
+     Double rank = ranking.get(element);
+     return rank == null ? 0 : rank.doubleValue();
+   }
+
+ }
Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ShiftAllRule.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ShiftAllRule.java?rev=1490815&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ShiftAllRule.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ShiftAllRule.java Fri Jun 7 20:48:36 2013
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.ruta.textruler.learner.trabal;
+
+import org.apache.uima.ruta.textruler.core.TextRulerTarget;
+
+public class ShiftAllRule extends ShiftingRule {
+
+ private TrabalRuleItem errorBoundaryItem;
+
+ public ShiftAllRule(TrabalLearner parentAlgorithm, TextRulerTarget target,
+ TrabalAnnotation annotation, TrabalAnnotation targetAnnotation,
+ AnnotationErrorType errorType) {
+ super(parentAlgorithm, target, annotation, targetAnnotation, errorType);
+ }
+
+ public ShiftAllRule(ShiftAllRule copyFrom) {
+ super(copyFrom);
+ this.errorBoundaryItem = copyFrom.errorBoundaryItem;
+ }
+
+ @Override
+ public void compileRuleString() {
+ ruleString = "";
+
+ // shift to left or right?
+ boolean shiftToLeft = errorType == AnnotationErrorType.SHIFTING_LEFT;
+
+ // should the boundary items be part of the new annotation?
+ boolean frontItemInBorders = frontBoundaryItem.getAnnotation().getBegin() == targetAnnotation
+ .getBegin();
+ boolean rearItemInBorders = rearBoundaryItem.getAnnotation().getEnd() == targetAnnotation
+ .getEnd();
+
+ // this string will be attached to the rear boundary item
+ String before = parseConditions(ConditionType.BEFORE);
+ if (before.length() > 0)
+ before = "{" + before + "}";
+
+ // the action string will be attached to the brackets, containing the conditions and mark action
+ String conditions = parseConditions(conditionTypes);
+ if (conditions.length() > 0)
+ conditions += " ";
+ String mark;
+ if (((TrabalLearner) algorithm).getEnableFeatures())
+ mark = "-> CREATE(" + targetAnnotation.getType().getShortName() + parseFeatures() + ")";
+ else
+ mark = "-> MARK(" + targetAnnotation.getType().getShortName() + ")";
+ String action = "{" + conditions + mark + "}";
+
+ if (errorBoundaryItem == null) {
+ // We only have 2 boundary items (reduction rule, where only the right boundary changes)
+ compileWithoutErrorRuleItem(frontItemInBorders, rearItemInBorders, before, action);
+ } else {
+ // this string will be attached to the front boundary item
+ String after = parseConditions(ConditionType.AFTER);
+ if (after.length() > 0)
+ after = "{" + after + "}";
+
+ // this part will delete the original annotation
+ String unmark = errorBoundaryItem + "{STARTSWITH(" + annotation.getType().getShortName()
+ + ") -> UNMARK(" + annotation.getType().getShortName() + ", true)}";
+
+ if (shiftToLeft)
+ compileShiftToLeft(frontItemInBorders, rearItemInBorders, before, after, action, unmark);
+ else
+ compileShiftToRight(frontItemInBorders, rearItemInBorders, before, after, action, unmark);
+ }
+ setNeedsCompile(false);
+ }
+
+ private void compileShiftToLeft(boolean frontItemInBorders, boolean rearItemInBorders,
+ String before, String after, String action, String unmark) {
+ // If the front item is part of the future annotation, it has to be included in the
+ // brackets.
+ if (frontItemInBorders)
+ ruleString += "(" + frontBoundaryItem + after + " ";
+ else
+ ruleString += frontBoundaryItem + after + " (";
+
+ // We include all tokens between the boundaries.
+ ruleString += "ANY*{-PARTOF(" + errorBoundaryItem + ")} " + unmark + " ANY*{-PARTOF("
+ + rearBoundaryItem + ")}"; // like ANY*? but faster
+
+ // Check, if the rear item should be included and mark all tokens between the brackets as
+ // the
+ // new annotation.
+ if (rearItemInBorders)
+ ruleString += " " + rearBoundaryItem + before + ")" + action + ";";
+ else
+ ruleString += ")" + action + " " + rearBoundaryItem + before + ";";
+ }
+
+ private void compileShiftToRight(boolean frontItemInBorders, boolean rearItemInBorders,
+ String before, String after, String action, String unmark) {
+ // The old annotation begins before the new annotation
+ ruleString += unmark + " ANY*{-PARTOF(" + frontBoundaryItem + ")} ";
+
+ // If the front item is part of the future annotation, it has to be included in the
+ // brackets.
+ if (frontItemInBorders)
+ ruleString += "(" + frontBoundaryItem + after + " ";
+ else
+ ruleString += frontBoundaryItem + after + " (";
+
+ // We include all tokens between the boundaries.
+ ruleString += "ANY*{-PARTOF(" + rearBoundaryItem + ")}"; // like ANY*? but faster
+
+ // Check, if the rear item should be included and mark all tokens between the brackets as
+ // the
+ // new annotation.
+ if (rearItemInBorders)
+ ruleString += " " + rearBoundaryItem + before + ")" + action + ";";
+ else
+ ruleString += ")" + action + " " + rearBoundaryItem + before + ";";
+ }
+
+ private void compileWithoutErrorRuleItem(boolean frontItemInBorders, boolean rearItemInBorders,
+ String before, String action) {
+ // this string will be attached to the front boundary item
+ String after = parseConditions(ConditionType.AFTER);
+ if (after.length() > 0)
+ after = ", " + after;
+
+ // this string deletes the original annotation
+ String unmark = "{STARTSWITH(" + annotation.getType().getShortName() + ")" + after
+ + " -> UNMARK(" + annotation.getType().getShortName() + ", true)}";
+
+ if (frontItemInBorders)
+ ruleString += "(" + frontBoundaryItem + unmark + " ";
+ else
+ ruleString += frontBoundaryItem + unmark + " (";
+
+ ruleString += "ANY*{-PARTOF(" + rearBoundaryItem + ")}"; // like ANY*? but faster
+
+ // Check, if the rear item should be included and mark all tokens between the brackets as the
+ // new annotation.
+ if (rearItemInBorders)
+ ruleString += " " + rearBoundaryItem + before + ")" + action + ";";
+ else
+ ruleString += ")" + action + " " + rearBoundaryItem + before + ";";
+ }
+
+ @Override
+ public boolean contains(TrabalAnnotation target) {
+ if (target != null) {
+ if (annotation.getType().getShortName().equals(target.getType().getShortName())) {
+ return true;
+ }
+ if (targetAnnotation.getType().getShortName().equals(target.getType().getShortName())) {
+ return true;
+ }
+ if (frontBoundaryItem != null)
+ if (frontBoundaryItem.equals(target))
+ return true;
+ if (errorBoundaryItem != null)
+ if (errorBoundaryItem.equals(target))
+ return true;
+ if (rearBoundaryItem != null)
+ if (rearBoundaryItem.equals(target))
+ return true;
+ for (Condition each : getConditions()) {
+ if (each.getItem().getAnnotation().getType().getShortName()
+ .equals(target.getType().getShortName())) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Sets the boundary item that marks the beginning of the original annotation.
+ *
+ * @param errorBoundaryItem
+ */
+ public void setErrorBoundaryItem(TrabalRuleItem errorBoundaryItem) {
+ this.errorBoundaryItem = errorBoundaryItem;
+ }
+
+ @Override
+ public TrabalRule copy() {
+ return new ShiftAllRule(this);
+ }
+
+}
Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ShiftingRule.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ShiftingRule.java?rev=1490815&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ShiftingRule.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ShiftingRule.java Fri Jun 7 20:48:36 2013
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.ruta.textruler.learner.trabal;
+
+import java.util.ArrayList;
+
+import org.apache.uima.ruta.textruler.core.TextRulerTarget;
+
+public abstract class ShiftingRule extends TrabalRule {
+
+ protected TrabalRuleItem frontBoundaryItem;
+
+ protected TrabalRuleItem rearBoundaryItem;
+
+ public ShiftingRule(TrabalLearner parentAlgorithm, TextRulerTarget target,
+ TrabalAnnotation annotation, TrabalAnnotation targetAnnotation,
+ AnnotationErrorType errorType) {
+ super(parentAlgorithm, target);
+ this.annotation = annotation;
+ this.targetAnnotation = targetAnnotation;
+ this.errorType = errorType;
+ addConditionTypes();
+ }
+
+ public ShiftingRule(ShiftingRule copyFrom) {
+ super((TrabalLearner) copyFrom.algorithm, copyFrom.target);
+ this.annotation = copyFrom.annotation;
+ this.targetAnnotation = copyFrom.targetAnnotation;
+ this.errorType = copyFrom.errorType;
+ this.frontBoundaryItem = copyFrom.frontBoundaryItem;
+ this.rearBoundaryItem = copyFrom.rearBoundaryItem;
+ this.conditions = new ArrayList<Condition>();
+ for (Condition c : copyFrom.conditions)
+ conditions.add(c.clone());
+ addConditionTypes();
+ }
+
+ protected void addConditionTypes() {
+ conditionTypes = new ArrayList<ConditionType>();
+ conditionTypes.add(ConditionType.STARTSWITH);
+ conditionTypes.add(ConditionType.ENDSWITH);
+ conditionTypes.add(ConditionType.CONTAINS);
+ conditionTypes.add(ConditionType.PARTOF);
+ }
+
+ public boolean contains(TrabalAnnotation target) {
+ if (target != null) {
+ if (annotation.getType().getShortName().equals(target.getType().getShortName())) {
+ return true;
+ }
+ if (targetAnnotation.getType().getShortName().equals(target.getType().getShortName())) {
+ return true;
+ }
+ if (frontBoundaryItem != null)
+ if (frontBoundaryItem.equals(target))
+ return true;
+ if (rearBoundaryItem != null)
+ if (rearBoundaryItem.equals(target))
+ return true;
+ for (Condition each : getConditions()) {
+ if (each.getItem().getAnnotation().getType().getShortName()
+ .equals(target.getType().getShortName())) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ public boolean hasSameBasicRule(TrabalRule rule) {
+ if (rule.getAnnotation() == null)
+ return false;
+ if (!rule.getAnnotation().getType().equals(annotation.getType()))
+ return false;
+ if (rule.getTargetAnnotation() == null)
+ return false;
+ if (!rule.getTargetAnnotation().getType().equals(targetAnnotation.getType()))
+ return false;
+ return true;
+ }
+
+ @Override
+ public TrabalRuleItem getFrontBoundary() {
+ if (annotation.getBegin() == targetAnnotation.getBegin())
+ return new TrabalRuleItem(annotation);
+ return frontBoundaryItem;
+ }
+
+ @Override
+ public TrabalRuleItem getRearBoundary() {
+ if (annotation.getEnd() == targetAnnotation.getEnd())
+ return new TrabalRuleItem(annotation);
+ return rearBoundaryItem;
+ }
+
+ public void setFrontBoundaryItem(TrabalRuleItem frontBoundaryItem) {
+ this.frontBoundaryItem = frontBoundaryItem;
+ }
+
+ public void setRearBoundaryItem(TrabalRuleItem rearBoundaryItem) {
+ this.rearBoundaryItem = rearBoundaryItem;
+ }
+
+}
Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalAnnotation.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalAnnotation.java?rev=1490815&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalAnnotation.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalAnnotation.java Fri Jun 7 20:48:36 2013
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+
+package org.apache.uima.ruta.textruler.learner.trabal;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.ruta.textruler.core.TextRulerAnnotation;
+import org.apache.uima.ruta.textruler.core.TextRulerExampleDocument;
+
+ /**
+  * A {@link TextRulerAnnotation} that additionally remembers the feature values of the wrapped
+  * annotation as strings, keyed by the short name of the feature. Features whose short name is
+  * listed in <code>TrabalLearner.FILTERED_FEATURES</code> are left out.
+  * <p>
+  * Two instances are equal if they have the same begin, end and type name and, when feature
+  * comparison is enabled on the instance that performs the comparison, equal features (see
+  * {@link #hasEqualFeatures(TrabalAnnotation)}).
+  */
+ public class TrabalAnnotation extends TextRulerAnnotation {
+   /** Feature values as strings by feature short name; stays null if the type reports no list. */
+   private Map<String, String> features;
+
+   /** Whether {@link #hasEqualFeatures(TrabalAnnotation)} actually compares the features. */
+   private boolean enableFeatures;
+
+   /**
+    * Wraps the given annotation and collects its feature values.
+    *
+    * @param afs
+    *          the annotation to wrap
+    * @param enableFeatures
+    *          whether features take part in equality checks
+    */
+   public TrabalAnnotation(AnnotationFS afs, boolean enableFeatures) {
+     super(afs);
+     initFeatures(afs, enableFeatures);
+   }
+
+   /**
+    * Wraps the given annotation of the given document and collects its feature values.
+    *
+    * @param afs
+    *          the annotation to wrap
+    * @param document
+    *          the example document handed on to the super class
+    * @param enableFeatures
+    *          whether features take part in equality checks
+    */
+   public TrabalAnnotation(AnnotationFS afs, TextRulerExampleDocument document,
+           boolean enableFeatures) {
+     super(afs, document);
+     initFeatures(afs, enableFeatures);
+   }
+
+   /**
+    * Shared constructor logic: copies the string values of all non-filtered features of the
+    * annotation into {@link #features} and stores the flag. Nothing is changed if the type of the
+    * annotation returns no feature list.
+    */
+   private void initFeatures(AnnotationFS afs, boolean enableFeatures) {
+     if (afs.getType().getFeatures() == null) {
+       return;
+     }
+     features = new HashMap<String, String>();
+     this.enableFeatures = enableFeatures;
+     for (Feature f : afs.getType().getFeatures()) {
+       try {
+         if (!TrabalLearner.FILTERED_FEATURES.contains(f.getShortName())) {
+           features.put(f.getShortName(), afs.getFeatureValueAsString(f));
+         }
+       } catch (Exception e) {
+         // Best effort: a feature whose value cannot be read as a string is reported and
+         // skipped, it must not abort the construction of the annotation.
+         e.printStackTrace();
+       }
+     }
+   }
+
+   /**
+    * @return the internal map of feature short names to their string values (not a copy), or
+    *         null if no features were collected
+    */
+   public Map<String, String> getFeatures() {
+     return features;
+   }
+
+   /**
+    * Renders the annotation as
+    * <code>Type(begin, end)[feature:value, ...] "covered text"</code>. The bracket part is
+    * omitted if no feature map exists and is rendered as <code>[]</code> if the map is empty.
+    */
+   @Override
+   public String toString() {
+     StringBuilder result = new StringBuilder();
+     result.append(getType().getShortName());
+     result.append("(").append(getBegin()).append(", ").append(getEnd()).append(")");
+     if (features != null) {
+       result.append("[");
+       // The separator is written in front of every entry but the first one, so nothing has to
+       // be cut off afterwards (cutting off corrupted the output for an empty map).
+       String separator = "";
+       for (Map.Entry<String, String> entry : features.entrySet()) {
+         result.append(separator).append(entry.getKey()).append(":").append(entry.getValue());
+         separator = ", ";
+       }
+       result.append("]");
+     }
+     result.append(" \"").append(getCoveredText()).append("\"");
+     return result.toString();
+   }
+
+   /**
+    * Compares begin, end, type name and (if enabled on this instance) the features.
+    *
+    * @param o
+    *          the object to compare with, may be null
+    * @return false for null and for objects that are no {@link TrabalAnnotation}
+    */
+   @Override
+   public boolean equals(Object o) {
+     if (this == o) {
+       return true;
+     }
+     // instanceof also rejects null, so neither a NullPointerException nor a
+     // ClassCastException can occur for foreign arguments.
+     if (!(o instanceof TrabalAnnotation)) {
+       return false;
+     }
+     TrabalAnnotation obj = (TrabalAnnotation) o;
+     return getBegin() == obj.getBegin() && getEnd() == obj.getEnd()
+             && getType().getName().equals(obj.getType().getName()) && hasEqualFeatures(obj);
+   }
+
+   /**
+    * Hash code that is consistent with {@link #equals(Object)}: it only uses begin, end and type
+    * name, which are compared regardless of whether feature comparison is enabled.
+    */
+   @Override
+   public int hashCode() {
+     int result = 17;
+     result = 31 * result + getBegin();
+     result = 31 * result + getEnd();
+     result = 31 * result + getType().getName().hashCode();
+     return result;
+   }
+
+   /**
+    * Compares the features of this annotation with the features of the given one.
+    *
+    * @param obj
+    *          the annotation to compare with
+    * @return true if feature comparison is disabled on this instance; otherwise true if both
+    *         annotations have no feature map, or both maps have the same keys and no key has two
+    *         differing non-null values
+    */
+   public boolean hasEqualFeatures(TrabalAnnotation obj) {
+     if (!enableFeatures) {
+       return true;
+     }
+     Map<String, String> others = obj.getFeatures();
+     if (features == null || others == null) {
+       // equal only if both sides lack a feature map
+       return features == null && others == null;
+     }
+     if (features.size() != others.size()) {
+       return false;
+     }
+     for (Map.Entry<String, String> entry : features.entrySet()) {
+       if (!others.containsKey(entry.getKey())) {
+         return false;
+       }
+       String mine = entry.getValue();
+       String theirs = others.get(entry.getKey());
+       // A value that is null on either side is treated as matching; only two non-null values
+       // can differ.
+       if (mine != null && theirs != null && !mine.equals(theirs)) {
+         return false;
+       }
+     }
+     return true;
+   }
+ }
Added: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalFactory.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalFactory.java?rev=1490815&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalFactory.java (added)
+++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalFactory.java Fri Jun 7 20:48:36 2013
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ruta.textruler.learner.trabal;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.uima.ruta.textruler.extension.TextRulerLearner;
+import org.apache.uima.ruta.textruler.extension.TextRulerLearnerDelegate;
+import org.apache.uima.ruta.textruler.extension.TextRulerLearnerFactory;
+import org.apache.uima.ruta.textruler.extension.TextRulerLearnerParameter;
+import org.apache.uima.ruta.textruler.extension.TextRulerLearnerParameter.MLAlgorithmParamType;
+
+ /**
+  * {@link TextRulerLearnerFactory} for the {@link TrabalLearner}: creates learner instances and
+  * describes the parameters of the algorithm together with their standard values.
+  */
+ public class TrabalFactory implements TextRulerLearnerFactory {
+
+   public TrabalFactory() {
+   }
+
+   /**
+    * @return a new map that assigns each parameter key of the {@link TrabalLearner} its standard
+    *         value
+    */
+   public Map<String, Object> getAlgorithmParameterStandardValues() {
+     Map<String, Object> defaults = new HashMap<String, Object>();
+     defaults.put(TrabalLearner.ALGORITHM_ITERATIONS_KEY, TrabalLearner.ALGORITHM_ITERATIONS);
+     defaults.put(TrabalLearner.MAX_NUMBER_OF_BASIC_RULES_KEY,
+             TrabalLearner.MAX_NUMBER_OF_BASIC_RULES);
+     defaults.put(TrabalLearner.MAX_NUMBER_OF_RULES_KEY, TrabalLearner.MAX_NUMBER_OF_RULES);
+     defaults.put(TrabalLearner.MAX_NUMBER_OF_ITERATIONS_KEY,
+             TrabalLearner.MAX_NUMBER_OF_ITERATIONS);
+     defaults.put(TrabalLearner.MAX_ERROR_RATE_KEY, TrabalLearner.MAX_ERROR_RATE);
+     defaults.put(TrabalLearner.ENABLE_FEATURES_KEY, TrabalLearner.ENABLE_FEATURES);
+     return defaults;
+   }
+
+   /**
+    * Creates a new {@link TrabalLearner}; all arguments are passed on to its constructor
+    * unchanged and in the same order.
+    */
+   public TextRulerLearner createAlgorithm(String inputFolderPath, String additionalFolderPath,
+           String preprocessorRutaFile, String tempFolderPath, String[] fullSlotTypeNames,
+           Set<String> filterSet, boolean skip, TextRulerLearnerDelegate delegate) {
+     TextRulerLearner learner = new TrabalLearner(inputFolderPath, additionalFolderPath,
+             preprocessorRutaFile, tempFolderPath, fullSlotTypeNames, filterSet, skip, delegate);
+     return learner;
+   }
+
+   /**
+    * @return the descriptions of the six parameters of the algorithm: four integer parameters,
+    *         the error rate as double parameter and the feature switch as boolean parameter
+    */
+   public TextRulerLearnerParameter[] getAlgorithmParameters() {
+     return new TextRulerLearnerParameter[] {
+         new TextRulerLearnerParameter(TrabalLearner.ALGORITHM_ITERATIONS_KEY,
+                 "Number of times, the algorithm iterates.", MLAlgorithmParamType.ML_INT_PARAM),
+         new TextRulerLearnerParameter(TrabalLearner.MAX_NUMBER_OF_BASIC_RULES_KEY,
+                 "Number of basic rules to be created for one example.",
+                 MLAlgorithmParamType.ML_INT_PARAM),
+         new TextRulerLearnerParameter(TrabalLearner.MAX_NUMBER_OF_RULES_KEY,
+                 "Number of optimized rules to be created for one example.",
+                 MLAlgorithmParamType.ML_INT_PARAM),
+         new TextRulerLearnerParameter(TrabalLearner.MAX_NUMBER_OF_ITERATIONS_KEY,
+                 "Maximum number of iterations, when optimizing rules.",
+                 MLAlgorithmParamType.ML_INT_PARAM),
+         new TextRulerLearnerParameter(TrabalLearner.MAX_ERROR_RATE_KEY,
+                 "Maximum allowed error rate.", MLAlgorithmParamType.ML_DOUBLE_PARAM),
+         new TextRulerLearnerParameter(TrabalLearner.ENABLE_FEATURES_KEY,
+                 "Correct features in rules and conditions.",
+                 MLAlgorithmParamType.ML_BOOL_PARAM) };
+   }
+
+ }