You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2014/02/13 11:16:46 UTC
svn commit: r1567875 - in /uima/ruta/trunk: ruta-docbook/src/docbook/
ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/validator/
ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/
ruta-ep-textruler/src/main/java/org/apache/uima/...
Author: pkluegl
Date: Thu Feb 13 10:16:45 2014
New Revision: 1567875
URL: http://svn.apache.org/r1567875
Log:
UIMA-3539
- improved Workbench/TextRuler for missing descriptors
Modified:
uima/ruta/trunk/ruta-docbook/src/docbook/tools.ruta.workbench.textruler.xml
uima/ruta/trunk/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/validator/LanguageCheckerVisitor.java
uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/TextRulerBasicLearner.java
uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerPreprocessor.java
Modified: uima/ruta/trunk/ruta-docbook/src/docbook/tools.ruta.workbench.textruler.xml
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-docbook/src/docbook/tools.ruta.workbench.textruler.xml?rev=1567875&r1=1567874&r2=1567875&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-docbook/src/docbook/tools.ruta.workbench.textruler.xml (original)
+++ uima/ruta/trunk/ruta-docbook/src/docbook/tools.ruta.workbench.textruler.xml Thu Feb 13 10:16:45 2014
@@ -257,6 +257,7 @@ under the License.
<para>
This section gives a short example how the TextRuler framework is applied in order to induce annotation rules. We refer to the screenshot in <xref linkend="figure.tools.ruta.workbench.textruler.main"/>
for the configuration and are using the exemplary UIMA Ruta project <quote>TextRulerExample</quote>, which is part of the source release of UIMA Ruta.
+ After importing the project into your workspace, please rebuild all UIMA Ruta scripts in order to create the descriptors, e.g., by cleaning the project.
</para>
<para>
In this example, we are using the <quote>KEP</quote> algorithm for learning annotation rules for identifying Bibtex entries in the reference section of scientific publications:
Modified: uima/ruta/trunk/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/validator/LanguageCheckerVisitor.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/validator/LanguageCheckerVisitor.java?rev=1567875&r1=1567874&r2=1567875&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/validator/LanguageCheckerVisitor.java (original)
+++ uima/ruta/trunk/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/validator/LanguageCheckerVisitor.java Thu Feb 13 10:16:45 2014
@@ -765,9 +765,8 @@ public class LanguageCheckerVisitor exte
return true;
}
}
- if( featureName.equals("begin")
- || featureName.equals("end")) {
- return kind ==-1 || kind == RutaTypeConstants.RUTA_TYPE_N;
+ if (featureName.equals("begin") || featureName.equals("end")) {
+ return kind == -1 || kind == RutaTypeConstants.RUTA_TYPE_N;
}
Set<FeatureDescription> set = featureDescriptionMap.get(longTypeName);
if (set != null) {
@@ -807,7 +806,7 @@ public class LanguageCheckerVisitor exte
private String getLongNameOfNewType(String shortName) {
String moduleName = sourceModule.getElementName();
- moduleName = moduleName.substring(0, moduleName.length()-5);
+ moduleName = moduleName.substring(0, moduleName.length() - 5);
String packagePrefix = "";
if (!packageName.isEmpty()) {
packagePrefix = packageName + ".";
@@ -897,6 +896,9 @@ public class LanguageCheckerVisitor exte
private void initializePredefinedInformation() {
+ typeDescriptionMap = new HashMap<String, TypeDescription>();
+ featureDescriptionMap = new HashMap<String, Set<FeatureDescription>>();
+
try {
typeSystemDescription = getTypeSystemOfScript();
IPath descriptorRootPath = RutaProjectUtils.getDescriptorRootPath(sourceModule
@@ -906,19 +908,18 @@ public class LanguageCheckerVisitor exte
} catch (Exception e) {
RutaIdeUIPlugin.error(e);
}
+ if (typeSystemDescription != null) {
+ TypeDescription[] descriptions = typeSystemDescription.getTypes();
+ for (TypeDescription typeDescription : descriptions) {
+ String typeName = typeDescription.getName();
+ typeDescriptionMap.put(typeName, typeDescription);
+ }
- typeDescriptionMap = new HashMap<String, TypeDescription>();
- TypeDescription[] descriptions = typeSystemDescription.getTypes();
- for (TypeDescription typeDescription : descriptions) {
- String typeName = typeDescription.getName();
- typeDescriptionMap.put(typeName, typeDescription);
- }
-
- featureDescriptionMap = new HashMap<String, Set<FeatureDescription>>();
- for (TypeDescription typeDescription : descriptions) {
- Set<FeatureDescription> allFeatures = getAllDeclaredFeatures(typeDescription,
- typeDescriptionMap);
- featureDescriptionMap.put(typeDescription.getName(), allFeatures);
+ for (TypeDescription typeDescription : descriptions) {
+ Set<FeatureDescription> allFeatures = getAllDeclaredFeatures(typeDescription,
+ typeDescriptionMap);
+ featureDescriptionMap.put(typeDescription.getName(), allFeatures);
+ }
}
List<String> uimaPredefTypes = Arrays.asList(new String[] { "uima.cas.Boolean",
Modified: uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/TextRulerBasicLearner.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/TextRulerBasicLearner.java?rev=1567875&r1=1567874&r2=1567875&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/TextRulerBasicLearner.java (original)
+++ uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/TextRulerBasicLearner.java Thu Feb 13 10:16:45 2014
@@ -152,6 +152,11 @@ public abstract class TextRulerBasicLear
AnalysisEngineDescription description = TextRulerToolkit
.getAnalysisEngineDescription(descriptorFile);
+ if (description == null) {
+ sendStatusUpdateToDelegate("Failed to load descriptor. Please rebuild the project.",
+ TextRulerLearnerState.ML_INITIALIZING, false);
+ return;
+ }
TextRulerToolkit.addBoundaryTypes(description, slotNames);
ae = TextRulerToolkit.loadAnalysisEngine(description);
@@ -159,7 +164,8 @@ public abstract class TextRulerBasicLear
// the FILTERTYPE expression!
String tempRulesFileName = getTempRulesFileName();
IPath path = new Path(tempRulesFileName);
- ae.setConfigParameterValue(RutaEngine.PARAM_MAIN_SCRIPT, path.removeFileExtension().lastSegment());
+ ae.setConfigParameterValue(RutaEngine.PARAM_MAIN_SCRIPT, path.removeFileExtension()
+ .lastSegment());
String portableString = path.removeLastSegments(1).toPortableString();
ae.setConfigParameterValue(RutaEngine.PARAM_SCRIPT_PATHS, new String[] { portableString });
ae.setConfigParameterValue(RutaEngine.PARAM_ADDITIONAL_SCRIPTS, new String[0]);
@@ -183,6 +189,9 @@ public abstract class TextRulerBasicLear
protected boolean checkForMandatoryTypes() {
// check if all passed slot types are present:
CAS someCas = getTestCAS();
+ if (someCas == null) {
+ return false;
+ }
TypeSystem ts = someCas.getTypeSystem();
// GlobalCASSource.releaseCAS(someCas);
boolean result = true;
@@ -218,6 +227,9 @@ public abstract class TextRulerBasicLear
public void run() {
if (createTempDirIfNeccessary()) {
updateAE();
+ if (ae == null) {
+ return;
+ }
inducedRules.clear();
if (!checkForMandatoryTypes()) {
@@ -410,19 +422,19 @@ public abstract class TextRulerBasicLear
theRule.setCoveringStatistics(inducedRules.get(ruleString));
System.out.println("skipped with " + inducedRules.get(ruleString));
} else {
- TextRulerStatisticsCollector sumC = sums.get(ruleIndex);
- for (TextRulerExampleDocument theDoc : sortedDocs) {
- theDoc.resetAndFillTestCAS(theTestCAS, target);
- testRuleOnDocument(theRule, theDoc, sumC, theTestCAS);
- double errorRate = sumC.n / Math.max(sumC.p, 1);
- if (errorRate > maxErrorRate) {
- System.out.println("stopped:" + sumC);
- break;
+ TextRulerStatisticsCollector sumC = sums.get(ruleIndex);
+ for (TextRulerExampleDocument theDoc : sortedDocs) {
+ theDoc.resetAndFillTestCAS(theTestCAS, target);
+ testRuleOnDocument(theRule, theDoc, sumC, theTestCAS);
+ double errorRate = sumC.n / Math.max(sumC.p, 1);
+ if (errorRate > maxErrorRate) {
+ System.out.println("stopped:" + sumC);
+ break;
+ }
+ if (shouldAbort())
+ return;
}
- if (shouldAbort())
- return;
- }
- inducedRules.put(ruleString, sumC);
+ inducedRules.put(ruleString, sumC);
}
}
theTestCAS.reset();
Modified: uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerPreprocessor.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerPreprocessor.java?rev=1567875&r1=1567874&r2=1567875&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerPreprocessor.java (original)
+++ uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerPreprocessor.java Thu Feb 13 10:16:45 2014
@@ -53,6 +53,11 @@ public class TextRulerPreprocessor {
AnalysisEngineDescription analysisEngineDescription = TextRulerToolkit
.getAnalysisEngineDescription(TextRulerToolkit
.getEngineDescriptorFromTMSourceFile(new Path(tmFile)));
+ if(analysisEngineDescription == null) {
+ delegate.preprocessorStatusUpdate(this, "Descriptor is missing. Please rebuild the project.");
+ return null;
+ }
+
// we want to reuse these cases, so extend the type system in case a boundary-based learner is
// called
TextRulerToolkit.addBoundaryTypes(analysisEngineDescription, currentSlotNames);