You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2014/02/13 11:16:46 UTC

svn commit: r1567875 - in /uima/ruta/trunk: ruta-docbook/src/docbook/ ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/validator/ ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/ ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/

Author: pkluegl
Date: Thu Feb 13 10:16:45 2014
New Revision: 1567875

URL: http://svn.apache.org/r1567875
Log:
UIMA-3539
- improved Workbench/TextRuler for missing descriptors

Modified:
    uima/ruta/trunk/ruta-docbook/src/docbook/tools.ruta.workbench.textruler.xml
    uima/ruta/trunk/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/validator/LanguageCheckerVisitor.java
    uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/TextRulerBasicLearner.java
    uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerPreprocessor.java

Modified: uima/ruta/trunk/ruta-docbook/src/docbook/tools.ruta.workbench.textruler.xml
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-docbook/src/docbook/tools.ruta.workbench.textruler.xml?rev=1567875&r1=1567874&r2=1567875&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-docbook/src/docbook/tools.ruta.workbench.textruler.xml (original)
+++ uima/ruta/trunk/ruta-docbook/src/docbook/tools.ruta.workbench.textruler.xml Thu Feb 13 10:16:45 2014
@@ -257,6 +257,7 @@ under the License.
       <para> 
       This section gives a short example of how the TextRuler framework is applied in order to induce annotation rules. We refer to the screenshot in <xref linkend="figure.tools.ruta.workbench.textruler.main"/>
       for the configuration and are using the exemplary UIMA Ruta project <quote>TextRulerExample</quote>, which is part of the source release of UIMA Ruta.
+      After importing the project into your workspace, please rebuild all UIMA Ruta scripts in order to create the descriptors, e.g., by cleaning the project.
       </para>
       <para> 
         In this example, we are using the <quote>KEP</quote> algorithm for learning annotation rules for identifying Bibtex entries in the reference section of scientific publications:

Modified: uima/ruta/trunk/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/validator/LanguageCheckerVisitor.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/validator/LanguageCheckerVisitor.java?rev=1567875&r1=1567874&r2=1567875&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/validator/LanguageCheckerVisitor.java (original)
+++ uima/ruta/trunk/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/validator/LanguageCheckerVisitor.java Thu Feb 13 10:16:45 2014
@@ -765,9 +765,8 @@ public class LanguageCheckerVisitor exte
         return true;
       }
     }
-    if( featureName.equals("begin")
-            || featureName.equals("end")) {
-      return kind ==-1 || kind == RutaTypeConstants.RUTA_TYPE_N;
+    if (featureName.equals("begin") || featureName.equals("end")) {
+      return kind == -1 || kind == RutaTypeConstants.RUTA_TYPE_N;
     }
     Set<FeatureDescription> set = featureDescriptionMap.get(longTypeName);
     if (set != null) {
@@ -807,7 +806,7 @@ public class LanguageCheckerVisitor exte
 
   private String getLongNameOfNewType(String shortName) {
     String moduleName = sourceModule.getElementName();
-    moduleName = moduleName.substring(0, moduleName.length()-5);
+    moduleName = moduleName.substring(0, moduleName.length() - 5);
     String packagePrefix = "";
     if (!packageName.isEmpty()) {
       packagePrefix = packageName + ".";
@@ -897,6 +896,9 @@ public class LanguageCheckerVisitor exte
 
   private void initializePredefinedInformation() {
 
+    typeDescriptionMap = new HashMap<String, TypeDescription>();
+    featureDescriptionMap = new HashMap<String, Set<FeatureDescription>>();
+
     try {
       typeSystemDescription = getTypeSystemOfScript();
       IPath descriptorRootPath = RutaProjectUtils.getDescriptorRootPath(sourceModule
@@ -906,19 +908,18 @@ public class LanguageCheckerVisitor exte
     } catch (Exception e) {
       RutaIdeUIPlugin.error(e);
     }
+    if (typeSystemDescription != null) {
+      TypeDescription[] descriptions = typeSystemDescription.getTypes();
+      for (TypeDescription typeDescription : descriptions) {
+        String typeName = typeDescription.getName();
+        typeDescriptionMap.put(typeName, typeDescription);
+      }
 
-    typeDescriptionMap = new HashMap<String, TypeDescription>();
-    TypeDescription[] descriptions = typeSystemDescription.getTypes();
-    for (TypeDescription typeDescription : descriptions) {
-      String typeName = typeDescription.getName();
-      typeDescriptionMap.put(typeName, typeDescription);
-    }
-
-    featureDescriptionMap = new HashMap<String, Set<FeatureDescription>>();
-    for (TypeDescription typeDescription : descriptions) {
-      Set<FeatureDescription> allFeatures = getAllDeclaredFeatures(typeDescription,
-              typeDescriptionMap);
-      featureDescriptionMap.put(typeDescription.getName(), allFeatures);
+      for (TypeDescription typeDescription : descriptions) {
+        Set<FeatureDescription> allFeatures = getAllDeclaredFeatures(typeDescription,
+                typeDescriptionMap);
+        featureDescriptionMap.put(typeDescription.getName(), allFeatures);
+      }
     }
 
     List<String> uimaPredefTypes = Arrays.asList(new String[] { "uima.cas.Boolean",

Modified: uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/TextRulerBasicLearner.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/TextRulerBasicLearner.java?rev=1567875&r1=1567874&r2=1567875&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/TextRulerBasicLearner.java (original)
+++ uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/TextRulerBasicLearner.java Thu Feb 13 10:16:45 2014
@@ -152,6 +152,11 @@ public abstract class TextRulerBasicLear
 
     AnalysisEngineDescription description = TextRulerToolkit
             .getAnalysisEngineDescription(descriptorFile);
+    if (description == null) {
+      sendStatusUpdateToDelegate("Failed to load descriptor. Please rebuild the project.",
+              TextRulerLearnerState.ML_INITIALIZING, false);
+      return;
+    }
     TextRulerToolkit.addBoundaryTypes(description, slotNames);
     ae = TextRulerToolkit.loadAnalysisEngine(description);
 
@@ -159,7 +164,8 @@ public abstract class TextRulerBasicLear
     // the FILTERTYPE expression!
     String tempRulesFileName = getTempRulesFileName();
     IPath path = new Path(tempRulesFileName);
-    ae.setConfigParameterValue(RutaEngine.PARAM_MAIN_SCRIPT, path.removeFileExtension().lastSegment());
+    ae.setConfigParameterValue(RutaEngine.PARAM_MAIN_SCRIPT, path.removeFileExtension()
+            .lastSegment());
     String portableString = path.removeLastSegments(1).toPortableString();
     ae.setConfigParameterValue(RutaEngine.PARAM_SCRIPT_PATHS, new String[] { portableString });
     ae.setConfigParameterValue(RutaEngine.PARAM_ADDITIONAL_SCRIPTS, new String[0]);
@@ -183,6 +189,9 @@ public abstract class TextRulerBasicLear
   protected boolean checkForMandatoryTypes() {
     // check if all passed slot types are present:
     CAS someCas = getTestCAS();
+    if (someCas == null) {
+      return false;
+    }
     TypeSystem ts = someCas.getTypeSystem();
     // GlobalCASSource.releaseCAS(someCas);
     boolean result = true;
@@ -218,6 +227,9 @@ public abstract class TextRulerBasicLear
   public void run() {
     if (createTempDirIfNeccessary()) {
       updateAE();
+      if (ae == null) {
+        return;
+      }
       inducedRules.clear();
       if (!checkForMandatoryTypes()) {
 
@@ -410,19 +422,19 @@ public abstract class TextRulerBasicLear
         theRule.setCoveringStatistics(inducedRules.get(ruleString));
         System.out.println("skipped with " + inducedRules.get(ruleString));
       } else {
-      TextRulerStatisticsCollector sumC = sums.get(ruleIndex);
-      for (TextRulerExampleDocument theDoc : sortedDocs) {
-        theDoc.resetAndFillTestCAS(theTestCAS, target);
-        testRuleOnDocument(theRule, theDoc, sumC, theTestCAS);
-        double errorRate = sumC.n / Math.max(sumC.p, 1);
-        if (errorRate > maxErrorRate) {
-          System.out.println("stopped:" + sumC);
-          break;
+        TextRulerStatisticsCollector sumC = sums.get(ruleIndex);
+        for (TextRulerExampleDocument theDoc : sortedDocs) {
+          theDoc.resetAndFillTestCAS(theTestCAS, target);
+          testRuleOnDocument(theRule, theDoc, sumC, theTestCAS);
+          double errorRate = sumC.n / Math.max(sumC.p, 1);
+          if (errorRate > maxErrorRate) {
+            System.out.println("stopped:" + sumC);
+            break;
+          }
+          if (shouldAbort())
+            return;
         }
-        if (shouldAbort())
-          return;
-      }
-      inducedRules.put(ruleString, sumC);
+        inducedRules.put(ruleString, sumC);
       }
     }
     theTestCAS.reset();

Modified: uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerPreprocessor.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerPreprocessor.java?rev=1567875&r1=1567874&r2=1567875&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerPreprocessor.java (original)
+++ uima/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/extension/TextRulerPreprocessor.java Thu Feb 13 10:16:45 2014
@@ -53,6 +53,11 @@ public class TextRulerPreprocessor {
     AnalysisEngineDescription analysisEngineDescription = TextRulerToolkit
             .getAnalysisEngineDescription(TextRulerToolkit
                     .getEngineDescriptorFromTMSourceFile(new Path(tmFile)));
+    if(analysisEngineDescription == null) {
+      delegate.preprocessorStatusUpdate(this, "Descriptor is missing. Please rebuild the project.");
+      return null;
+    }
+    
     // we want to reuse these cases, so extend the type system in case a boundary-based learner is
     // called
     TextRulerToolkit.addBoundaryTypes(analysisEngineDescription, currentSlotNames);