You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ea...@apache.org on 2013/12/02 22:14:18 UTC

svn commit: r1547193 - in /uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps: DuccCasCC.java DuccCasCM.java DuccJobCasCR.java DuccJobTextCR.java DuccSampleAE.java DuccTextCM.java

Author: eae
Date: Mon Dec  2 21:14:17 2013
New Revision: 1547193

URL: http://svn.apache.org/r1547193
Log:
UIMA-3461 Add class comments for sample code

Modified:
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccSampleAE.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCM.java

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java?rev=1547193&r1=1547192&r2=1547193&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java Mon Dec  2 21:14:17 2013
@@ -19,6 +19,18 @@
 
 package org.apache.uima.ducc.sampleapps;
 
+/*
+ * This sample Cas Consumer is designed to create an output zip file for each Work Item.
+ * The CAS compression format is selectable as either ZIP compressed XmiCas or UIMA 
+ * compressed binary form 6 format. When compressed binary is used, each zip file also 
+ * contains the full UIMA Type System in ZIP compressed text.
+ * CASes in UIMA compressed binary form 6 format have the same flexibility as an XmiCas 
+ * in that they can be deserialized into a CAS with a different, but compatible Type System.
+ * 
+ * See more information in DUCC Book chapters on sample applications.
+ * 
+ */
+
 import java.io.BufferedOutputStream;
 import java.io.File;
 import java.io.FileOutputStream;

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java?rev=1547193&r1=1547192&r2=1547193&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java Mon Dec  2 21:14:17 2013
@@ -19,6 +19,15 @@
 
 package org.apache.uima.ducc.sampleapps;
 
+/*
+ * This sample Cas Multiplier reads compressed CASes from a specified zipfile
+ * and returns each as a child CAS. A zipfile may contain zip-compressed XMI
+ * format CASes or UIMA compressed binary form 6 format CASes. 
+ * 
+ * See more information in DUCC Book chapters on sample applications.
+ * 
+ */
+
 import java.io.BufferedInputStream;
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java?rev=1547193&r1=1547192&r2=1547193&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java Mon Dec  2 21:14:17 2013
@@ -36,8 +36,11 @@ import org.apache.uima.util.Progress;
 import org.apache.uima.util.ProgressImpl;
 
 /**
- * A DUCC Job collection reader that reads text files from a directory in the filesystem.
- * It can be configured with the following parameters:
+ * A DUCC Job collection reader that returns a Work Item CAS for each zipfile found in
+ * a directory. By default any previously completed output files found in the output 
+ * directory are preserved, but a configuration parameter is available to ignore previous output. 
+ *
+ * This CR can be configured with the following parameters:
  * <ul>
  * <li><code>InputSpec</code> - path to directory containing input *.zip files</li>
  * <li><code>OutputDirectory</code> - path to directory for output files</li>

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java?rev=1547193&r1=1547192&r2=1547193&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java Mon Dec  2 21:14:17 2013
@@ -37,8 +37,15 @@ import org.apache.uima.util.Progress;
 import org.apache.uima.util.ProgressImpl;
 
 /**
- * A simple DUCC Job collection reader that reads text files from a directory in the filesystem.
- * It can be configured with the following parameters:
+ * A DUCC Job collection reader that segments a collection of text files from a directory 
+ * in the filesystem into separate Work Items for a DUCC job.
+ * In order to facilitate processing scale out, an optional blocksize parameter can be 
+ * specified that will be used to break larger files into multiple Work Items.
+ * 
+ * By default any previously completed output files found in the output directory are preserved,
+ * but a configuration parameter is available to ignore previous output. 
+ *
+ * This CR can be configured with the following parameters:
  * <ul>
  * <li><code>InputDirectory</code> - path to directory containing input files</li>
  * <li><code>OutputDirectory</code> - path to directory for output files</li>

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccSampleAE.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccSampleAE.java?rev=1547193&r1=1547192&r2=1547193&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccSampleAE.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccSampleAE.java Mon Dec  2 21:14:17 2013
@@ -19,6 +19,15 @@
 
 package org.apache.uima.ducc.sampleapps;
 
+/*
+ * This sample Analysis Engine is a no-op annotator. It is used to
+ * demonstrate DUCC's overhead when processing a collection of compressed
+ * CASes. 
+ * 
+ * See more information in DUCC Book chapters on sample applications.
+ * 
+ */
+
 import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCM.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCM.java?rev=1547193&r1=1547192&r2=1547193&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCM.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCM.java Mon Dec  2 21:14:17 2013
@@ -19,6 +19,17 @@
 
 package org.apache.uima.ducc.sampleapps;
 
+/*
+ * This sample Cas Multiplier uses paragraph boundaries to segment a text file, 
+ * or a part of a text file, into multiple documents. A child CAS is created
+ * for each document. Paragraphs that cross block boundaries are processed
+ * in the block where they started. An error is thrown if a paragraph crosses 
+ * two block boundaries.
+ * 
+ * See more information in DUCC Book chapters on sample applications.
+ * 
+ */
+
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;