You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/05/02 14:34:33 UTC

svn commit: r1591889 [3/14] - in /opennlp/trunk: opennlp-tools/lang/ml/ opennlp-tools/src/main/java/opennlp/tools/chunker/ opennlp-tools/src/main/java/opennlp/tools/cmdline/ opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ opennlp-tools/src/m...

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java Fri May  2 12:34:23 2014
@@ -35,7 +35,7 @@ import opennlp.tools.util.model.ModelUti
 
 public final class SentenceDetectorCrossValidatorTool
     extends AbstractCrossValidatorTool<SentenceSample, CVToolParams> {
-  
+
   interface CVToolParams extends TrainingParams, CVParams {
   }
 
@@ -46,7 +46,7 @@ public final class SentenceDetectorCross
   public String getShortDescription() {
     return "K-fold cross validator for the learnable sentence detector";
   }
-  
+
   public void run(String format, String[] args) {
     super.run(format, args);
 
@@ -56,7 +56,7 @@ public final class SentenceDetectorCross
     }
 
     SDCrossValidator validator;
-    
+
     SentenceDetectorEvaluationMonitor errorListener = null;
     if (params.getMisclassified()) {
       errorListener = new SentenceEvaluationErrorListener();
@@ -74,7 +74,7 @@ public final class SentenceDetectorCross
           params.getFactory(), params.getLang(), true, abbreviations, eos);
       validator = new SDCrossValidator(params.getLang(), mlParams, sdFactory,
           errorListener);
-      
+
       validator.evaluate(sampleStream, params.getFolds());
     }
     catch (IOException e) {
@@ -88,9 +88,9 @@ public final class SentenceDetectorCross
         // sorry that this can fail
       }
     }
-    
+
     FMeasure result = validator.getFMeasure();
-    
+
     System.out.println(result.toString());
   }
 }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java Fri May  2 12:34:23 2014
@@ -42,17 +42,17 @@ public final class SentenceDetectorEvalu
   public String getShortDescription() {
     return "evaluator for the learnable sentence detector";
   }
-  
+
   public void run(String format, String[] args) {
     super.run(format, args);
 
     SentenceModel model = new SentenceModelLoader().load(params.getModel());
-    
+
     SentenceDetectorEvaluationMonitor errorListener = null;
     if (params.getMisclassified()) {
       errorListener = new SentenceEvaluationErrorListener();
     }
-    
+
     SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator(
         new SentenceDetectorME(model), errorListener);
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java Fri May  2 12:34:23 2014
@@ -39,7 +39,7 @@ public final class SentenceDetectorTool 
   public String getShortDescription() {
     return "learnable sentence detector";
   }
-  
+
   public String getHelp() {
     return "Usage: " + CLI.CMD + " " + getName() + " model < sentences";
   }
@@ -50,7 +50,7 @@ public final class SentenceDetectorTool 
    * A newline will be treated as a paragraph boundary.
    */
   public void run(String[] args) {
-    
+
     if (args.length != 1) {
       System.out.println(getHelp());
     } else {
@@ -64,7 +64,7 @@ public final class SentenceDetectorTool 
       try {
         ObjectStream<String> paraStream = new ParagraphStream(new PlainTextByLineStream(new SystemInputStreamFactory(),
             SystemInputStreamFactory.encoding()));
-        
+
         String para;
         while ((para = paraStream.read()) != null) {
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java Fri May  2 12:34:23 2014
@@ -38,7 +38,7 @@ import opennlp.tools.util.model.ModelUti
 
 public final class SentenceDetectorTrainerTool
     extends AbstractTrainerTool<SentenceSample, TrainerToolParams> {
-  
+
   interface TrainerToolParams extends TrainingParams, TrainingToolParams {
   }
 
@@ -49,7 +49,7 @@ public final class SentenceDetectorTrain
   public String getShortDescription() {
     return "trainer for the learnable sentence detector";
   }
-  
+
   static Dictionary loadDict(File f) throws IOException {
     Dictionary dict = null;
     if (f != null) {
@@ -58,7 +58,7 @@ public final class SentenceDetectorTrain
     }
     return dict;
   }
-  
+
   public void run(String format, String[] args) {
     super.run(format, args);
 
@@ -103,7 +103,7 @@ public final class SentenceDetectorTrain
         // sorry that this can fail
       }
     }
-    
+
     CmdLineUtil.writeModel("sentence detector", modelOutFile, model);
   }
 }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceEvaluationErrorListener.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceEvaluationErrorListener.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceEvaluationErrorListener.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceEvaluationErrorListener.java Fri May  2 12:34:23 2014
@@ -27,7 +27,7 @@ import opennlp.tools.util.eval.Evaluatio
 /**
  * A default implementation of {@link EvaluationMonitor} that prints
  * to an output stream.
- * 
+ *
  */
 public class SentenceEvaluationErrorListener extends
     EvaluationErrorPrinter<SentenceSample> implements

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceModelLoader.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceModelLoader.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceModelLoader.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceModelLoader.java Fri May  2 12:34:23 2014
@@ -34,7 +34,7 @@ final class SentenceModelLoader extends 
   public SentenceModelLoader() {
     super("Sentence Detector");
   }
-  
+
   @Override
   protected SentenceModel loadModel(InputStream modelIn) throws IOException,
       InvalidFormatException {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/TrainingParams.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/TrainingParams.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/TrainingParams.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/TrainingParams.java Fri May  2 12:34:23 2014
@@ -25,7 +25,7 @@ import opennlp.tools.cmdline.params.Basi
 
 /**
  * TrainingParams for Sentence Detector.
- * 
+ *
  * Note: Do not use this class, internal use only!
  */
 interface TrainingParams extends BasicTrainingParams {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DetokenizationDictionaryLoader.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DetokenizationDictionaryLoader.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DetokenizationDictionaryLoader.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DetokenizationDictionaryLoader.java Fri May  2 12:34:23 2014
@@ -28,7 +28,7 @@ final class DetokenizationDictionaryLoad
   DetokenizationDictionaryLoader() {
     super("detokenizer dictionary");
   }
-  
+
   @Override
   protected DetokenizationDictionary loadModel(InputStream modelIn) throws IOException {
     return new DetokenizationDictionary(modelIn);

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java Fri May  2 12:34:23 2014
@@ -36,10 +36,10 @@ public final class DictionaryDetokenizer
   public String getHelp() {
     return "Usage: " + CLI.CMD + " " + getName() + " detokenizerDictionary";
   }
-  
+
   public void run(String[] args) {
-    
-    
+
+
     if (args.length != 1) {
       System.out.println(getHelp());
     } else {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/SimpleTokenizerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/SimpleTokenizerTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/SimpleTokenizerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/SimpleTokenizerTool.java Fri May  2 12:34:23 2014
@@ -25,7 +25,7 @@ public final class SimpleTokenizerTool e
   public String getShortDescription() {
     return "character class tokenizer";
   }
-  
+
   public String getHelp() {
     return "Usage: " + CLI.CMD + " " + getName() + " < sentences";
   }
@@ -39,7 +39,7 @@ public final class SimpleTokenizerTool e
     if (args.length != 0) {
       System.out.println(getHelp());
     } else {
-    
+
       CommandLineTokenizer tokenizer =
         new CommandLineTokenizer(opennlp.tools.tokenize.SimpleTokenizer.INSTANCE);
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenEvaluationErrorListener.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenEvaluationErrorListener.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenEvaluationErrorListener.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenEvaluationErrorListener.java Fri May  2 12:34:23 2014
@@ -27,7 +27,7 @@ import opennlp.tools.util.eval.Evaluatio
 /**
  * A default implementation of {@link EvaluationMonitor} that prints
  * to an output stream.
- * 
+ *
  */
 public class TokenEvaluationErrorListener extends
     EvaluationErrorPrinter<TokenSample> implements TokenizerEvaluationMonitor {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java Fri May  2 12:34:23 2014
@@ -34,7 +34,7 @@ import opennlp.tools.util.model.ModelUti
 
 public final class TokenizerCrossValidatorTool
     extends AbstractCrossValidatorTool<TokenSample, CVToolParams> {
-  
+
   interface CVToolParams extends CVParams, TrainingParams {
   }
 
@@ -45,7 +45,7 @@ public final class TokenizerCrossValidat
   public String getShortDescription() {
     return "K-fold cross validator for the learnable tokenizer";
   }
-  
+
   public void run(String format, String[] args) {
     super.run(format, args);
 
@@ -55,12 +55,12 @@ public final class TokenizerCrossValidat
     }
 
     TokenizerCrossValidator validator;
-    
+
     TokenizerEvaluationMonitor listener = null;
     if (params.getMisclassified()) {
       listener = new TokenEvaluationErrorListener();
     }
-    
+
     try {
       Dictionary dict = TokenizerTrainerTool.loadDict(params.getAbbDict());
 
@@ -83,9 +83,9 @@ public final class TokenizerCrossValidat
         // sorry that this can fail
       }
     }
-    
+
     FMeasure result = validator.getFMeasure();
-    
+
     System.out.println(result.toString());
   }
 }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMEEvaluatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMEEvaluatorTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMEEvaluatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMEEvaluatorTool.java Fri May  2 12:34:23 2014
@@ -41,7 +41,7 @@ public final class TokenizerMEEvaluatorT
   public String getShortDescription() {
     return "evaluator for the learnable tokenizer";
   }
-  
+
   public void run(String format, String[] args) {
     super.run(format, args);
 
@@ -69,7 +69,7 @@ public final class TokenizerMEEvaluatorT
         // sorry that this can fail
       }
     }
-    
+
     System.out.println("done");
 
     System.out.println();

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMETool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMETool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMETool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMETool.java Fri May  2 12:34:23 2014
@@ -28,16 +28,16 @@ public final class TokenizerMETool exten
   public String getShortDescription() {
     return "learnable tokenizer";
   }
-  
+
   public String getHelp() {
     return "Usage: " + CLI.CMD + " " + getName() + " model < sentences";
   }
-  
+
   public void run(String[] args) {
     if (args.length != 1) {
       System.out.println(getHelp());
     } else {
-    
+
       TokenizerModel model = new TokenizerModelLoader().load(new File(args[0]));
 
       CommandLineTokenizer tokenizer =

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerModelLoader.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerModelLoader.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerModelLoader.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerModelLoader.java Fri May  2 12:34:23 2014
@@ -33,7 +33,7 @@ public final class TokenizerModelLoader 
   public TokenizerModelLoader() {
     super("Tokenizer");
   }
-  
+
   @Override
   protected TokenizerModel loadModel(InputStream modelIn) throws IOException {
     return new TokenizerModel(modelIn);

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java Fri May  2 12:34:23 2014
@@ -36,7 +36,7 @@ import opennlp.tools.util.model.ModelUti
 
 public final class TokenizerTrainerTool
     extends AbstractTrainerTool<TokenSample, TrainerToolParams> {
-  
+
   interface TrainerToolParams extends TrainingParams, TrainingToolParams {
   }
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParams.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParams.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParams.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParams.java Fri May  2 12:34:23 2014
@@ -25,14 +25,14 @@ import opennlp.tools.cmdline.params.Basi
 
 /**
  * TrainingParameters for Tokenizer.
- * 
+ *
  * Note: Do not use this class, internal use only!
  */
 interface TrainingParams extends BasicTrainingParams {
   @ParameterDescription(valueName = "isAlphaNumOpt", description = "Optimization flag to skip alpha numeric tokens for further tokenization")
   @OptionalParameter(defaultValue = "false")
   Boolean getAlphaNumOpt();
-  
+
   @ParameterDescription(valueName = "path", description = "abbreviation dictionary in XML format.")
   @OptionalParameter
   File getAbbDict();

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java Fri May  2 12:34:23 2014
@@ -125,12 +125,12 @@ public class Dictionary implements Itera
 
   /**
    * Loads a Dictionary from a XML file.
-   * 
+   *
    * @deprecated This constructor is deprecated. Passing the case sensitivity
    *             flag has no effect. Use
    *             {@link Dictionary#Dictionary(InputStream)} instead and set the
    *             case sensitivity during the dictionary creation.
-   * 
+   *
    * @param in
    *          the dictionary in its XML format
    * @param caseSensitive
@@ -152,17 +152,17 @@ public class Dictionary implements Itera
       minTokenCount = Math.min(minTokenCount, tokens.size());
       maxTokenCount = Math.max(maxTokenCount, tokens.size());
   }
-  
+
   /**
-   * 
+   *
    * @return minimum token count in the dictionary
    */
   public int getMinTokenCount() {
       return minTokenCount;
   }
-  
+
   /**
-   * 
+   *
    * @return maximum token count in the dictionary
    */
   public int getMaxTokenCount() {
@@ -240,7 +240,7 @@ public class Dictionary implements Itera
         public Entry next() {
 
           StringList tokens = dictionaryIterator.next();
-          
+
           return new Entry(tokens, new Attributes());
         }
 
@@ -321,10 +321,10 @@ public class Dictionary implements Itera
   /**
    * Gets this dictionary as a {@code Set<String>}. Only {@code iterator()},
    * {@code size()} and {@code contains(Object)} methods are implemented.
-   * 
+   *
    * If this dictionary entries are multi tokens only the first token of the
    * entry will be part of the Set.
-   * 
+   *
    * @return a Set containing the entries of this dictionary
    */
   public Set<String> asStringSet() {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionarySerializer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionarySerializer.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionarySerializer.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionarySerializer.java Fri May  2 12:34:23 2014
@@ -58,7 +58,7 @@ public class DictionarySerializer {
 //    private boolean mIsInsideEntryElement;
     private boolean mIsInsideTokenElement;
     private boolean mIsCaseSensitiveDictionary;
-    
+
     private List<String> mTokenList = new LinkedList<String>();
 
     private StringBuilder token = new StringBuilder();
@@ -87,16 +87,16 @@ public class DictionarySerializer {
        if (DICTIONARY_ELEMENT.equals(localName)) {
 
          mAttributes = new Attributes();
-         
+
          for (int i = 0; i < atts.getLength(); i++) {
            mAttributes.setValue(atts.getLocalName(i), atts.getValue(i));
          }
          /* get the attribute here ... */
          if (mAttributes.getValue(ATTRIBUTE_CASE_SENSITIVE) != null) {
-           mIsCaseSensitiveDictionary = Boolean.valueOf(mAttributes.getValue(ATTRIBUTE_CASE_SENSITIVE));   
+           mIsCaseSensitiveDictionary = Boolean.valueOf(mAttributes.getValue(ATTRIBUTE_CASE_SENSITIVE));
          }
          mAttributes = null;
-       } 
+       }
        else if (ENTRY_ELEMENT.equals(localName)) {
 
          mAttributes = new Attributes();
@@ -193,7 +193,7 @@ public class DictionarySerializer {
   private static final String TOKEN_ELEMENT = "token";
   private static final String ATTRIBUTE_CASE_SENSITIVE = "case_sensitive";
 
-  
+
   /**
    * Creates {@link Entry}s from the given {@link InputStream} and
    * forwards these {@link Entry}s to the {@link EntryInserter}.
@@ -204,7 +204,7 @@ public class DictionarySerializer {
    * @param inserter inserter to forward entries to
    *
    * @return isCaseSensitive attribute for Dictionary
-   * 
+   *
    * @throws IOException
    * @throws InvalidFormatException
    */
@@ -240,11 +240,11 @@ public class DictionarySerializer {
    * @deprecated Use {@link DictionarySerializer#serialize(java.io.OutputStream, java.util.Iterator, boolean)} instead
    */
   @Deprecated
-  public static void serialize(OutputStream out, Iterator<Entry> entries) 
+  public static void serialize(OutputStream out, Iterator<Entry> entries)
           throws IOException {
       DictionarySerializer.serialize(out, entries, true);
   }
-  
+
   /**
    * Serializes the given entries to the given {@link OutputStream}.
    *
@@ -253,12 +253,12 @@ public class DictionarySerializer {
    *
    * @param out stream to serialize to
    * @param entries entries to serialize
-   * @param casesensitive indicates if the written dictionary 
+   * @param casesensitive indicates if the written dictionary
    *        should be case sensitive or case insensitive.
    *
    * @throws IOException If an I/O error occurs
    */
-  public static void serialize(OutputStream out, Iterator<Entry> entries, 
+  public static void serialize(OutputStream out, Iterator<Entry> entries,
           boolean casesensitive)
       throws IOException {
     StreamResult streamResult = new StreamResult(out);

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java Fri May  2 12:34:23 2014
@@ -29,14 +29,14 @@ import opennlp.tools.util.featuregen.Str
 public class BagOfWordsFeatureGenerator implements FeatureGenerator {
 
   private boolean useOnlyAllLetterTokens = false;
-  
+
   public BagOfWordsFeatureGenerator() {
   }
-  
+
   BagOfWordsFeatureGenerator(boolean useOnlyAllLetterTokens) {
     this.useOnlyAllLetterTokens = useOnlyAllLetterTokens;
   }
-  
+
   @Override
   public Collection<String> extractFeatures(String[] text) {
 
@@ -45,7 +45,7 @@ public class BagOfWordsFeatureGenerator 
     for (String word : text) {
       if (useOnlyAllLetterTokens) {
         StringPattern pattern = StringPattern.recognize(word);
-        
+
         if (pattern.isAllLetter())
           bagOfWords.add("bow=" + word);
       }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java Fri May  2 12:34:23 2014
@@ -47,10 +47,10 @@ public interface DocumentCategorizer {
   public double[] categorize(String documentText);
 
   public String getAllResults(double results[]);
-  
-  public Map<String, Double> scoreMap(String text); 
+
+  public Map<String, Double> scoreMap(String text);
 
   public SortedMap<Double, Set<String>> sortedScoreMap(String text);
-  
+
 }
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java Fri May  2 12:34:23 2014
@@ -94,7 +94,7 @@ public class DocumentCategorizerME imple
 /**
  * Returns a map in which the key is the category name and the value is the score
  * @param text the input text to classify
- * @return 
+ * @return
  */
   public Map<String, Double> scoreMap(String text) {
     Map<String, Double> probDist = new HashMap<String, Double>();
@@ -109,10 +109,10 @@ public class DocumentCategorizerME imple
 
   }
 /**
- * Returns a map with the score as a key in ascendng order. The value is a Set of categories with the score. 
+ * Returns a map with the score as a key in ascending order. The value is a Set of categories with the score.
  * Many categories can have the same score, hence the Set as value
  * @param text the input text to classify
- * @return 
+ * @return
  */
   public SortedMap<Double, Set<String>> sortedScoreMap(String text) {
     SortedMap<Double, Set<String>> descendingMap = new TreeMap<Double, Set<String>>();

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java Fri May  2 12:34:23 2014
@@ -56,26 +56,26 @@ public class DocumentSample {
   public String[] getText() {
     return text.toArray(new String[text.size()]);
   }
-  
+
   @Override
   public String toString() {
-    
+
     StringBuilder sampleString = new StringBuilder();
-    
+
     sampleString.append(category).append('\t');
 
     for (String s : text) {
       sampleString.append(s).append(' ');
     }
-    
+
     if (sampleString.length() > 0) {
       // remove last space
       sampleString.setLength(sampleString.length() - 1);
     }
-    
+
     return sampleString.toString();
   }
-  
+
   @Override
   public boolean equals(Object obj) {
     if (this == obj) {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java Fri May  2 12:34:23 2014
@@ -39,25 +39,25 @@ public class DocumentSampleStream extend
 
   public DocumentSample read() throws IOException {
     String sampleString = samples.read();
-    
+
     if (sampleString != null) {
-      
+
       // Whitespace tokenize entire string
       String tokens[] = WhitespaceTokenizer.INSTANCE.tokenize(sampleString);
-      
+
       DocumentSample sample;
-      
+
       if (tokens.length > 1) {
         String category = tokens[0];
         String docTokens[] = new String[tokens.length - 1];
         System.arraycopy(tokens, 1, docTokens, 0, tokens.length -1);
-        
+
         sample = new DocumentSample(category, docTokens);
       }
       else {
         throw new IOException("Empty lines, or lines with only a category string are not allowed!");
       }
-      
+
       return sample;
     }
     else {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java Fri May  2 12:34:23 2014
@@ -62,7 +62,7 @@ public class EntityLinkerProperties {
     props = new Properties();
     props.load(propertiesIn);
   }
-  
+
   /**
    * Gets a property from the props file.
    *

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java Fri May  2 12:34:23 2014
@@ -49,11 +49,11 @@ public class BioNLP2004NameSampleStream 
   public static final int GENERATE_CELLTYPE_ENTITIES = 0x01 << 2;
   public static final int GENERATE_CELLLINE_ENTITIES = 0x01 << 3;
   public static final int GENERATE_RNA_ENTITIES = 0x01 << 4;
-  
+
   private final int types;
-  
+
   private final ObjectStream<String> lineStream;
-  
+
   public BioNLP2004NameSampleStream(InputStreamFactory in, int types) throws IOException {
     try {
       this.lineStream = new PlainTextByLineStream(in, Charset.forName("UTF-8"));
@@ -62,11 +62,11 @@ public class BioNLP2004NameSampleStream 
       // UTF-8 is available on all JVMs, will never happen
       throw new IllegalStateException(e);
     }
-    
+
     this.types = types;
-    
+
   }
-  
+
   @Deprecated
   public BioNLP2004NameSampleStream(InputStream in, int types) {
     try {
@@ -76,33 +76,33 @@ public class BioNLP2004NameSampleStream 
       // UTF-8 is available on all JVMs, will never happen
       throw new IllegalStateException(e);
     }
-    
+
     this.types = types;
   }
-  
+
   public NameSample read() throws IOException {
 
     List<String> sentence = new ArrayList<String>();
     List<String> tags = new ArrayList<String>();
-    
+
     boolean isClearAdaptiveData = false;
-    
+
     // Empty line indicates end of sentence
-    
+
     String line;
     while ((line = lineStream.read()) != null && !StringUtil.isEmpty(line.trim())) {
-      
+
       if (line.startsWith("###MEDLINE:")) {
         isClearAdaptiveData = true;
         lineStream.read();
         continue;
       }
-      
+
       if (line.contains("ABSTRACT TRUNCATED"))
         continue;
-      
+
       String fields[] = line.split("\t");
-      
+
       if (fields.length == 2) {
         sentence.add(fields[0]);
         tags.add(fields[1]);
@@ -112,40 +112,40 @@ public class BioNLP2004NameSampleStream 
             fields.length + " for line '" + line + "'!");
       }
     }
-    
+
     if (sentence.size() > 0) {
-      
+
       // convert name tags into spans
       List<Span> names = new ArrayList<Span>();
-      
+
       int beginIndex = -1;
       int endIndex = -1;
       for (int i = 0; i < tags.size(); i++) {
-        
+
         String tag = tags.get(i);
-        
-        if (tag.endsWith("DNA") && (types & GENERATE_DNA_ENTITIES) == 0) 
+
+        if (tag.endsWith("DNA") && (types & GENERATE_DNA_ENTITIES) == 0)
           tag = "O";
-        
-        if (tag.endsWith("protein") && (types & GENERATE_PROTEIN_ENTITIES) == 0) 
+
+        if (tag.endsWith("protein") && (types & GENERATE_PROTEIN_ENTITIES) == 0)
           tag = "O";
-        
-        if (tag.endsWith("cell_type") && (types & GENERATE_CELLTYPE_ENTITIES) == 0) 
+
+        if (tag.endsWith("cell_type") && (types & GENERATE_CELLTYPE_ENTITIES) == 0)
           tag = "O";
 
-        if (tag.endsWith("cell_line") && (types & GENERATE_CELLTYPE_ENTITIES) == 0) 
+        if (tag.endsWith("cell_line") && (types & GENERATE_CELLTYPE_ENTITIES) == 0)
           tag = "O";
-        if (tag.endsWith("RNA") && (types & GENERATE_RNA_ENTITIES) == 0) 
+        if (tag.endsWith("RNA") && (types & GENERATE_RNA_ENTITIES) == 0)
           tag = "O";
-        
+
         if (tag.startsWith("B-")) {
-          
+
           if (beginIndex != -1) {
             names.add(new Span(beginIndex, endIndex, tags.get(beginIndex).substring(2)));
             beginIndex = -1;
             endIndex = -1;
           }
-          
+
           beginIndex = i;
           endIndex = i +1;
         }
@@ -163,11 +163,11 @@ public class BioNLP2004NameSampleStream 
           throw new IOException("Invalid tag: " + tag);
         }
       }
-      
+
       // if one span remains, create it here
       if (beginIndex != -1)
         names.add(new Span(beginIndex, endIndex, tags.get(beginIndex).substring(2)));
-      
+
       return new NameSample(sentence.toArray(new String[sentence.size()]), names.toArray(new Span[names.size()]), isClearAdaptiveData);
     }
     else if (line != null) {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStreamFactory.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStreamFactory.java Fri May  2 12:34:23 2014
@@ -42,29 +42,29 @@ public class BioNLP2004NameSampleStreamF
   }
 
   public ObjectStream<NameSample> create(String[] args) {
-    
+
     Parameters params = ArgumentParser.parse(args, Parameters.class);
 
     int typesToGenerate = 0;
-    
+
     if (params.getTypes().contains("DNA")) {
-      typesToGenerate = typesToGenerate | 
+      typesToGenerate = typesToGenerate |
           BioNLP2004NameSampleStream.GENERATE_DNA_ENTITIES;
     }
     else if (params.getTypes().contains("protein")) {
-      typesToGenerate = typesToGenerate | 
+      typesToGenerate = typesToGenerate |
           BioNLP2004NameSampleStream.GENERATE_PROTEIN_ENTITIES;
     }
     else if (params.getTypes().contains("cell_type")) {
-      typesToGenerate = typesToGenerate | 
+      typesToGenerate = typesToGenerate |
           BioNLP2004NameSampleStream.GENERATE_CELLTYPE_ENTITIES;
     }
     else if (params.getTypes().contains("cell_line")) {
-      typesToGenerate = typesToGenerate | 
+      typesToGenerate = typesToGenerate |
           BioNLP2004NameSampleStream.GENERATE_CELLLINE_ENTITIES;
     }
     else if (params.getTypes().contains("RNA")) {
-      typesToGenerate = typesToGenerate | 
+      typesToGenerate = typesToGenerate |
           BioNLP2004NameSampleStream.GENERATE_RNA_ENTITIES;
     }
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java Fri May  2 12:34:23 2014
@@ -53,25 +53,25 @@ public class Conll02NameSampleStream imp
     NL,
     ES
   }
-  
+
   public static final int GENERATE_PERSON_ENTITIES = 0x01;
   public static final int GENERATE_ORGANIZATION_ENTITIES = 0x01 << 1;
   public static final int GENERATE_LOCATION_ENTITIES = 0x01 << 2;
   public static final int GENERATE_MISC_ENTITIES = 0x01 << 3;
-  
+
   public static final String DOCSTART = "-DOCSTART-";
-	
+
   private final LANGUAGE lang;
   private final ObjectStream<String> lineStream;
-  
+
   private final int types;
-  
+
   public Conll02NameSampleStream(LANGUAGE lang, ObjectStream<String> lineStream, int types) {
     this.lang = lang;
     this.lineStream = lineStream;
     this.types = types;
   }
-  
+
   public Conll02NameSampleStream(LANGUAGE lang, InputStreamFactory in, int types) throws IOException {
     this.lang = lang;
     try {
@@ -80,13 +80,13 @@ public class Conll02NameSampleStream imp
     } catch (UnsupportedEncodingException e) {
       // UTF-8 is available on all JVMs, will never happen
       throw new IllegalStateException(e);
-    } 
+    }
     this.types = types;
   }
-  
+
   /**
    * @param lang
-   * @param in an Input Stream to read data. 
+   * @param in an Input Stream to read data.
    */
   @Deprecated
   public Conll02NameSampleStream(LANGUAGE lang, InputStream in, int types) {
@@ -97,14 +97,14 @@ public class Conll02NameSampleStream imp
     } catch (UnsupportedEncodingException e) {
       // UTF-8 is available on all JVMs, will never happen
       throw new IllegalStateException(e);
-    } 
+    }
     this.types = types;
   }
-  
+
   static final Span extract(int begin, int end, String beginTag) throws InvalidFormatException {
-    
+
     String type = beginTag.substring(2);
-    
+
     if ("PER".equals(type)) {
       type = "person";
     }
@@ -120,30 +120,30 @@ public class Conll02NameSampleStream imp
     else {
       throw new InvalidFormatException("Unknown type: " + type);
     }
-    
+
     return new Span(begin, end, type);
   }
 
-  
+
   public NameSample read() throws IOException {
 
     List<String> sentence = new ArrayList<String>();
     List<String> tags = new ArrayList<String>();
-    
+
     boolean isClearAdaptiveData = false;
-    
+
     // Empty line indicates end of sentence
-    
+
     String line;
     while ((line = lineStream.read()) != null && !StringUtil.isEmpty(line)) {
-      
+
       if (LANGUAGE.NL.equals(lang) && line.startsWith(DOCSTART)) {
         isClearAdaptiveData = true;
         continue;
       }
-      
+
       String fields[] = line.split(" ");
-      
+
       if (fields.length == 3) {
         sentence.add(fields[0]);
         tags.add(fields[2]);
@@ -153,42 +153,42 @@ public class Conll02NameSampleStream imp
             fields.length + " for line '" + line + "'!");
       }
     }
-    
+
     // Always clear adaptive data for spanish
     if (LANGUAGE.ES.equals(lang))
       isClearAdaptiveData = true;
-    
+
     if (sentence.size() > 0) {
-      
+
       // convert name tags into spans
       List<Span> names = new ArrayList<Span>();
-      
+
       int beginIndex = -1;
       int endIndex = -1;
       for (int i = 0; i < tags.size(); i++) {
-        
+
         String tag = tags.get(i);
-        
-        if (tag.endsWith("PER") && (types & GENERATE_PERSON_ENTITIES) == 0) 
+
+        if (tag.endsWith("PER") && (types & GENERATE_PERSON_ENTITIES) == 0)
           tag = "O";
-        
-        if (tag.endsWith("ORG") && (types & GENERATE_ORGANIZATION_ENTITIES) == 0) 
+
+        if (tag.endsWith("ORG") && (types & GENERATE_ORGANIZATION_ENTITIES) == 0)
           tag = "O";
-        
-        if (tag.endsWith("LOC") && (types & GENERATE_LOCATION_ENTITIES) == 0) 
+
+        if (tag.endsWith("LOC") && (types & GENERATE_LOCATION_ENTITIES) == 0)
           tag = "O";
-        
-        if (tag.endsWith("MISC") && (types & GENERATE_MISC_ENTITIES) == 0) 
+
+        if (tag.endsWith("MISC") && (types & GENERATE_MISC_ENTITIES) == 0)
           tag = "O";
-        
+
         if (tag.startsWith("B-")) {
-          
+
           if (beginIndex != -1) {
             names.add(extract(beginIndex, endIndex, tags.get(beginIndex)));
             beginIndex = -1;
             endIndex = -1;
           }
-          
+
           beginIndex = i;
           endIndex = i +1;
         }
@@ -206,11 +206,11 @@ public class Conll02NameSampleStream imp
           throw new IOException("Invalid tag: " + tag);
         }
       }
-      
+
       // if one span remains, create it here
       if (beginIndex != -1)
         names.add(extract(beginIndex, endIndex, tags.get(beginIndex)));
-      
+
       return new NameSample(sentence.toArray(new String[sentence.size()]), names.toArray(new Span[names.size()]), isClearAdaptiveData);
     }
     else if (line != null) {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStreamFactory.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStreamFactory.java Fri May  2 12:34:23 2014
@@ -33,11 +33,11 @@ import opennlp.tools.util.ObjectStream;
  * <b>Note:</b> Do not use this class, internal use only!
  */
 public class Conll02NameSampleStreamFactory extends LanguageSampleStreamFactory<NameSample> {
-  
+
   interface Parameters extends BasicFormatParams {
     @ParameterDescription(valueName = "es|nl")
     String getLang();
-    
+
     @ParameterDescription(valueName = "per,loc,org,misc")
     String getTypes();
   }
@@ -52,9 +52,9 @@ public class Conll02NameSampleStreamFact
   }
 
   public ObjectStream<NameSample> create(String[] args) {
-    
+
     Parameters params = ArgumentParser.parse(args, Parameters.class);
-    
+
     LANGUAGE lang;
     if ("nl".equals(params.getLang())) {
       lang = LANGUAGE.NL;
@@ -67,27 +67,27 @@ public class Conll02NameSampleStreamFact
     else {
       throw new TerminateToolException(1, "Unsupported language: " + params.getLang());
     }
-    
+
     int typesToGenerate = 0;
-    
+
     if (params.getTypes().contains("per")) {
-      typesToGenerate = typesToGenerate | 
+      typesToGenerate = typesToGenerate |
           Conll02NameSampleStream.GENERATE_PERSON_ENTITIES;
     }
     if (params.getTypes().contains("org")) {
-      typesToGenerate = typesToGenerate | 
+      typesToGenerate = typesToGenerate |
           Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES;
     }
     if (params.getTypes().contains("loc")) {
-      typesToGenerate = typesToGenerate | 
+      typesToGenerate = typesToGenerate |
           Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES;
     }
     if (params.getTypes().contains("misc")) {
-      typesToGenerate = typesToGenerate | 
+      typesToGenerate = typesToGenerate |
           Conll02NameSampleStream.GENERATE_MISC_ENTITIES;
     }
 
-    
+
     try {
       return new Conll02NameSampleStream(lang,
           CmdLineUtil.createInputStreamFactory(params.getData()), typesToGenerate);

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java Fri May  2 12:34:23 2014
@@ -2,9 +2,9 @@
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
  *  You may obtain a copy of the License at
- * 
+ *
  *       http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  *  Unless required by applicable law or agreed to in writing, software
  *  distributed under the License is distributed on an "AS IS" BASIS,
  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -40,7 +40,7 @@ public class Conll03NameSampleStream imp
     EN,
     DE
   }
-  
+
   private final LANGUAGE lang;
   private final ObjectStream<String> lineStream;
 
@@ -70,7 +70,7 @@ public class Conll03NameSampleStream imp
     }
     this.types = types;
   }
-  
+
   /**
    *
    * @param lang
@@ -106,10 +106,10 @@ public class Conll03NameSampleStream imp
       if (line.startsWith(Conll02NameSampleStream.DOCSTART)) {
         isClearAdaptiveData = true;
         String emptyLine = lineStream.read();
-        
+
         if (!StringUtil.isEmpty(emptyLine))
           throw new IOException("Empty line after -DOCSTART- not empty: '" + emptyLine +"'!");
-        
+
         continue;
       }
 
@@ -141,19 +141,19 @@ public class Conll03NameSampleStream imp
 
         String tag = tags.get(i);
 
-        if (tag.endsWith("PER") && 
+        if (tag.endsWith("PER") &&
         		(types & Conll02NameSampleStream.GENERATE_PERSON_ENTITIES) == 0)
           tag = "O";
 
-        if (tag.endsWith("ORG") && 
+        if (tag.endsWith("ORG") &&
         		(types & Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES) == 0)
           tag = "O";
 
-        if (tag.endsWith("LOC") && 
+        if (tag.endsWith("LOC") &&
         		(types & Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES) == 0)
           tag = "O";
 
-        if (tag.endsWith("MISC") && 
+        if (tag.endsWith("MISC") &&
         		(types & Conll02NameSampleStream.GENERATE_MISC_ENTITIES) == 0)
           tag = "O";
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java Fri May  2 12:34:23 2014
@@ -2,9 +2,9 @@
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
  *  You may obtain a copy of the License at
- * 
+ *
  *       http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  *  Unless required by applicable law or agreed to in writing, software
  *  distributed under the License is distributed on an "AS IS" BASIS,
  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java Fri May  2 12:34:23 2014
@@ -45,7 +45,7 @@ public class ConllXPOSSampleStream exten
   public ConllXPOSSampleStream(ObjectStream<String> lineStream) {
     super(new ParagraphStream(lineStream));
   }
-  
+
   ConllXPOSSampleStream(InputStreamFactory in, Charset charset) throws IOException {
     super(new ParagraphStream(new PlainTextByLineStream(in, charset)));
   }
@@ -55,29 +55,29 @@ public class ConllXPOSSampleStream exten
     // The CONLL-X data has a word per line and each line is tab separated
     // in the following format:
     // ID, FORM, LEMMA, CPOSTAG, POSTAG, ... (max 10 fields)
-     
+
     // One paragraph contains a whole sentence and, the token
     // and tag will be read from the FORM and POSTAG field.
-    
+
    String paragraph = samples.read();
-   
+
    POSSample sample = null;
-   
+
    if (paragraph != null) {
-     
+
      // paragraph get lines
      BufferedReader reader = new BufferedReader(new StringReader(paragraph));
-     
+
      List<String> tokens = new ArrayList<String>(100);
      List<String> tags = new ArrayList<String>(100);
-     
+
      String line;
      while ((line = reader.readLine())  != null) {
-     
+
        final int minNumberOfFields = 5;
-       
+
        String parts[] = line.split("\t");
-       
+
        if (parts.length >= minNumberOfFields) {
          tokens.add(parts[1]);
          tags.add(parts[4]);
@@ -87,14 +87,14 @@ public class ConllXPOSSampleStream exten
              minNumberOfFields + " fields: '" + line + "'!");
        }
      }
-     
+
      // just skip empty samples and read next sample
      if (tokens.size() == 0)
        sample = read();
-       
+
      sample = new POSSample(tokens.toArray(new String[tokens.size()]), tags.toArray(new String[tags.size()]));
    }
-   
+
    return sample;
   }
 }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStreamFactory.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStreamFactory.java Fri May  2 12:34:23 2014
@@ -37,7 +37,7 @@ import opennlp.tools.util.ObjectStream;
 public class ConllXPOSSampleStreamFactory extends AbstractSampleStreamFactory<POSSample> {
 
   public static final String CONLLX_FORMAT = "conllx";
-  
+
   interface Parameters extends BasicFormatParams {
   }
 
@@ -53,9 +53,9 @@ public class ConllXPOSSampleStreamFactor
   public ObjectStream<POSSample> create(String[] args) {
     Parameters params = ArgumentParser.parse(args, Parameters.class);
 
-    InputStreamFactory inFactory = 
+    InputStreamFactory inFactory =
         CmdLineUtil.createInputStreamFactory(params.getData());
-    
+
     try {
       System.setOut(new PrintStream(System.out, true, "UTF-8"));
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXSentenceSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXSentenceSampleStreamFactory.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXSentenceSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXSentenceSampleStreamFactory.java Fri May  2 12:34:23 2014
@@ -31,7 +31,7 @@ import opennlp.tools.util.ObjectStream;
 public class ConllXSentenceSampleStreamFactory extends
     DetokenizerSampleStreamFactory<SentenceSample> {
 
-  interface Parameters extends ConllXPOSSampleStreamFactory.Parameters, DetokenizerParameter {    
+  interface Parameters extends ConllXPOSSampleStreamFactory.Parameters, DetokenizerParameter {
     // TODO: make chunk size configurable
   }
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java Fri May  2 12:34:23 2014
@@ -34,55 +34,55 @@ import opennlp.tools.util.ObjectStream;
 public class DirectorySampleStream implements ObjectStream<File> {
 
   private final List<File> inputDirectories;
-  
+
   private final boolean isRecursiveScan;
-  
+
   private final FileFilter fileFilter;
-  
+
   private Stack<File> directories = new Stack<File>();
-  
+
   private Stack<File> textFiles = new Stack<File>();
-  
+
   public DirectorySampleStream(File dirs[], FileFilter fileFilter, boolean recursive) {
 
-    this.fileFilter= fileFilter; 
+    this.fileFilter= fileFilter;
     isRecursiveScan = recursive;
-    
+
     List<File> inputDirectoryList = new ArrayList<File>(dirs.length);
-    
+
     for (File dir : dirs) {
       if (!dir.isDirectory()) {
         throw new IllegalArgumentException(
             "All passed in directories must be directories, but \""
             + dir.toString() + "\" is not!");
       }
-      
+
       inputDirectoryList.add(dir);
     }
-    
+
     inputDirectories = Collections.unmodifiableList(inputDirectoryList);
-    
+
     directories.addAll(inputDirectories);
   }
-  
+
   public DirectorySampleStream(File dir, FileFilter fileFilter, boolean recursive) {
     this(new File[]{dir}, fileFilter, recursive);
   }
-  
+
   public File read() throws IOException {
 
     while(textFiles.isEmpty() && !directories.isEmpty()) {
       File dir = directories.pop();
-      
+
       File files[];
-      
+
       if (fileFilter != null) {
         files = dir.listFiles(fileFilter);
       }
       else {
         files = dir.listFiles();
       }
-      
+
       for (File file : files) {
         if (file.isFile()) {
           textFiles.push(file);
@@ -92,7 +92,7 @@ public class DirectorySampleStream imple
         }
       }
     }
-    
+
     if (!textFiles.isEmpty()) {
       return textFiles.pop();
     }
@@ -104,7 +104,7 @@ public class DirectorySampleStream imple
   public void reset() {
     directories.clear();
     textFiles.clear();
-    
+
     directories.addAll(inputDirectories);
   }
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java Fri May  2 12:34:23 2014
@@ -79,7 +79,7 @@ public class EvalitaNameSampleStream imp
 	    this.lineStream = lineStream;
 	    this.types = types;
 	  }
-	  
+
 	  public EvalitaNameSampleStream(LANGUAGE lang, InputStreamFactory in, int types) throws IOException {
 	    this.lang = lang;
 	    try {
@@ -91,7 +91,7 @@ public class EvalitaNameSampleStream imp
 	    }
 	    this.types = types;
 	  }
-	  
+
   /**
    * @param lang
    * @param in an Input Stream to read data.
@@ -167,7 +167,7 @@ public class EvalitaNameSampleStream imp
           throw new IOException("Incorrect number of fields per line for language: '" + line + "'!");
         }
     }
-    
+
     // Always clear adaptive data for Italian
     if (LANGUAGE.IT.equals(lang))
       isClearAdaptiveData = true;

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java Fri May  2 12:34:23 2014
@@ -38,26 +38,26 @@ import opennlp.tools.util.PlainTextByLin
  */
 public class LeipzigDoccatSampleStream extends
     FilterObjectStream<String, DocumentSample> {
-  
+
   private final String language;
   private final int sentencesPerDocument;
 
   /**
    * Creates a new LeipzigDoccatSampleStream with the specified parameters.
-   * 
+   *
    * @param language the Leipzig input sentences.txt file
    * @param sentencesPerDocument the number of sentences which should be grouped into once {@link DocumentSample}
    * @param in the InputStream pointing to the contents of the sentences.txt input file
    * @throws IOException IOException
    */
-  LeipzigDoccatSampleStream(String language, int sentencesPerDocument, 
+  LeipzigDoccatSampleStream(String language, int sentencesPerDocument,
       InputStream in) throws IOException {
     super(new PlainTextByLineStream(in, "UTF-8"));
     System.setOut(new PrintStream(System.out, true, "UTF-8"));
     this.language = language;
     this.sentencesPerDocument = sentencesPerDocument;
   }
-  
+
   public DocumentSample read() throws IOException {
 
     int count = 0;
@@ -68,25 +68,25 @@ public class LeipzigDoccatSampleStream e
     while (count < sentencesPerDocument && (line = samples.read()) != null) {
 
       String tokens[] = SimpleTokenizer.INSTANCE.tokenize(line);
-      
+
       if (tokens.length == 0) {
         throw new IOException("Empty lines are not allowed!");
       }
-        
+
       // Always skip first token, that is the sentence number!
       for (int i = 1; i < tokens.length; i++) {
         sampleText.append(tokens[i]);
         sampleText.append(' ');
       }
-      
+
       count++;
     }
 
-    
+
     if (sampleText.length() > 0) {
       return new DocumentSample(language, sampleText.toString());
     }
-  
+
     return null;
   }
 }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java Fri May  2 12:34:23 2014
@@ -46,7 +46,7 @@ public class LeipzigDocumentSampleStream
   }
 
   public ObjectStream<DocumentSample> create(String[] args) {
-    
+
     Parameters params = ArgumentParser.parse(args, Parameters.class);
     language = params.getLang();
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java Fri May  2 12:34:23 2014
@@ -2,9 +2,9 @@
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
  *  You may obtain a copy of the License at
- * 
+ *
  *       http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  *  Unless required by applicable law or agreed to in writing, software
  *  distributed under the License is distributed on an "AS IS" BASIS,
  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java Fri May  2 12:34:23 2014
@@ -72,7 +72,7 @@ public class ADChunkSampleStream impleme
 	 * Creates a new {@link NameSample} stream from a line stream, i.e.
 	 * {@link ObjectStream}&lt;{@link String}&gt;, that could be a
 	 * {@link PlainTextByLineStream} object.
-	 * 
+	 *
 	 * @param lineStream
 	 *          a stream of lines as {@link String}
 	 */
@@ -90,10 +90,10 @@ public class ADChunkSampleStream impleme
         throw new IllegalStateException(e);
       }
     }
-	   
+
 	/**
 	 * Creates a new {@link NameSample} stream from a {@link InputStream}
-	 * 
+	 *
 	 * @param in
 	 *          the Corpus {@link InputStream}
 	 * @param charsetName
@@ -160,7 +160,7 @@ public class ADChunkSampleStream impleme
     private void processNode(Node node, List<String> sentence, List<String> tags,
         List<String> target, String inheritedTag) {
     String phraseTag = getChunkTag(node);
-    
+
     boolean inherited = false;
     if(phraseTag.equals(OTHER) && inheritedTag != null) {
       phraseTag = inheritedTag;
@@ -173,12 +173,12 @@ public class ADChunkSampleStream impleme
             boolean isIntermediate = false;
             String tag = phraseTag;
             Leaf leaf = (Leaf) elements[i];
-            
+
             String localChunk = getChunkTag(leaf);
             if(localChunk != null && !tag.equals(localChunk)) {
               tag = localChunk;
             }
-            
+
             if(isIntermediate(tags, target, tag) && (inherited || i > 0)) {
                   isIntermediate = true;
             }
@@ -186,7 +186,7 @@ public class ADChunkSampleStream impleme
                 (
                     !( i + 1 < elements.length && elements[i+1].isLeaf() ) ||
                     !( i > 0 && elements[i - 1].isLeaf() )
-                ) 
+                )
               ){
               isIntermediate = false;
               tag = OTHER;
@@ -196,7 +196,7 @@ public class ADChunkSampleStream impleme
         } else {
             int before = target.size();
             processNode((Node) elements[i], sentence, tags, target, phraseTag);
-            
+
             // if the child node was of a different type we should break the chunk sequence
             for (int j = target.size() - 1; j >= before; j--) {
               if(!target.get(j).endsWith("-" + phraseTag)) {
@@ -212,7 +212,7 @@ public class ADChunkSampleStream impleme
   protected void processLeaf(Leaf leaf, boolean isIntermediate, String phraseTag,
 			List<String> sentence, List<String> tags, List<String> target) {
 		String chunkTag;
-		
+
 		if (leaf.getFunctionalTag() != null
 				&& phraseTag.equals(OTHER)) {
 		  phraseTag = getPhraseTagFromPosTag(leaf.getFunctionalTag());
@@ -254,7 +254,7 @@ public class ADChunkSampleStream impleme
     }
     return t;
   }
-  
+
   protected String getChunkTag(Leaf leaf) {
     String tag = leaf.getSyntacticTag();
     if("P".equals(tag)) {
@@ -265,7 +265,7 @@ public class ADChunkSampleStream impleme
 
   protected String getChunkTag(Node node) {
     String tag = node.getSyntacticTag();
-    
+
     String phraseTag = tag.substring(tag.lastIndexOf(":") + 1);
 
     while (phraseTag.endsWith("-")) {
@@ -298,7 +298,7 @@ public class ADChunkSampleStream impleme
 	public void close() throws IOException {
 		adSentenceStream.close();
 	}
-	
+
   protected boolean isIncludePunctuations() {
     return false;
   }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java Fri May  2 12:34:23 2014
@@ -56,7 +56,7 @@ public class ADChunkSampleStreamFactory 
     @ParameterDescription(valueName = "start", description = "index of first sentence")
     @OptionalParameter
     Integer getStart();
-    
+
     @ParameterDescription(valueName = "end", description = "index of last sentence")
     @OptionalParameter
     Integer getEnd();
@@ -78,7 +78,7 @@ public class ADChunkSampleStreamFactory 
     language = params.getLang();
 
     InputStreamFactory sampleDataIn = CmdLineUtil.createInputStreamFactory(params.getData());
-    
+
     ObjectStream<String> lineStream=null;
     try {
       lineStream = new PlainTextByLineStream(sampleDataIn, params.getEncoding());
@@ -91,11 +91,11 @@ public class ADChunkSampleStreamFactory 
     if(params.getStart() != null && params.getStart() > -1) {
       sampleStream.setStart(params.getStart());
     }
-    
+
     if(params.getEnd() != null && params.getEnd() > -1) {
       sampleStream.setEnd(params.getEnd());
     }
-    
+
     return sampleStream;
   }
 }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java Fri May  2 12:34:23 2014
@@ -64,17 +64,17 @@ import opennlp.tools.util.Span;
  */
 public class ADNameSampleStream implements ObjectStream<NameSample> {
 
-  /** 
-   * Pattern of a NER tag in Arvores Deitadas 
+  /**
+   * Pattern of a NER tag in Arvores Deitadas
    */
   private static final Pattern tagPattern = Pattern.compile("<(NER:)?(.*?)>");
-  
+
   private static final Pattern whitespacePattern = Pattern.compile("\\s+");
   private static final Pattern underlinePattern = Pattern.compile("[_]+");
   private static final Pattern hyphenPattern = Pattern.compile("((\\p{L}+)-$)|(^-(\\p{L}+)(.*))|((\\p{L}+)-(\\p{L}+)(.*))");
   private static final Pattern alphanumericPattern = Pattern.compile("^[\\p{L}\\p{Nd}]+$");
 
-  /** 
+  /**
    * Map to the Arvores Deitadas types to our types. It is read-only.
    */
   private static final Map<String, String> HAREM;
@@ -150,21 +150,21 @@ public class ADNameSampleStream implemen
 
     HAREM = Collections.unmodifiableMap(harem);
   }
-  
+
   private final ObjectStream<ADSentenceStream.Sentence> adSentenceStream;
 
-  /** 
+  /**
    * To keep the last left contraction part
    */
   private String leftContractionPart = null;
 
   private final boolean splitHyphenatedTokens;
-  
+
   /**
    * Creates a new {@link NameSample} stream from a line stream, i.e.
    * {@link ObjectStream}&lt;{@link String}&gt;, that could be a
    * {@link PlainTextByLineStream} object.
-   * 
+   *
    * @param lineStream
    *          a stream of lines as {@link String}
    * @param splitHyphenatedTokens
@@ -178,7 +178,7 @@ public class ADNameSampleStream implemen
 
   /**
    * Creates a new {@link NameSample} stream from a {@link InputStream}
-   * 
+   *
    * @param in
    *          the Corpus {@link InputStream}
    * @param charsetName
@@ -200,10 +200,10 @@ public class ADNameSampleStream implemen
       throw new IllegalStateException(e);
     }
   }
-  
+
   /**
    * Creates a new {@link NameSample} stream from a {@link InputStream}
-   * 
+   *
    * @param in
    *          the Corpus {@link InputStream}
    * @param charsetName
@@ -227,20 +227,20 @@ public class ADNameSampleStream implemen
   }
 
   int textID = -1;
-  
+
   public NameSample read() throws IOException {
 
     Sentence paragraph;
     // we should look for text here.
     while ((paragraph = this.adSentenceStream.read()) != null) {
-      
+
       int currentTextID = getTextID(paragraph);
       boolean clearData = false;
       if(currentTextID != textID) {
         clearData = true;
         textID = currentTextID;
       }
-      
+
       Node root = paragraph.getRoot();
       List<String> sentence = new ArrayList<String>();
       List<Span> names = new ArrayList<Span>();
@@ -254,7 +254,7 @@ public class ADNameSampleStream implemen
 
   /**
    * Recursive method to process a node in Arvores Deitadas format.
-   * 
+   *
    * @param node
    *          the node to be processed
    * @param sentence
@@ -276,7 +276,7 @@ public class ADNameSampleStream implemen
 
   /**
    * Process a Leaf of Arvores Detaitadas format
-   * 
+   *
    * @param leaf
    *          the leaf to be processed
    * @param sentence
@@ -286,7 +286,7 @@ public class ADNameSampleStream implemen
    */
   private void processLeaf(Leaf leaf, List<String> sentence,
       List<Span> names) {
-    
+
     boolean alreadyAdded = false;
 
     if (leftContractionPart != null) {
@@ -336,7 +336,7 @@ public class ADNameSampleStream implemen
       if(!alreadyAdded) {
         sentence.addAll(processLexeme(leaf.getLexeme()));
       }
-      
+
       if (namedEntityTag != null) {
         names
             .add(new Span(startOfNamedEntity, sentence.size(), namedEntityTag));
@@ -397,7 +397,7 @@ public class ADNameSampleStream implemen
       suffix.add(Character.toString(last));
       tok = tok.substring(0, tok.length() - 1);
     }
-    
+
     // lets split all hyphens
     if (this.splitHyphenatedTokens && tok.contains("-") && tok.length() > 1) {
       Matcher matcher = hyphenPattern.matcher(tok);
@@ -446,7 +446,7 @@ public class ADNameSampleStream implemen
 
   /**
    * Parse a NER tag in Arvores Deitadas format.
-   * 
+   *
    * @param tags
    *          the NER tag in Arvores Deitadas format
    * @return the NER tag, or null if not a NER tag in Arvores Deitadas format
@@ -475,7 +475,7 @@ public class ADNameSampleStream implemen
   public void close() throws IOException {
     adSentenceStream.close();
   }
-  
+
   enum Type {
     ama, cie, lit
   }
@@ -483,15 +483,15 @@ public class ADNameSampleStream implemen
   private Type corpusType = null;
 
   private Pattern metaPattern;
-  
+
   // works for Amazonia
 //  private static final Pattern meta1 = Pattern
 //      .compile("^(?:[a-zA-Z\\-]*(\\d+)).*?p=(\\d+).*");
-//  
+//
 //  // works for selva cie
 //  private static final Pattern meta2 = Pattern
 //    .compile("^(?:[a-zA-Z\\-]*(\\d+)).*?p=(\\d+).*");
-  
+
   private int textIdMeta2 = -1;
   private String textMeta2 = "";
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java Fri May  2 12:34:23 2014
@@ -49,7 +49,7 @@ public class ADNameSampleStreamFactory e
 
     @ParameterDescription(valueName = "sampleData", description = "data to be used, usually a file name.")
     File getData();
-    
+
     @ParameterDescription(valueName = "split", description = "if true all hyphenated tokens will be separated (default true)")
     @OptionalParameter(defaultValue = "true")
     Boolean getSplitHyphenatedTokens();

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java Fri May  2 12:34:23 2014
@@ -46,7 +46,7 @@ public class ADPOSSampleStream implement
    * Creates a new {@link POSSample} stream from a line stream, i.e.
    * {@link ObjectStream}&lt;{@link String}&gt;, that could be a
    * {@link PlainTextByLineStream} object.
-   * 
+   *
    * @param lineStream
    *          a stream of lines as {@link String}
    * @param expandME
@@ -65,7 +65,7 @@ public class ADPOSSampleStream implement
 
   /**
    * Creates a new {@link POSSample} stream from a {@link InputStream}
-   * 
+   *
    * @param in
    *          the Corpus {@link InputStream}
    * @param charsetName
@@ -90,10 +90,10 @@ public class ADPOSSampleStream implement
       throw new IllegalStateException(e);
     }
   }
-  
+
   /**
    * Creates a new {@link POSSample} stream from a {@link InputStream}
-   * 
+   *
    * @param in
    *          the Corpus {@link InputStream}
    * @param charsetName

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java Fri May  2 12:34:23 2014
@@ -55,7 +55,7 @@ public class ADSentenceSampleStream impl
    * Creates a new {@link SentenceSample} stream from a line stream, i.e.
    * {@link ObjectStream}&lt;{@link String}&gt;, that could be a
    * {@link PlainTextByLineStream} object.
-   * 
+   *
    * @param lineStream
    *          a stream of lines as {@link String}
    * @param includeHeadlines
@@ -70,7 +70,7 @@ public class ADSentenceSampleStream impl
 
   /**
    * Creates a new {@link SentenceSample} stream from a {@link FileInputStream}
-   * 
+   *
    * @param in
    *          input stream from the corpus
    * @param charsetName
@@ -91,10 +91,10 @@ public class ADSentenceSampleStream impl
     Arrays.sort(ptEosCharacters);
     this.isIncludeTitles = includeHeadlines;
   }
-  
+
   /**
    * Creates a new {@link SentenceSample} stream from a {@link FileInputStream}
-   * 
+   *
    * @param in
    *          input stream from the corpus
    * @param charsetName

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceStream.java Fri May  2 12:34:23 2014
@@ -38,8 +38,8 @@ import opennlp.tools.util.ObjectStream;
  * Susana Afonso.
  * "Árvores deitadas: Descrição do formato e das opções de análise na Floresta Sintáctica"
  * .<br>
- * 12 de Fevereiro de 2006. 
- * http://www.linguateca.pt/documentos/Afonso2006ArvoresDeitadas.pdf 
+ * 12 de Fevereiro de 2006.
+ * http://www.linguateca.pt/documentos/Afonso2006ArvoresDeitadas.pdf
  * <p>
  * <b>Note:</b> Do not use this class, internal use only!
  */
@@ -51,7 +51,7 @@ public class ADSentenceStream extends
     private String text;
     private Node root;
     private String metadata;
-    
+
     public static final String META_LABEL_FINAL = "final";
 
     public String getText() {
@@ -94,11 +94,11 @@ public class ADSentenceStream extends
     private Pattern bizarreLeafPattern = Pattern
     		.compile("^([=-]*)([^:=]+=[^\\(\\s]+)\\(([\"'].+[\"'])?\\s*([^\\)]+)?\\)\\s+(.+)");
     private Pattern punctuationPattern = Pattern.compile("^(=*)(\\W+)$");
-    
+
     private String text,meta;
 
-    /** 
-     * Parse the sentence 
+    /**
+     * Parse the sentence
      */
     public Sentence parse(String sentenceString, int para, boolean isTitle, boolean isBox) {
       BufferedReader reader = new BufferedReader(new StringReader(
@@ -108,9 +108,9 @@ public class ADSentenceStream extends
       try {
         // first line is <s ...>
         String line = reader.readLine();
-        
+
         boolean useSameTextAndMeta = false; // to handle cases where there are diff sug of parse (&&)
-        
+
           // should find the source source
           while (!line.startsWith("SOURCE")) {
         	  if(line.equals("&&")) {
@@ -152,21 +152,21 @@ public class ADSentenceStream extends
         while(line != null && line.startsWith("###")) {
         	line = reader.readLine();
         }
-        
+
         // got the root. Add it to the stack
         Stack<Node> nodeStack = new Stack<Node>();
 
         root.setSyntacticTag("ROOT");
         root.setLevel(0);
         nodeStack.add(root);
-        
-        
+
+
         /* now we have to take care of the lastLevel. Every time it raises, we will add the
         leaf to the node at the top. If it decreases, we remove the top. */
-        
+
         while (line != null && line.length() != 0 && line.startsWith("</s>") == false && !line.equals("&&")) {
           TreeElement element = this.getElement(line);
-          
+
           if(element != null) {
             // The idea here is to keep a stack of nodes that are candidates for
             // parenting the following elements (nodes and leafs).
@@ -177,14 +177,14 @@ public class ADSentenceStream extends
                 && element.getLevel() <= nodeStack.peek().getLevel()) {
               Node nephew = nodeStack.pop();
             }
-            
+
             if( element.isLeaf() ) {
               // 2a) If the element is a leaf and there is no parent candidate,
-              // add it as a daughter of the root.  
+              // add it as a daughter of the root.
               if (nodeStack.isEmpty()) {
                 root.addElement(element);
               } else {
-                // 2b) There are parent candidates. 
+                // 2b) There are parent candidates.
                 // look for the node with the correct level
                 Node peek = nodeStack.peek();
                 if (element.level == 0) { // add to the root
@@ -209,7 +209,7 @@ public class ADSentenceStream extends
               }
             } else {
               // 3) Check if the element that is at the top of the stack is this
-              // node parent, if yes add it as a son 
+              // node parent, if yes add it as a son
               if (!nodeStack.isEmpty() && nodeStack.peek().getLevel() < element.getLevel()) {
                   nodeStack.peek().addElement(element);
               } else {
@@ -217,7 +217,7 @@ public class ADSentenceStream extends
               }
               // 4) Add it to the stack so it is a parent candidate.
               nodeStack.push((Node) element);
-              
+
             }
           }
           line = reader.readLine();
@@ -241,14 +241,14 @@ public class ADSentenceStream extends
 
     /**
      * Parse a tree element from a AD line
-     * 
+     *
      * @param line
      *          the AD line
      * @return the tree element
      */
     public TreeElement getElement(String line) {
       // Note: all levels are higher than 1, because 0 is reserved for the root.
-      
+
       // try node
       Matcher nodeMatcher = nodePattern.matcher(line);
       if (nodeMatcher.matches()) {
@@ -295,7 +295,7 @@ public class ADSentenceStream extends
       if(line.equals("_") || line.startsWith("<lixo") || line.startsWith("pause")) {
       	return null;
       }
-      
+
       if(line.startsWith("=")) {
       	Matcher bizarreLeafMatcher = bizarreLeafPattern.matcher(line);
         if (bizarreLeafMatcher.matches()) {
@@ -320,21 +320,21 @@ public class ADSentenceStream extends
         } else {
         	int level = line.lastIndexOf("=") + 1;
         	String lexeme = line.substring(level + 1);
-        	
+
         	if(lexeme.matches("\\w.*?[\\.<>].*")) {
         	  return null;
         	}
-        	
+
         	 Leaf leaf = new Leaf();
            leaf.setLevel(level + 1);
            leaf.setSyntacticTag("");
            leaf.setMorphologicalTag("");
            leaf.setLexeme(lexeme);
-           
+
            return leaf;
         }
       }
-      
+
       System.err.println("Couldn't parse leaf: " + line);
       Leaf leaf = new Leaf();
       leaf.setLevel(1);
@@ -351,7 +351,7 @@ public class ADSentenceStream extends
       private String syntacticTag;
       private String morphologicalTag;
       private int level;
-      
+
       public boolean isLeaf() {return false;}
 
       public void setSyntacticTag(String syntacticTag) {
@@ -420,11 +420,11 @@ public class ADSentenceStream extends
 
       @Override
       public boolean isLeaf() {return true;}
-      
+
       public void setFunctionalTag(String funcTag) {
         this.functionalTag = funcTag;
       }
-      
+
       public String getFunctionalTag(){
         return this.functionalTag;
       }
@@ -432,7 +432,7 @@ public class ADSentenceStream extends
       public void setSecondaryTag(String secondaryTag) {
         this.secondaryTag = secondaryTag;
       }
-      
+
       public String getSecondaryTag() {
         return this.secondaryTag;
       }
@@ -444,7 +444,7 @@ public class ADSentenceStream extends
       public String getLexeme() {
         return word;
       }
-      
+
       private String emptyOrString(String value, String prefix, String suffix) {
         if(value == null) return "";
         return prefix + value + suffix;
@@ -478,46 +478,46 @@ public class ADSentenceStream extends
     }
 
   }
-  
-  /** 
-   * The start sentence pattern 
+
+  /**
+   * The start sentence pattern
    */
   private static final Pattern sentStart = Pattern.compile("<s[^>]*>");
 
-  /** 
-   * The end sentence pattern 
+  /**
+   * The end sentence pattern
    */
   private static final Pattern sentEnd = Pattern.compile("</s>");
   private static final Pattern extEnd = Pattern.compile("</ext>");
-  
-  /** 
-   * The start sentence pattern 
+
+  /**
+   * The start sentence pattern
    */
   private static final Pattern titleStart = Pattern.compile("<t[^>]*>");
 
-  /** 
-   * The end sentence pattern 
+  /**
+   * The end sentence pattern
    */
   private static final Pattern titleEnd = Pattern.compile("</t>");
-  
-  /** 
-   * The start sentence pattern 
+
+  /**
+   * The start sentence pattern
    */
   private static final Pattern boxStart = Pattern.compile("<caixa[^>]*>");
 
-  /** 
-   * The end sentence pattern 
+  /**
+   * The end sentence pattern
    */
   private static final Pattern boxEnd = Pattern.compile("</caixa>");
-  
-  
-  /** 
-   * The start sentence pattern 
+
+
+  /**
+   * The start sentence pattern
    */
   private static final Pattern paraStart = Pattern.compile("<p[^>]*>");
 
-  /** 
-   * The start sentence pattern 
+  /**
+   * The start sentence pattern
    */
   private static final Pattern textStart = Pattern.compile("<ext[^>]*>");
 
@@ -526,12 +526,12 @@ public class ADSentenceStream extends
   private int paraID = 0;
   private boolean isTitle = false;
   private boolean isBox = false;
-  
+
   public ADSentenceStream(ObjectStream<String> lineStream) {
     super(lineStream);
     parser = new SentenceParser();
   }
-  
+
 
   public Sentence read() throws IOException {
 
@@ -542,7 +542,7 @@ public class ADSentenceStream extends
       String line = samples.read();
 
       if (line != null) {
-    	  
+
     	  if(sentenceStarted) {
     		  if (sentEnd.matcher(line).matches() || extEnd.matcher(line).matches()) {
 		          sentenceStarted = false;