You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by rz...@apache.org on 2023/01/30 18:57:43 UTC

[opennlp-sandbox] branch introduce-parent-pom-for-opennlp-sandbox-normalize-endings created (now 15eb920)

This is an automated email from the ASF dual-hosted git repository.

rzo1 pushed a change to branch introduce-parent-pom-for-opennlp-sandbox-normalize-endings
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git


      at 15eb920  Normalize all the line endings

This branch includes the following new commits:

     new 15eb920  Normalize all the line endings

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[opennlp-sandbox] 01/01: Normalize all the line endings

Posted by rz...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

rzo1 pushed a commit to branch introduce-parent-pom-for-opennlp-sandbox-normalize-endings
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git

commit 15eb920efeb322012e51098976cc59335668e184
Author: Richard Zowalla <rz...@apache.org>
AuthorDate: Mon Jan 30 19:56:29 2023 +0100

    Normalize all the line endings
---
 .../caseditor/util/ContainingConstraint.java       |    144 +-
 .../modelbuilder/DefaultModelBuilderUtil.java      |    230 +-
 .../addons/modelbuilder/KnownEntityProvider.java   |     84 +-
 .../modelbuilder/ModelGenerationValidator.java     |     60 +-
 .../addons/modelbuilder/ModelParameter.java        |     48 +-
 .../opennlp/addons/modelbuilder/Modelable.java     |     84 +-
 .../modelbuilder/SemiSupervisedModelGenerator.java |     48 +-
 .../addons/modelbuilder/SentenceProvider.java      |     48 +-
 .../modelbuilder/impls/BaseModelBuilderParams.java |    176 +-
 .../impls/FileKnownEntityProvider.java             |    150 +-
 .../modelbuilder/impls/FileModelValidatorImpl.java |    188 +-
 .../modelbuilder/impls/FileSentenceProvider.java   |    140 +-
 .../modelbuilder/impls/GenericModelGenerator.java  |    208 +-
 .../modelbuilder/impls/GenericModelableImpl.java   |    256 +-
 opennlp-similarity/README.txt                      |    276 +-
 opennlp-similarity/RELEASE_NOTES.html              |    152 +-
 .../multithreaded/BingWebQueryRunnerThread.java    |    108 +-
 .../apps/contentgen/multithreaded/Fragment.java    |    176 +-
 .../apps/contentgen/multithreaded/MyEvent.java     |     22 +-
 .../contentgen/multithreaded/MyEventListener.java  |     16 +-
 .../contentgen/multithreaded/cgRequestForm.html    |     74 +-
 .../multithreaded/nlProg2codeRequestForm.html      |     94 +-
 .../apps/object_dedup/SimilarityAccessorBase.java  |   1476 +-
 .../apps/relevanceVocabs/PhraseProcessor.java      |    464 +-
 .../apps/relevanceVocabs/SynonymListFilter.java    |    206 +-
 .../tools/apps/relevanceVocabs/SynonymMap.java     |    704 +-
 .../BingAPIProductSearchManager.java               |    136 +-
 .../review_builder/FBOpenGraphSearchManager.java   |    282 +-
 .../review_builder/MachineTranslationWrapper.java  |    172 +-
 .../review_builder/MinedSentenceProcessor.java     |    418 +-
 .../apps/review_builder/ReviewBuilderRunner.java   |    330 +-
 .../tools/apps/review_builder/ReviewObj.java       |    274 +-
 .../review_builder/SentenceBeingOriginalized.java  |    118 +-
 .../apps/review_builder/SentenceOriginalizer.java  |    796 +-
 .../URLsWithReviewFinderByProductName.java         |     42 +-
 .../review_builder/WebPageReviewExtractor.java     |    872 +-
 .../tools/apps/utils/email/EmailSender.java        |    342 +-
 .../tools/apps/utils/email/SMTP_Authenticator.java |     48 +-
 .../ClassifierTrainingSetIndexer.java              |    516 +-
 .../java/opennlp/tools/fca/BasicLevelMetrics.java  |   1300 +-
 .../java/opennlp/tools/fca/ConceptLattice.java     |    570 +-
 .../main/java/opennlp/tools/fca/FcaConverter.java  |    144 +-
 .../src/main/java/opennlp/tools/fca/FcaReader.java |    190 +-
 .../src/main/java/opennlp/tools/fca/FcaWriter.java |    262 +-
 .../main/java/opennlp/tools/fca/FormalConcept.java |    302 +-
 .../src/main/java/opennlp/tools/fca/Measures.java  |    294 +-
 .../opennlp/tools/fca/RandomNoiseGenerator.java    |    124 +-
 .../jsmlearning/FeatureSpaceCoverageProcessor.java |    630 +-
 .../tools/jsmlearning/IntersectionSetBuilder.java  |    722 +-
 .../tools/jsmlearning/JSMLearnerOnLatticeBase.java |    676 +-
 .../JSMLearnerOnLatticeWithAbduction.java          |    174 +-
 .../JSMLearnerOnLatticeWithDeduction.java          |    484 +-
 .../tools/jsmlearning/ProfileReaderWriter.java     |    326 +-
 .../tools/jsmlearning/TreeKernelRunner.java        |    256 +-
 .../main/java/opennlp/tools/nl2code/NL2Obj.java    |    600 +-
 .../opennlp/tools/nl2code/NL2ObjCreateAssign.java  |    506 +-
 .../opennlp/tools/nl2code/ObjectControlOp.java     |    118 +-
 .../java/opennlp/tools/nl2code/ObjectPhrase.java   |    248 +-
 .../tools/nl2code/ObjectPhraseListForSentence.java |    206 +-
 .../java/opennlp/tools/parse_thicket/ArcType.java  |    106 +-
 .../opennlp/tools/parse_thicket/IGeneralizer.java  |     24 +-
 .../opennlp/tools/parse_thicket/ParseTreeNode.java |    472 +-
 .../java/opennlp/tools/parse_thicket/Triple.java   |     96 +-
 .../WordWordInterSentenceRelationArc.java          |    136 +-
 .../BingQueryRunnerMultipageSearchResults.java     |    144 +-
 .../parse_thicket/apps/MinedSentenceProcessor.java |    418 +-
 .../apps/MostFrequentWordsFromPageGetter.java      |    140 +-
 .../apps/MultiSentenceSearchResultsProcessor.java  |    370 +-
 .../parse_thicket/apps/SnippetToParagraph.java     |    756 +-
 .../apps/WebPageContentSentenceExtractor.java      |    294 +-
 .../tools/parse_thicket/apps/WebPageExtractor.java |    316 +-
 .../CommunicativeActionsArcBuilder.java            |    322 +-
 .../CommunicativeActionsAttribute.java             |     58 +-
 .../parse_thicket/matching/LemmaGeneralizer.java   |    188 +-
 .../matching/NERPhraseGeneralizer.java             |    542 +-
 .../parse_thicket/matching/PhraseGeneralizer.java  |    536 +-
 .../matching/PhraseGroupGeneralizer.java           |    230 +-
 .../LinguisticPatternStructure.java                |    388 +-
 .../pattern_structure/PatternStructureWriter.java  |    116 +-
 .../pattern_structure/PhraseConcept.java           |    168 +-
 .../pattern_structure/PhrasePatternStructure.java  |    384 +-
 .../tools/similarity/apps/BingQueryRunner.java     |    396 +-
 .../tools/similarity/apps/BingResponse.java        |    204 +-
 .../tools/similarity/apps/BingWebQueryRunner.java  |    230 +-
 .../tools/similarity/apps/ContentGenerator.java    |    850 +-
 .../similarity/apps/ContentGeneratorRunner.java    |    192 +-
 .../similarity/apps/ContentGeneratorSupport.java   |    984 +-
 .../opennlp/tools/similarity/apps/Fragment.java    |    194 +-
 .../apps/GeneratedSentenceProcessor.java           |    630 +-
 .../opennlp/tools/similarity/apps/HitBase.java     |    520 +-
 .../tools/similarity/apps/HitBaseComparable.java   |     52 +-
 .../similarity/apps/RelatedSentenceFinder.java     |   1928 +-
 .../similarity/apps/RelatedSentenceFinderML.java   |    574 +-
 .../similarity/apps/SearchResultsProcessor.java    |    226 +-
 .../similarity/apps/SentenceTranslate.java.txt     |    422 +-
 .../apps/SpeechRecognitionResultsProcessor.java    |    338 +-
 .../similarity/apps/StoryDiscourseNavigator.java   |    326 +-
 .../apps/WebSearchEngineResultsScraper.java        |    516 +-
 .../tools/similarity/apps/YahooAnswersMiner.java   |    188 +-
 .../java/opennlp/tools/similarity/apps/gen.txt     |    110 +-
 .../tools/similarity/apps/solr/Comment.java        |    222 +-
 .../tools/similarity/apps/solr/CommentsRel.java    |    256 +-
 .../apps/solr/ContentGeneratorRequestHandler.java  |    482 +-
 .../apps/solr/IterativeQueryComponent.java         |    374 +-
 .../apps/solr/NLProgram2CodeRequestHandler.java    |    178 +-
 .../apps/solr/QueryExpansionRequestHandler.java    |    104 +-
 .../solr/SearchResultsReRankerRequestHandler.java  |    456 +-
 .../tools/similarity/apps/solr/WordDocBuilder.java |    496 +-
 .../apps/solr/WordDocBuilderEndNotes.java          |    422 +-
 .../solr/WordDocBuilderSingleImageSearchCall.java  |    330 +-
 .../tools/similarity/apps/solr/cgRequestForm.html  |    314 +-
 .../similarity/apps/taxo_builder/AriAdapter.java   |    186 +-
 .../apps/taxo_builder/DomainTaxonomyExtender.java  |    458 +-
 .../taxo_builder/TaxoQuerySnapshotMatcher.java     |    322 +-
 .../taxo_builder/TaxonomyExtenderViaMebMining.java |    374 +-
 .../apps/taxo_builder/TaxonomySerializer.java      |    264 +-
 .../similarity/apps/utils/CountItemsList.java      |    126 +-
 .../tools/similarity/apps/utils/FileHandler.java   |    770 +-
 .../apps/utils/LevensteinDistanceFinder.java       |    290 +-
 .../tools/similarity/apps/utils/PageFetcher.java   |    304 +-
 .../tools/similarity/apps/utils/StringCleaner.java |     58 +-
 .../apps/utils/StringDistanceMeasurer.java         |    660 +-
 .../opennlp/tools/similarity/apps/utils/Utils.java |   1440 +-
 .../tools/similarity/apps/utils/ValueSortMap.java  |    572 +-
 .../main/java/opennlp/tools/stemmer/PStemmer.java  |   1034 +-
 .../textsimilarity/EpistemicStatesTrainingSet.java |    226 +-
 .../textsimilarity/GeneralizationListReducer.java  |    300 +-
 .../opennlp/tools/textsimilarity/LemmaPair.java    |    162 +-
 .../opennlp/tools/textsimilarity/POSManager.java   |    130 +-
 .../tools/textsimilarity/ParseTreeChunk.java       |   1140 +-
 .../textsimilarity/ParseTreeChunkComparable.java   |     64 +-
 .../tools/textsimilarity/ParseTreeMatcher.java     |    508 +-
 .../ParseTreeMatcherDeterministic.java             |    550 +-
 .../textsimilarity/SentencePairMatchResult.java    |    200 +-
 .../textsimilarity/TextSimilarityBagOfWords.java   |   1786 +-
 .../chunker2matcher/ParserCacheSerializer.java     |    296 +-
 .../java/opennlp/tools/textsimilarity/readme.txt   |    270 +-
 opennlp-similarity/src/main/readme/LICENSE         |    458 +-
 opennlp-similarity/src/main/readme/NOTICE          |     18 +-
 .../MultiSentenceSearchResultsProcessorTest.java   |    142 +-
 .../apps/RelatedSentenceFinderTest.java            |    280 +-
 .../apps/StoryDiscourseNavigatorTest.java          |     78 +-
 .../pattern_structure/JSMLearnerOnLatticeTest.java |    616 +-
 .../pattern_structure/PhraseTest.java              |    338 +-
 .../apps/SearchResultsProcessorTest.java           |    112 +-
 .../apps/taxo_builder/TaxonomyBuildMatchTest.java  |    108 +-
 .../ParserChunker2MatcherProcessorTest.java        |    270 +-
 .../chunker2matcher/PhraseNodeTest.java            |    120 +-
 .../src/test/resources/fca/sports.cxt              |    110 +-
 .../src/test/resources/taxonomies/irs_dom.ari      |   3074 +-
 .../src/test/resources/taxonomies/taxo_English.xml | 230202 +++++++++---------
 .../DisambiguatorEvaluatorParams.java              |     82 +-
 .../disambiguator/DisambiguatorEvaluatorTool.java  |    190 +-
 .../cmdline/disambiguator/DisambiguatorTool.java   |    274 +-
 .../disambiguator/DisambiguatorToolParams.java     |     82 +-
 .../disambiguator/IMSWSDContextGenerator.java      |    326 +-
 .../disambiguator/IMSWSDSequenceValidator.java     |    100 +-
 .../main/java/opennlp/tools/disambiguator/MFS.java |    306 +-
 .../disambiguator/OSCCWSDContextGenerator.java     |    224 +-
 .../tools/disambiguator/WSDContextGenerator.java   |     68 +-
 .../opennlp/tools/disambiguator/WSDHelper.java     |   1386 +-
 .../java/opennlp/tools/disambiguator/WSDModel.java |    308 +-
 .../opennlp/tools/disambiguator/WSDSample.java     |    452 +-
 .../tools/disambiguator/WSDSampleStream.java       |    156 +-
 .../disambiguator/WSDisambiguatorFactory.java      |    122 +-
 .../tools/disambiguator/datareader/Paragraph.java  |    194 +-
 .../datareader/SemcorReaderExtended.java           |    684 +-
 .../disambiguator/datareader/SensevalReader.java   |    610 +-
 .../tools/disambiguator/datareader/Sentence.java   |    174 +-
 .../tools/disambiguator/datareader/Word.java       |    598 +-
 .../tools/disambiguator/MFSEvaluatorTest.java      |    130 +-
 .../opennlp/tools/disambiguator/MFSTester.java     |    262 +-
 172 files changed, 145986 insertions(+), 145986 deletions(-)

diff --git a/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/util/ContainingConstraint.java b/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/util/ContainingConstraint.java
index 75e267e..c5645fd 100644
--- a/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/util/ContainingConstraint.java
+++ b/caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/util/ContainingConstraint.java
@@ -1,73 +1,73 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ 
-
-package org.apache.opennlp.caseditor.util;
-
-import java.util.Collection;
-import java.util.LinkedList;
-
-import org.apache.uima.cas.FSMatchConstraint;
-import org.apache.uima.cas.FeatureStructure;
-import org.apache.uima.cas.text.AnnotationFS;
-
-/**
- * Checks if an AnnotationFS is contained by the given AnnotationFS.
- */
-public final class ContainingConstraint implements FSMatchConstraint {
-  private static final long serialVersionUID = 1;
-
-  private final Collection<AnnotationFS> mContainingAnnotations = new LinkedList<>();
-
-  /**
-   * Initializes a new instance.
-   */
-  public ContainingConstraint() {
-    // does currently nothing
-  }
-
-  /**
-   * Initializes a new instance.
-   */
-  public ContainingConstraint(AnnotationFS containingAnnotation) {
-    mContainingAnnotations.add(containingAnnotation);
-  }
-  
-  /**
-   * Checks if the given FeatureStructure match the constraint.
-   */
-  public boolean match(FeatureStructure featureStructure) {
-    if (!(featureStructure instanceof AnnotationFS)) {
-      return false;
-    }
-
-    AnnotationFS annotation = (AnnotationFS) featureStructure;
-
-    for (AnnotationFS containingAnnotation : mContainingAnnotations) {
-      if (isContaining(annotation, containingAnnotation)) {
-        return true;
-      }
-    }
-
-    return false;
-  }
-
-  private boolean isContaining(AnnotationFS annotation, AnnotationFS containing) {
-    return (containing.getBegin() <= annotation.getBegin())
-            && (containing.getEnd() >= annotation.getEnd());
-  }
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ 
+
+package org.apache.opennlp.caseditor.util;
+
+import java.util.Collection;
+import java.util.LinkedList;
+
+import org.apache.uima.cas.FSMatchConstraint;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.cas.text.AnnotationFS;
+
+/**
+ * Checks if an AnnotationFS is contained by the given AnnotationFS.
+ */
+public final class ContainingConstraint implements FSMatchConstraint {
+  private static final long serialVersionUID = 1;
+
+  private final Collection<AnnotationFS> mContainingAnnotations = new LinkedList<>();
+
+  /**
+   * Initializes a new instance.
+   */
+  public ContainingConstraint() {
+    // does currently nothing
+  }
+
+  /**
+   * Initializes a new instance.
+   */
+  public ContainingConstraint(AnnotationFS containingAnnotation) {
+    mContainingAnnotations.add(containingAnnotation);
+  }
+  
+  /**
+   * Checks if the given FeatureStructure match the constraint.
+   */
+  public boolean match(FeatureStructure featureStructure) {
+    if (!(featureStructure instanceof AnnotationFS)) {
+      return false;
+    }
+
+    AnnotationFS annotation = (AnnotationFS) featureStructure;
+
+    for (AnnotationFS containingAnnotation : mContainingAnnotations) {
+      if (isContaining(annotation, containingAnnotation)) {
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+  private boolean isContaining(AnnotationFS annotation, AnnotationFS containing) {
+    return (containing.getBegin() <= annotation.getBegin())
+            && (containing.getEnd() >= annotation.getEnd());
+  }
+
 }
\ No newline at end of file
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java
index 6fe5937..cd73202 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java
@@ -1,115 +1,115 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.addons.modelbuilder;
-
-import java.io.File;
-
-import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
-import opennlp.addons.modelbuilder.impls.FileKnownEntityProvider;
-import opennlp.addons.modelbuilder.impls.FileModelValidatorImpl;
-import opennlp.addons.modelbuilder.impls.FileSentenceProvider;
-import opennlp.addons.modelbuilder.impls.GenericModelGenerator;
-import opennlp.addons.modelbuilder.impls.GenericModelableImpl;
-
-/**
- * Utilizes the file-based implementations to produce an NER model from user
- * The basic processing is such
- * read in the list of known entities
- * annotate the sentences based on the list of known entities
- * create a model from the annotations
- * perform NER with the model on the sentences
- * add the NER results to the annotations
- * rebuild the model loop defined data.
- */
-public class DefaultModelBuilderUtil {
-
-  /**
-   *
-   * @param sentences                a file that contains one sentence per line.
-   *                                 There should be at least 15K sentences
-   *                                 consisting of a representative sample from
-   *                                 user data
-   * @param knownEntities            a file consisting of a simple list of
-   *                                 unambiguous entities, one entry per line.
-   *                                 For instance, if one was trying to build a
-   *                                 person NER model then this file would be a
-   *                                 list of person names that are unambiguous
-   *                                 and are known to exist in the sentences
-   *                                 file
-   * @param knownEntitiesBlacklist   This file contains a list of known bad hits
-   *                                 that the NER phase of this processing might
-   *                                 catch early one before the model iterates
-   *                                 to maturity
-   * @param modelOutFile             the location where the model will be
-   *                                 written to
-   * @param annotatedSentenceOutFile where the annotated sentences produced by
-   *                                 this process will be written to
-   * @param namedEntityType          the type of entity... for example, person,
-   *                                 location, organization...
-   * @param iterations               how many times to repeat the iterative loop
-   *                                 of annotation, model generation, and NER
-   */
-  public static void generateModel(File sentences, File knownEntities, File knownEntitiesBlacklist,
-          File modelOutFile, File annotatedSentenceOutFile, String namedEntityType, int iterations) {
-    SemiSupervisedModelGenerator modelGenerator = new GenericModelGenerator();
-    BaseModelBuilderParams params = new BaseModelBuilderParams();
-    params.setAnnotatedTrainingDataFile(annotatedSentenceOutFile);
-    params.setSentenceFile(sentences);
-    params.setEntityType(namedEntityType);
-    params.setKnownEntitiesFile(knownEntities);
-    params.setModelFile(modelOutFile);
-    params.setKnownEntityBlacklist(knownEntitiesBlacklist);
-    /*
-     * sentence providers feed this process with user data derived sentences
-     * this impl just reads line by line through a file
-     */
-    SentenceProvider sentenceProvider = new FileSentenceProvider();
-    sentenceProvider.setParameters(params);
-    /*
-     * KnownEntityProviders provide a seed list of known entities... such as
-     * Barack Obama for person, or Germany for location obviously these would
-     * want to be prolific, non ambiguous names
-     */
-    KnownEntityProvider knownEntityProvider = new FileKnownEntityProvider();
-    knownEntityProvider.setParameters(params);
-    /*
-     * ModelGenerationValidators try to weed out bad hits by the iterations of
-     * the name finder. Since this is a recursive process, with each iteration
-     * the namefinder will get more and more greedy if bad entities are allowed
-     * in this provides a mechanism for throwing out obviously bad hits. A good
-     * impl may be to make sure a location is actually within a noun phrase
-     * etc...users can make this as specific as they need for their dat and
-     * their use case
-     */
-    ModelGenerationValidator validator = new FileModelValidatorImpl();
-    validator.setParameters(params);
-    /*
-     * Modelable's write and read the annotated sentences, as well as create and
-     * write the NER models
-     */
-    Modelable modelable = new GenericModelableImpl();
-    modelable.setParameters(params);
-
-    /*
-     * the modelGenerator actually runs the process with a set number of
-     * iterations... could be better by actually calculating the diff between
-     * runs and stopping based on a threshold, but for extremely large sentence
-     * sets this may be too much.
-     */
-    modelGenerator.build(sentenceProvider, knownEntityProvider, validator, modelable, iterations);
-
-  }
-}
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.addons.modelbuilder;
+
+import java.io.File;
+
+import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
+import opennlp.addons.modelbuilder.impls.FileKnownEntityProvider;
+import opennlp.addons.modelbuilder.impls.FileModelValidatorImpl;
+import opennlp.addons.modelbuilder.impls.FileSentenceProvider;
+import opennlp.addons.modelbuilder.impls.GenericModelGenerator;
+import opennlp.addons.modelbuilder.impls.GenericModelableImpl;
+
+/**
+ * Utilizes the file-based implementations to produce an NER model from user
+ * The basic processing is such
+ * read in the list of known entities
+ * annotate the sentences based on the list of known entities
+ * create a model from the annotations
+ * perform NER with the model on the sentences
+ * add the NER results to the annotations
+ * rebuild the model loop defined data.
+ */
+public class DefaultModelBuilderUtil {
+
+  /**
+   *
+   * @param sentences                a file that contains one sentence per line.
+   *                                 There should be at least 15K sentences
+   *                                 consisting of a representative sample from
+   *                                 user data
+   * @param knownEntities            a file consisting of a simple list of
+   *                                 unambiguous entities, one entry per line.
+   *                                 For instance, if one was trying to build a
+   *                                 person NER model then this file would be a
+   *                                 list of person names that are unambiguous
+   *                                 and are known to exist in the sentences
+   *                                 file
+   * @param knownEntitiesBlacklist   This file contains a list of known bad hits
+   *                                 that the NER phase of this processing might
+   *                                 catch early one before the model iterates
+   *                                 to maturity
+   * @param modelOutFile             the location where the model will be
+   *                                 written to
+   * @param annotatedSentenceOutFile where the annotated sentences produced by
+   *                                 this process will be written to
+   * @param namedEntityType          the type of entity... for example, person,
+   *                                 location, organization...
+   * @param iterations               how many times to repeat the iterative loop
+   *                                 of annotation, model generation, and NER
+   */
+  public static void generateModel(File sentences, File knownEntities, File knownEntitiesBlacklist,
+          File modelOutFile, File annotatedSentenceOutFile, String namedEntityType, int iterations) {
+    SemiSupervisedModelGenerator modelGenerator = new GenericModelGenerator();
+    BaseModelBuilderParams params = new BaseModelBuilderParams();
+    params.setAnnotatedTrainingDataFile(annotatedSentenceOutFile);
+    params.setSentenceFile(sentences);
+    params.setEntityType(namedEntityType);
+    params.setKnownEntitiesFile(knownEntities);
+    params.setModelFile(modelOutFile);
+    params.setKnownEntityBlacklist(knownEntitiesBlacklist);
+    /*
+     * sentence providers feed this process with user data derived sentences
+     * this impl just reads line by line through a file
+     */
+    SentenceProvider sentenceProvider = new FileSentenceProvider();
+    sentenceProvider.setParameters(params);
+    /*
+     * KnownEntityProviders provide a seed list of known entities... such as
+     * Barack Obama for person, or Germany for location obviously these would
+     * want to be prolific, non ambiguous names
+     */
+    KnownEntityProvider knownEntityProvider = new FileKnownEntityProvider();
+    knownEntityProvider.setParameters(params);
+    /*
+     * ModelGenerationValidators try to weed out bad hits by the iterations of
+     * the name finder. Since this is a recursive process, with each iteration
+     * the namefinder will get more and more greedy if bad entities are allowed
+     * in this provides a mechanism for throwing out obviously bad hits. A good
+     * impl may be to make sure a location is actually within a noun phrase
+     * etc...users can make this as specific as they need for their dat and
+     * their use case
+     */
+    ModelGenerationValidator validator = new FileModelValidatorImpl();
+    validator.setParameters(params);
+    /*
+     * Modelable's write and read the annotated sentences, as well as create and
+     * write the NER models
+     */
+    Modelable modelable = new GenericModelableImpl();
+    modelable.setParameters(params);
+
+    /*
+     * the modelGenerator actually runs the process with a set number of
+     * iterations... could be better by actually calculating the diff between
+     * runs and stopping based on a threshold, but for extremely large sentence
+     * sets this may be too much.
+     */
+    modelGenerator.build(sentenceProvider, knownEntityProvider, validator, modelable, iterations);
+
+  }
+}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
index fa2a00e..39f5e5d 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
@@ -1,42 +1,42 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.addons.modelbuilder;
-
-import java.util.Set;
-
-/**
- * Supplies a list of known entities (a list of names or locations)
- */
-public interface KnownEntityProvider extends ModelParameter {
-  /**
-   * returns a list of known non ambiguous entities.
-   * @return a set of entities
-   */
-  Set<String> getKnownEntities();
-
-  /**
-   * adds to the set of known entities. Overriding classes should hold this list in a class level set.
-   * @param unambiguousEntity
-   */
-  void addKnownEntity(String unambiguousEntity);
-
-  /**
-   * defines the type of entity that the set contains, ie person, location, organization.
-   * @return
-   */
-  String getKnownEntitiesType();
-
-}
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.addons.modelbuilder;
+
+import java.util.Set;
+
+/**
+ * Supplies a list of known entities (a list of names or locations)
+ */
+public interface KnownEntityProvider extends ModelParameter {
+  /**
+   * returns a list of known non ambiguous entities.
+   * @return a set of entities
+   */
+  Set<String> getKnownEntities();
+
+  /**
+   * adds to the set of known entities. Overriding classes should hold this list in a class level set.
+   * @param unambiguousEntity
+   */
+  void addKnownEntity(String unambiguousEntity);
+
+  /**
+   * defines the type of entity that the set contains, ie person, location, organization.
+   * @return
+   */
+  String getKnownEntitiesType();
+
+}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
index e8e8f7e..047e8a3 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
@@ -1,30 +1,30 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.addons.modelbuilder;
-
-import java.util.Collection;
-
-/**
- * Validates results from the iterative namefinding
- */
-public interface ModelGenerationValidator extends ModelParameter {
-
-  Boolean validSentence(String sentence);
-
-  Boolean validNamedEntity(String namedEntity);
-
-  Collection<String> getBlackList();
-}
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.addons.modelbuilder;
+
+import java.util.Collection;
+
+/**
+ * Validates results from the iterative namefinding
+ */
+public interface ModelGenerationValidator extends ModelParameter {
+
+  Boolean validSentence(String sentence);
+
+  Boolean validNamedEntity(String namedEntity);
+
+  Collection<String> getBlackList();
+}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelParameter.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelParameter.java
index e2e8649..1285323 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelParameter.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelParameter.java
@@ -1,24 +1,24 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.addons.modelbuilder;
-
-import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
-
-public interface ModelParameter<T extends  BaseModelBuilderParams>{
-   
-  void setParameters(T params);
-
-}
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.addons.modelbuilder;
+
+import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
+
+public interface ModelParameter<T extends  BaseModelBuilderParams>{
+   
+  void setParameters(T params);
+
+}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/Modelable.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/Modelable.java
index 7c8f6a4..2fdcce8 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/Modelable.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/Modelable.java
@@ -1,42 +1,42 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.addons.modelbuilder;
-
-import java.util.Set;
-
-import opennlp.tools.namefind.TokenNameFinderModel;
-/**
- *
- */
-public interface Modelable extends ModelParameter {
-
-  String annotate(String sentence, String namedEntity, String entityType);
-
-  void writeAnnotatedSentences();
-
-  Set<String> getAnnotatedSentences();
-
-  void setAnnotatedSentences(Set<String> annotatedSentences);
-
-  void addAnnotatedSentence(String annotatedSentence);
-
-  void buildModel( String entityType);
-
-  TokenNameFinderModel getModel();
-
-  String[] tokenizeSentenceToWords(String sentence);
-
-}
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.addons.modelbuilder;
+
+import java.util.Set;
+
+import opennlp.tools.namefind.TokenNameFinderModel;
+/**
+ *
+ */
+public interface Modelable extends ModelParameter {
+
+  String annotate(String sentence, String namedEntity, String entityType);
+
+  void writeAnnotatedSentences();
+
+  Set<String> getAnnotatedSentences();
+
+  void setAnnotatedSentences(Set<String> annotatedSentences);
+
+  void addAnnotatedSentence(String annotatedSentence);
+
+  void buildModel( String entityType);
+
+  TokenNameFinderModel getModel();
+
+  String[] tokenizeSentenceToWords(String sentence);
+
+}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SemiSupervisedModelGenerator.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SemiSupervisedModelGenerator.java
index 22807c9..0969c14 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SemiSupervisedModelGenerator.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SemiSupervisedModelGenerator.java
@@ -1,24 +1,24 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.addons.modelbuilder;
-
-import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
-
-public interface SemiSupervisedModelGenerator extends ModelParameter<BaseModelBuilderParams> {
-
-  void build(SentenceProvider sentenceProvider, KnownEntityProvider knownEntityProvider, 
-          ModelGenerationValidator validator, Modelable modelable, int iterations);
-}
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.addons.modelbuilder;
+
+import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
+
+public interface SemiSupervisedModelGenerator extends ModelParameter<BaseModelBuilderParams> {
+
+  void build(SentenceProvider sentenceProvider, KnownEntityProvider knownEntityProvider, 
+          ModelGenerationValidator validator, Modelable modelable, int iterations);
+}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SentenceProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SentenceProvider.java
index 1c655ad..ec86739 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SentenceProvider.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SentenceProvider.java
@@ -1,24 +1,24 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.addons.modelbuilder;
-
-import java.util.Set;
-import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
-
-public interface SentenceProvider extends ModelParameter<BaseModelBuilderParams> {
-
-  Set<String> getSentences();
-}
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.addons.modelbuilder;
+
+import java.util.Set;
+import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
+
+public interface SentenceProvider extends ModelParameter<BaseModelBuilderParams> {
+
+  Set<String> getSentences();
+}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java
index 6173acc..7212629 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java
@@ -1,89 +1,89 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.addons.modelbuilder.impls;
-
-import java.io.File;
-import java.util.Map;
-
-/**
- * Used to pass params through the processing
- */
-public class BaseModelBuilderParams {
-
-  private File modelFile;
-  private File sentenceFile;
-  private File knownEntitiesFile;
-  private File knownEntityBlacklist;
-  private File annotatedTrainingDataFile;
-  private String entityType;
-  private Map<String, String> additionalParams;
-
-  public File getModelFile() {
-    return modelFile;
-  }
-
-  public void setModelFile(File modelFile) {
-    this.modelFile = modelFile;
-  }
-
-  public File getSentenceFile() {
-    return sentenceFile;
-  }
-
-  public void setSentenceFile(File sentenceFile) {
-    this.sentenceFile = sentenceFile;
-  }
-
-  public File getKnownEntitiesFile() {
-    return knownEntitiesFile;
-  }
-
-  public void setKnownEntitiesFile(File knownEntitiesFile) {
-    this.knownEntitiesFile = knownEntitiesFile;
-  }
-
-  public File getKnownEntityBlacklist() {
-    return knownEntityBlacklist;
-  }
-
-  public void setKnownEntityBlacklist(File knownEntityBlacklist) {
-    this.knownEntityBlacklist = knownEntityBlacklist;
-  }
-
-  public Map<String, String> getAdditionalParams() {
-    return additionalParams;
-  }
-
-  public void setAdditionalParams(Map<String, String> additionalParams) {
-    this.additionalParams = additionalParams;
-  }
-
-  public String getEntityType() {
-    return entityType;
-  }
-
-  public void setEntityType(String entityType) {
-    this.entityType = entityType;
-  }
-
-  public File getAnnotatedTrainingDataFile() {
-    return annotatedTrainingDataFile;
-  }
-
-  public void setAnnotatedTrainingDataFile(File annotatedTrainingDataFile) {
-    this.annotatedTrainingDataFile = annotatedTrainingDataFile;
-  }
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.addons.modelbuilder.impls;
+
+import java.io.File;
+import java.util.Map;
+
+/**
+ * Used to pass params through the processing
+ */
+public class BaseModelBuilderParams {
+
+  private File modelFile;
+  private File sentenceFile;
+  private File knownEntitiesFile;
+  private File knownEntityBlacklist;
+  private File annotatedTrainingDataFile;
+  private String entityType;
+  private Map<String, String> additionalParams;
+
+  public File getModelFile() {
+    return modelFile;
+  }
+
+  public void setModelFile(File modelFile) {
+    this.modelFile = modelFile;
+  }
+
+  public File getSentenceFile() {
+    return sentenceFile;
+  }
+
+  public void setSentenceFile(File sentenceFile) {
+    this.sentenceFile = sentenceFile;
+  }
+
+  public File getKnownEntitiesFile() {
+    return knownEntitiesFile;
+  }
+
+  public void setKnownEntitiesFile(File knownEntitiesFile) {
+    this.knownEntitiesFile = knownEntitiesFile;
+  }
+
+  public File getKnownEntityBlacklist() {
+    return knownEntityBlacklist;
+  }
+
+  public void setKnownEntityBlacklist(File knownEntityBlacklist) {
+    this.knownEntityBlacklist = knownEntityBlacklist;
+  }
+
+  public Map<String, String> getAdditionalParams() {
+    return additionalParams;
+  }
+
+  public void setAdditionalParams(Map<String, String> additionalParams) {
+    this.additionalParams = additionalParams;
+  }
+
+  public String getEntityType() {
+    return entityType;
+  }
+
+  public void setEntityType(String entityType) {
+    this.entityType = entityType;
+  }
+
+  public File getAnnotatedTrainingDataFile() {
+    return annotatedTrainingDataFile;
+  }
+
+  public void setAnnotatedTrainingDataFile(File annotatedTrainingDataFile) {
+    this.annotatedTrainingDataFile = annotatedTrainingDataFile;
+  }
 }
\ No newline at end of file
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
index 841f6db..69fce62 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
@@ -1,75 +1,75 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.addons.modelbuilder.impls;
-
-import java.io.BufferedReader;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.charset.StandardCharsets;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import opennlp.addons.modelbuilder.KnownEntityProvider;
-
-public class FileKnownEntityProvider implements KnownEntityProvider {
- 
-  final Set<String> knownEntities = new HashSet<>();
-  BaseModelBuilderParams params;
-
-  @Override
-  public Set<String> getKnownEntities() {
-    if (knownEntities.isEmpty()) {
-      try {
-        InputStream fis;
-        BufferedReader br;
-        String line;
-
-        fis = new FileInputStream(params.getKnownEntitiesFile());
-        br = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8));
-        while ((line = br.readLine()) != null) {
-          knownEntities.add(line);
-        }
-
-        // Done with the file
-        br.close();
-        br = null;
-        fis = null;
-      } catch (IOException ex) {
-        Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
-      }
-    }
-    return knownEntities;
-  }
-
-  @Override
-  public void addKnownEntity(String unambiguousEntity) {
-    knownEntities.add(unambiguousEntity);
-  }
-
-  @Override
-  public String getKnownEntitiesType() {
-    return params.getEntityType();
-  }
-
-  @Override
-  public void setParameters(BaseModelBuilderParams params) {
-    this.params = params;
-  }
-}
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.addons.modelbuilder.impls;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import opennlp.addons.modelbuilder.KnownEntityProvider;
+
+public class FileKnownEntityProvider implements KnownEntityProvider {
+ 
+  final Set<String> knownEntities = new HashSet<>();
+  BaseModelBuilderParams params;
+
+  @Override
+  public Set<String> getKnownEntities() {
+    if (knownEntities.isEmpty()) {
+      try {
+        InputStream fis;
+        BufferedReader br;
+        String line;
+
+        fis = new FileInputStream(params.getKnownEntitiesFile());
+        br = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8));
+        while ((line = br.readLine()) != null) {
+          knownEntities.add(line);
+        }
+
+        // Done with the file
+        br.close();
+        br = null;
+        fis = null;
+      } catch (IOException ex) {
+        Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
+      }
+    }
+    return knownEntities;
+  }
+
+  @Override
+  public void addKnownEntity(String unambiguousEntity) {
+    knownEntities.add(unambiguousEntity);
+  }
+
+  @Override
+  public String getKnownEntitiesType() {
+    return params.getEntityType();
+  }
+
+  @Override
+  public void setParameters(BaseModelBuilderParams params) {
+    this.params = params;
+  }
+}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
index 8bc4954..e005615 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
@@ -1,94 +1,94 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.addons.modelbuilder.impls;
-
-import java.io.BufferedReader;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.charset.StandardCharsets;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import opennlp.addons.modelbuilder.ModelGenerationValidator;
-
-/**
- *Validates NER results input before inclusion into the model
- */
-public class FileModelValidatorImpl implements ModelGenerationValidator {
-
-  private final Set<String> badentities = new HashSet<>();
-  BaseModelBuilderParams params;
-
-  @Override
-  public void setParameters(BaseModelBuilderParams params) {
-    this.params = params;
-  }
-
-  @Override
-  public Boolean validSentence(String sentence) {
-    //returning true by default, because the sentence provider will  return only "valid" sentences in this case
-    return true;
-  }
-
-  @Override
-  public Boolean validNamedEntity(String namedEntity) {
-
-    if (badentities.isEmpty()) {
-      getBlackList();
-    }
-//
-//    Pattern p = Pattern.compile("[0-9]", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
-//    if (p.matcher(namedEntity).find()) {
-//      return false;
-//    }
-    boolean b = true;
-    if (badentities.contains(namedEntity.toLowerCase())) {
-      b = false;
-    }
-    return b;
-  }
-
-  @Override
-  public Collection<String> getBlackList() {
-    if (params.getKnownEntityBlacklist() == null) {
-      return badentities;
-    }
-    if (!badentities.isEmpty()) {
-      try {
-        InputStream fis;
-        BufferedReader br;
-        String line;
-
-        fis = new FileInputStream(params.getKnownEntityBlacklist());
-        br = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8));
-        while ((line = br.readLine()) != null) {
-          badentities.add(line);
-        }
-        br.close();
-        br = null;
-        fis = null;
-      } catch (IOException ex) {
-        Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
-      }
-    }
-    return badentities;
-  }
-}
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.addons.modelbuilder.impls;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import opennlp.addons.modelbuilder.ModelGenerationValidator;
+
+/**
+ *Validates NER results input before inclusion into the model
+ */
+public class FileModelValidatorImpl implements ModelGenerationValidator {
+
+  private final Set<String> badentities = new HashSet<>();
+  BaseModelBuilderParams params;
+
+  @Override
+  public void setParameters(BaseModelBuilderParams params) {
+    this.params = params;
+  }
+
+  @Override
+  public Boolean validSentence(String sentence) {
+    //returning true by default, because the sentence provider will  return only "valid" sentences in this case
+    return true;
+  }
+
+  @Override
+  public Boolean validNamedEntity(String namedEntity) {
+
+    if (badentities.isEmpty()) {
+      getBlackList();
+    }
+//
+//    Pattern p = Pattern.compile("[0-9]", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
+//    if (p.matcher(namedEntity).find()) {
+//      return false;
+//    }
+    boolean b = true;
+    if (badentities.contains(namedEntity.toLowerCase())) {
+      b = false;
+    }
+    return b;
+  }
+
+  @Override
+  public Collection<String> getBlackList() {
+    if (params.getKnownEntityBlacklist() == null) {
+      return badentities;
+    }
+    if (!badentities.isEmpty()) {
+      try {
+        InputStream fis;
+        BufferedReader br;
+        String line;
+
+        fis = new FileInputStream(params.getKnownEntityBlacklist());
+        br = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8));
+        while ((line = br.readLine()) != null) {
+          badentities.add(line);
+        }
+        br.close();
+        br = null;
+        fis = null;
+      } catch (IOException ex) {
+        Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
+      }
+    }
+    return badentities;
+  }
+}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java
index bf6fe6f..acd288b 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java
@@ -1,70 +1,70 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.addons.modelbuilder.impls;
-
-import java.io.BufferedReader;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.charset.Charset;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import opennlp.addons.modelbuilder.SentenceProvider;
-
-/**
- * Provides user sentences via a simple text file
- */
-public class FileSentenceProvider implements SentenceProvider {
-
-  private final Set<String> sentences = new HashSet<>();
-  BaseModelBuilderParams params ;
-
-  @Override
-  public Set<String> getSentences() {
-     if (sentences.isEmpty()) {
-      try {
-        InputStream fis;
-        BufferedReader br;
-        String line;
-
-        fis = new FileInputStream(params.getSentenceFile());
-        br = new BufferedReader(new InputStreamReader(fis, Charset.forName("UTF-8")));
-        int i=0;
-        while ((line = br.readLine()) != null) {
-         
-          sentences.add(line);
-        }
-
-        // Done with the file
-        br.close();
-        br = null;
-        fis = null;
-      } catch (IOException ex) {
-        Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
-      }
-    }
-    return sentences;
-  }
-
-  @Override
-  public void setParameters(BaseModelBuilderParams params) {
-    this.params = params;
-  }
-}
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.addons.modelbuilder.impls;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import opennlp.addons.modelbuilder.SentenceProvider;
+
+/**
+ * Provides user sentences via a simple text file
+ */
+public class FileSentenceProvider implements SentenceProvider {
+
+  private final Set<String> sentences = new HashSet<>();
+  BaseModelBuilderParams params ;
+
+  @Override
+  public Set<String> getSentences() {
+     if (sentences.isEmpty()) {
+      try {
+        InputStream fis;
+        BufferedReader br;
+        String line;
+
+        fis = new FileInputStream(params.getSentenceFile());
+        br = new BufferedReader(new InputStreamReader(fis, Charset.forName("UTF-8")));
+        int i=0;
+        while ((line = br.readLine()) != null) {
+         
+          sentences.add(line);
+        }
+
+        // Done with the file
+        br.close();
+        br = null;
+        fis = null;
+      } catch (IOException ex) {
+        Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
+      }
+    }
+    return sentences;
+  }
+
+  @Override
+  public void setParameters(BaseModelBuilderParams params) {
+    this.params = params;
+  }
+}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
index 8b11dac..aaeaa6f 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
@@ -1,104 +1,104 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.addons.modelbuilder.impls;
-
-import java.util.HashMap;
-import java.util.Map;
-
-import opennlp.addons.modelbuilder.KnownEntityProvider;
-import opennlp.addons.modelbuilder.ModelGenerationValidator;
-import opennlp.addons.modelbuilder.Modelable;
-import opennlp.addons.modelbuilder.SemiSupervisedModelGenerator;
-import opennlp.addons.modelbuilder.SentenceProvider;
-import opennlp.tools.namefind.NameFinderME;
-import opennlp.tools.util.Span;
-
-/**
- *
- * Generic impl that handles all processing using the default file implementations
- */
-public class GenericModelGenerator implements SemiSupervisedModelGenerator {
-
-  private Map<String, String> params = new HashMap<>();
-
-  @Override
-  public void setParameters(BaseModelBuilderParams params) {
-    this.params = params.getAdditionalParams();
-  }
-
-  @Override
-  public void build(SentenceProvider sentenceProvider, KnownEntityProvider knownEntityProvider,
-          ModelGenerationValidator validator, Modelable modelable, int iterations) {
-    for (int iteration = 0; iteration < iterations; iteration++) {
-      System.out.println("ITERATION: " + iteration);
-      System.out.println("\tPerfoming Known Entity Annotation");
-      System.out.println("\t\tknowns: " + knownEntityProvider.getKnownEntities().size());
-      System.out.println("\t\treading data....: ");
-      for (String sentence : sentenceProvider.getSentences()) {
-        for (String knownEntity : knownEntityProvider.getKnownEntities()) {
-          if (sentence.contains(knownEntity)) {
-            //if the same sentence has multiple hits should they be annotated separately?
-            modelable.addAnnotatedSentence(modelable.annotate(sentence, knownEntity, knownEntityProvider.getKnownEntitiesType()));
-          }
-        }
-      }
-      if (sentenceProvider.getSentences().isEmpty()) {
-        System.out.println("No sentences in file");
-        return;
-      }
-      if (knownEntityProvider.getKnownEntities().isEmpty()) {
-        System.out.println("No known entities in file");
-        return;
-      }
-      System.out.println("\t\twriting annotated sentences....: ");
-      modelable.writeAnnotatedSentences();
-          System.out.println("\t\tbuilding model.... ");
-      modelable.buildModel(knownEntityProvider.getKnownEntitiesType());
-      System.out.println("\t\tmodel building complete.... ");
-      NameFinderME nf = new NameFinderME(modelable.getModel());
-      System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size());
-      System.out.println("\tPerforming NER with new model");
-      System.out.println("\t\tPrinting NER Results. Add undesired results to the blacklist file and start over");
-      for (String sentence : sentenceProvider.getSentences()) {
-        if (!validator.validSentence(sentence)) {
-          continue;
-        }
-        String[] tokens = modelable.tokenizeSentenceToWords(sentence);
-
-        Span[] find = nf.find(tokens);
-        nf.clearAdaptiveData();
-
-        String[] namedEntities = Span.spansToStrings(find, tokens);
-
-        for (String namedEntity : namedEntities) {
-          System.out.println("\t\t" + namedEntity);
-          if (validator.validNamedEntity(namedEntity)) {
-
-            knownEntityProvider.addKnownEntity(namedEntity);
-            modelable.addAnnotatedSentence(modelable.annotate(sentence, namedEntity, knownEntityProvider.getKnownEntitiesType()));
-
-          } else {
-            System.out.println("\t\t" + namedEntity + "...already blacklisted");
-          }
-        }
-      }
-      System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size());
-      System.out.println("\t\tknowns: " + knownEntityProvider.getKnownEntities().size());
-    }
-    modelable.writeAnnotatedSentences();
-    modelable.buildModel(knownEntityProvider.getKnownEntitiesType());
-  }
-}
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.addons.modelbuilder.impls;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import opennlp.addons.modelbuilder.KnownEntityProvider;
+import opennlp.addons.modelbuilder.ModelGenerationValidator;
+import opennlp.addons.modelbuilder.Modelable;
+import opennlp.addons.modelbuilder.SemiSupervisedModelGenerator;
+import opennlp.addons.modelbuilder.SentenceProvider;
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.util.Span;
+
+/**
+ *
+ * Generic impl that handles all processing using the default file implementations
+ */
+public class GenericModelGenerator implements SemiSupervisedModelGenerator {
+
+  private Map<String, String> params = new HashMap<>();
+
+  @Override
+  public void setParameters(BaseModelBuilderParams params) {
+    this.params = params.getAdditionalParams();
+  }
+
+  @Override
+  public void build(SentenceProvider sentenceProvider, KnownEntityProvider knownEntityProvider,
+          ModelGenerationValidator validator, Modelable modelable, int iterations) {
+    for (int iteration = 0; iteration < iterations; iteration++) {
+      System.out.println("ITERATION: " + iteration);
+      System.out.println("\tPerfoming Known Entity Annotation");
+      System.out.println("\t\tknowns: " + knownEntityProvider.getKnownEntities().size());
+      System.out.println("\t\treading data....: ");
+      for (String sentence : sentenceProvider.getSentences()) {
+        for (String knownEntity : knownEntityProvider.getKnownEntities()) {
+          if (sentence.contains(knownEntity)) {
+            //if the same sentence has multiple hits should they be annotated separately?
+            modelable.addAnnotatedSentence(modelable.annotate(sentence, knownEntity, knownEntityProvider.getKnownEntitiesType()));
+          }
+        }
+      }
+      if (sentenceProvider.getSentences().isEmpty()) {
+        System.out.println("No sentences in file");
+        return;
+      }
+      if (knownEntityProvider.getKnownEntities().isEmpty()) {
+        System.out.println("No known entities in file");
+        return;
+      }
+      System.out.println("\t\twriting annotated sentences....: ");
+      modelable.writeAnnotatedSentences();
+          System.out.println("\t\tbuilding model.... ");
+      modelable.buildModel(knownEntityProvider.getKnownEntitiesType());
+      System.out.println("\t\tmodel building complete.... ");
+      NameFinderME nf = new NameFinderME(modelable.getModel());
+      System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size());
+      System.out.println("\tPerforming NER with new model");
+      System.out.println("\t\tPrinting NER Results. Add undesired results to the blacklist file and start over");
+      for (String sentence : sentenceProvider.getSentences()) {
+        if (!validator.validSentence(sentence)) {
+          continue;
+        }
+        String[] tokens = modelable.tokenizeSentenceToWords(sentence);
+
+        Span[] find = nf.find(tokens);
+        nf.clearAdaptiveData();
+
+        String[] namedEntities = Span.spansToStrings(find, tokens);
+
+        for (String namedEntity : namedEntities) {
+          System.out.println("\t\t" + namedEntity);
+          if (validator.validNamedEntity(namedEntity)) {
+
+            knownEntityProvider.addKnownEntity(namedEntity);
+            modelable.addAnnotatedSentence(modelable.annotate(sentence, namedEntity, knownEntityProvider.getKnownEntitiesType()));
+
+          } else {
+            System.out.println("\t\t" + namedEntity + "...already blacklisted");
+          }
+        }
+      }
+      System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size());
+      System.out.println("\t\tknowns: " + knownEntityProvider.getKnownEntities().size());
+    }
+    modelable.writeAnnotatedSentences();
+    modelable.buildModel(knownEntityProvider.getKnownEntitiesType());
+  }
+}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
index caa6ea8..2df6a9e 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
@@ -1,128 +1,128 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.addons.modelbuilder.impls;
-
-import java.io.BufferedOutputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.charset.StandardCharsets;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import opennlp.addons.modelbuilder.Modelable;
-import opennlp.tools.namefind.NameFinderME;
-import opennlp.tools.namefind.NameSample;
-import opennlp.tools.namefind.NameSampleDataStream;
-import opennlp.tools.namefind.TokenNameFinderFactory;
-import opennlp.tools.namefind.TokenNameFinderModel;
-import opennlp.tools.util.InputStreamFactory;
-import opennlp.tools.util.MarkableFileInputStreamFactory;
-import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.PlainTextByLineStream;
-import opennlp.tools.util.TrainingParameters;
-
-/**
- * Creates annotations, writes annotations to file, and creates a model and writes to a file.
- */
-public class GenericModelableImpl implements Modelable {
-
-  private Set<String> annotatedSentences = new HashSet<>();
-  BaseModelBuilderParams params;
-
-  @Override
-  public void setParameters(BaseModelBuilderParams params) {
-    this.params = params;
-  }
-
-  @Override
-  public String annotate(String sentence, String namedEntity, String entityType) {
-    return sentence.replace(namedEntity, " <START:" + entityType + "> " + namedEntity + " <END> ");
-  }
-
-  @Override
-  public void writeAnnotatedSentences() {
-    try (FileWriter writer = new FileWriter(params.getAnnotatedTrainingDataFile(), false)) {
-      for (String s : annotatedSentences) {
-        writer.write(s.replace("\n", " ").trim() + "\n");
-      }
-    } catch (IOException ex) {
-      ex.printStackTrace();
-    }
-  }
-
-  @Override
-  public Set<String> getAnnotatedSentences() {
-    return annotatedSentences;
-  }
-
-  @Override
-  public void setAnnotatedSentences(Set<String> annotatedSentences) {
-    this.annotatedSentences = annotatedSentences;
-  }
-
-  @Override
-  public void addAnnotatedSentence(String annotatedSentence) {
-    annotatedSentences.add(annotatedSentence);
-  }
-
-  @Override
-  public void buildModel(String entityType) {
-    final InputStreamFactory factory;
-    try {
-      factory = new MarkableFileInputStreamFactory(params.getAnnotatedTrainingDataFile());
-    } catch (FileNotFoundException e) {
-      throw new RuntimeException("Error finding and reading the training data file!", e);
-    }
-
-    final TrainingParameters trainParams = TrainingParameters.defaultParams();
-
-    TokenNameFinderModel model;
-    try (ObjectStream<NameSample> samples =
-                 new NameSampleDataStream(new PlainTextByLineStream(factory, StandardCharsets.UTF_8));
-         OutputStream modelOut = new BufferedOutputStream(new FileOutputStream(params.getModelFile()))) {
-
-      System.out.println("\tBuilding Model using " + annotatedSentences.size() + " annotations");
-      System.out.println("\t\treading training data...");
-      model = NameFinderME.train("en", entityType, samples, trainParams, new TokenNameFinderFactory());
-      model.serialize(modelOut);
-
-      System.out.println("\tmodel generated");
-    } catch (Exception e) {
-      throw new RuntimeException("Error building model! " + e.getLocalizedMessage(), e);
-    }
-  }
-
-  @Override
-  public TokenNameFinderModel getModel() {
-    TokenNameFinderModel nerModel = null;
-    try {
-      nerModel = new TokenNameFinderModel(params.getModelFile());
-    } catch (IOException ex) {
-      Logger.getLogger(GenericModelableImpl.class.getName()).log(Level.SEVERE, null, ex);
-    }
-    return nerModel;
-  }
-
-  @Override
-  public String[] tokenizeSentenceToWords(String sentence) {
-    return sentence.split(" ");
-  }
-}
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.addons.modelbuilder.impls;
+
+import java.io.BufferedOutputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import opennlp.addons.modelbuilder.Modelable;
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.NameSample;
+import opennlp.tools.namefind.NameSampleDataStream;
+import opennlp.tools.namefind.TokenNameFinderFactory;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.util.InputStreamFactory;
+import opennlp.tools.util.MarkableFileInputStreamFactory;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.TrainingParameters;
+
+/**
+ * Creates annotations, writes annotations to file, and creates a model and writes to a file.
+ */
+public class GenericModelableImpl implements Modelable {
+
+  private Set<String> annotatedSentences = new HashSet<>();
+  BaseModelBuilderParams params;
+
+  @Override
+  public void setParameters(BaseModelBuilderParams params) {
+    this.params = params;
+  }
+
+  @Override
+  public String annotate(String sentence, String namedEntity, String entityType) {
+    return sentence.replace(namedEntity, " <START:" + entityType + "> " + namedEntity + " <END> ");
+  }
+
+  @Override
+  public void writeAnnotatedSentences() {
+    try (FileWriter writer = new FileWriter(params.getAnnotatedTrainingDataFile(), false)) {
+      for (String s : annotatedSentences) {
+        writer.write(s.replace("\n", " ").trim() + "\n");
+      }
+    } catch (IOException ex) {
+      ex.printStackTrace();
+    }
+  }
+
+  @Override
+  public Set<String> getAnnotatedSentences() {
+    return annotatedSentences;
+  }
+
+  @Override
+  public void setAnnotatedSentences(Set<String> annotatedSentences) {
+    this.annotatedSentences = annotatedSentences;
+  }
+
+  @Override
+  public void addAnnotatedSentence(String annotatedSentence) {
+    annotatedSentences.add(annotatedSentence);
+  }
+
+  @Override
+  public void buildModel(String entityType) {
+    final InputStreamFactory factory;
+    try {
+      factory = new MarkableFileInputStreamFactory(params.getAnnotatedTrainingDataFile());
+    } catch (FileNotFoundException e) {
+      throw new RuntimeException("Error finding and reading the training data file!", e);
+    }
+
+    final TrainingParameters trainParams = TrainingParameters.defaultParams();
+
+    TokenNameFinderModel model;
+    try (ObjectStream<NameSample> samples =
+                 new NameSampleDataStream(new PlainTextByLineStream(factory, StandardCharsets.UTF_8));
+         OutputStream modelOut = new BufferedOutputStream(new FileOutputStream(params.getModelFile()))) {
+
+      System.out.println("\tBuilding Model using " + annotatedSentences.size() + " annotations");
+      System.out.println("\t\treading training data...");
+      model = NameFinderME.train("en", entityType, samples, trainParams, new TokenNameFinderFactory());
+      model.serialize(modelOut);
+
+      System.out.println("\tmodel generated");
+    } catch (Exception e) {
+      throw new RuntimeException("Error building model! " + e.getLocalizedMessage(), e);
+    }
+  }
+
+  @Override
+  public TokenNameFinderModel getModel() {
+    TokenNameFinderModel nerModel = null;
+    try {
+      nerModel = new TokenNameFinderModel(params.getModelFile());
+    } catch (IOException ex) {
+      Logger.getLogger(GenericModelableImpl.class.getName()).log(Level.SEVERE, null, ex);
+    }
+    return nerModel;
+  }
+
+  @Override
+  public String[] tokenizeSentenceToWords(String sentence) {
+    return sentence.split(" ");
+  }
+}
diff --git a/opennlp-similarity/README.txt b/opennlp-similarity/README.txt
index b535487..a0ae062 100644
--- a/opennlp-similarity/README.txt
+++ b/opennlp-similarity/README.txt
@@ -1,138 +1,138 @@
-Apache OpenNLP ${pom.version}
-===============================
-
-
-Building from the Source Distribution
--------------------------------------
-
-At least Maven 3.0.0 is required for building.
-
-To build everything go into the opennlp directory and run the following command:
-    mvn clean install
-   
-The results of the build will be placed  in:
-    opennlp-distr/target/apache-opennlp-[version]-bin.tar-gz (or .zip)
-
-What is in Similarity component in Apache OpenNLP ${pom.version}
----------------------------------------
-SIMILARITY COMPONENT of OpenNLP
-
-1. Introduction
-This component does text relevance assessment. It takes two portions of texts (phrases, sentences, paragraphs) and returns a similarity score.
-Similarity component can be used on top of search to improve relevance, computing similarity score between a question and all search results (snippets). 
-Also, this component is useful for web mining of images, videos, forums, blogs, and other media with textual descriptions. Such applications as content generation 
-and filtering meaningless speech recognition results are included in the sample applications of this component.
-   Relevance assessment is based on machine learning of syntactic parse trees (constituency trees, http://en.wikipedia.org/wiki/Parse_tree). 
-The similarity score is calculated as the size of all maximal common sub-trees for sentences from a pair of texts (
-www.aaai.org/ocs/index.php/WS/AAAIW11/paper/download/3971/4187, www.aaai.org/ocs/index.php/FLAIRS/FLAIRS11/paper/download/2573/3018,
-www.aaai.org/ocs/index.php/SSS/SSS10/paper/download/1146/1448).
-   The objective of Similarity component is to give an application engineer as tool for text relevance which can be used as a black box, no need to understand 
- computational linguistics or machine learning. 
- 
- 2. Installation
- Please refer to OpenNLP installation instructions
- 
- 3. First use case of Similarity component: search
- 
- To start with this component, please refer to SearchResultsProcessorTest.java in package opennlp.tools.similarity.apps
-   public void testSearchOrder() runs web search using Bing API and improves search relevance.
-   Look at the code of 
-      public List<HitBase> runSearch(String query) 
-   and then at 
-      private	BingResponse calculateMatchScoreResortHits(BingResponse resp, String searchQuery)
-   which gets search results from Bing and re-ranks them based on computed similarity score.
- 
-   The main entry to Similarity component is 
-    SentencePairMatchResult matchRes = sm.assessRelevance(snapshot, searchQuery);
-    where we pass the search query and the snapshot and obtain the similarity assessment structure which includes the similarity score.
-   
-   To run this test you need to obtain search API key from Bing at www.bing.com/developers/s/APIBasics.html and specify it in public class BingQueryRunner in
-  protected static final String APP_ID. 
-  
-  4. Solving a unique problem: content generation
-  To demonstrate the usability of Similarity component to tackle a problem which is hard to solve without a linguistic-based technology, 
-  we introduce a content generation component:
-   RelatedSentenceFinder.java
-   
-   The entry point here is the function call
-   hits = f.generateContentAbout("Albert Einstein");
-   which writes a biography of Albert Einstein by finding sentences on the web about various kinds of his activities (such as 'born', 'graduate', 'invented' etc.).
-   The key here is to compute similarity between the seed expression like "Albert Einstein invented relativity theory" and search result like 
-   "Albert Einstein College of Medicine | Medical Education | Biomedical ...
-    www.einstein.yu.edu/Albert Einstein College of Medicine is one of the nation's premier institutions for medical education, ..."
-    and filter out irrelevant search results.
-   
-   This is done in function 
-   public HitBase augmentWithMinedSentencesAndVerifyRelevance(HitBase item, String originalSentence,
-			List<String> sentsAll)
-			
-   	  SentencePairMatchResult matchRes = sm.assessRelevance(pageSentence + " " + title, originalSentence);
-   You can consult the results in gen.txt, where an essay on Einstein bio is written.
-   
-   These are examples of generated articles, given the article title
-     http://www.allvoices.com/contributed-news/9423860/content/81937916-ichie-sings-jazz-blues-contemporary-tunes
-     http://www.allvoices.com/contributed-news/9415063-britney-spears-femme-fatale-in-north-sf-bay-area
-     
-  5. Solving a high-importance problem: filtering out meaningless speech recognition results.
-  Speech recognitions SDKs usually produce a number of phrases as results, such as 
-  			 "remember to buy milk tomorrow from trader joes",
-			 "remember to buy milk tomorrow from 3 to jones"
-  One can see that the former is meaningful, and the latter is meaningless (although similar in terms of how it is pronounced).
-  We use web mining and Similarity component to detect a meaningful option (a mistake caused by trying to interpret meaningless 
-  request by a query understanding system such as Siri for iPhone can be costly).
- 
-  SpeechRecognitionResultsProcessor.java does the job:
-  public List<SentenceMeaningfullnessScore> runSearchAndScoreMeaningfulness(List<String> sents)
-  re-ranks the phrases in the order of decrease of meaningfulness.
-  
-  6. Similarity component internals
-  in the package   opennlp.tools.textsimilarity.chunker2matcher
-  ParserChunker2MatcherProcessor.java does parsing of two portions of text and matching the resultant parse trees to assess similarity between 
-  these portions of text.
-  To run ParserChunker2MatcherProcessor
-     private static String MODEL_DIR = "resources/models";
-  needs to be specified
-  
-  The key function
-  public SentencePairMatchResult assessRelevance(String para1, String para2)
-  takes two portions of text and does similarity assessment by finding the set of all maximum common subtrees 
-  of the set of parse trees for each portion of text
-  
-  It splits paragraphs into sentences, parses them, obtained chunking information and produces grouped phrases (noun, evrn, prepositional etc.):
-  public synchronized List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForPara(String para)
-  
-  and then attempts to find common subtrees:
-  in ParseTreeMatcherDeterministic.java
-		List<List<ParseTreeChunk>> res = md.matchTwoSentencesGroupedChunksDeterministic(sent1GrpLst, sent2GrpLst)
-  
-  Phrase matching functionality is in package opennlp.tools.textsimilarity;
-  ParseTreeMatcherDeterministic.java:
-  Here's the key matching function which takes two phrases, aligns them and finds a set of maximum common sub-phrase
-  public List<ParseTreeChunk> generalizeTwoGroupedPhrasesDeterministic
-  
-  7. Package structure
-  	opennlp.tools.similarity.apps : 3 main applications
-	opennlp.tools.similarity.apps.utils: utilities for above applications
-	
-	opennlp.tools.textsimilarity.chunker2matcher: parser which converts text into a form for matching parse trees
-	opennlp.tools.textsimilarity: parse tree matching functionality
-	
-
-
-
-Requirements
-------------
-Java 1.5 is required to run OpenNLP
-Maven 3.0.0 is required for building it
-
-Known OSGi Issues
-------------
-In an OSGi environment the following things are not supported:
-- The coreference resolution component
-- The ability to load a user provided feature generator class
-
-Note
-----
-The current API contains still many deprecated methods, these
-will be removed in one of our next releases, please
-migrate to our new API.
+Apache OpenNLP ${pom.version}
+===============================
+
+
+Building from the Source Distribution
+-------------------------------------
+
+At least Maven 3.0.0 is required for building.
+
+To build everything go into the opennlp directory and run the following command:
+    mvn clean install
+   
+The results of the build will be placed  in:
+    opennlp-distr/target/apache-opennlp-[version]-bin.tar-gz (or .zip)
+
+What is in Similarity component in Apache OpenNLP ${pom.version}
+---------------------------------------
+SIMILARITY COMPONENT of OpenNLP
+
+1. Introduction
+This component does text relevance assessment. It takes two portions of texts (phrases, sentences, paragraphs) and returns a similarity score.
+Similarity component can be used on top of search to improve relevance, computing similarity score between a question and all search results (snippets). 
+Also, this component is useful for web mining of images, videos, forums, blogs, and other media with textual descriptions. Such applications as content generation 
+and filtering meaningless speech recognition results are included in the sample applications of this component.
+   Relevance assessment is based on machine learning of syntactic parse trees (constituency trees, http://en.wikipedia.org/wiki/Parse_tree). 
+The similarity score is calculated as the size of all maximal common sub-trees for sentences from a pair of texts (
+www.aaai.org/ocs/index.php/WS/AAAIW11/paper/download/3971/4187, www.aaai.org/ocs/index.php/FLAIRS/FLAIRS11/paper/download/2573/3018,
+www.aaai.org/ocs/index.php/SSS/SSS10/paper/download/1146/1448).
+   The objective of Similarity component is to give an application engineer as tool for text relevance which can be used as a black box, no need to understand 
+ computational linguistics or machine learning. 
+ 
+ 2. Installation
+ Please refer to OpenNLP installation instructions
+ 
+ 3. First use case of Similarity component: search
+ 
+ To start with this component, please refer to SearchResultsProcessorTest.java in package opennlp.tools.similarity.apps
+   public void testSearchOrder() runs web search using Bing API and improves search relevance.
+   Look at the code of 
+      public List<HitBase> runSearch(String query) 
+   and then at 
+      private	BingResponse calculateMatchScoreResortHits(BingResponse resp, String searchQuery)
+   which gets search results from Bing and re-ranks them based on computed similarity score.
+ 
+   The main entry to Similarity component is 
+    SentencePairMatchResult matchRes = sm.assessRelevance(snapshot, searchQuery);
+    where we pass the search query and the snapshot and obtain the similarity assessment structure which includes the similarity score.
+   
+   To run this test you need to obtain search API key from Bing at www.bing.com/developers/s/APIBasics.html and specify it in public class BingQueryRunner in
+  protected static final String APP_ID. 
+  
+  4. Solving a unique problem: content generation
+  To demonstrate the usability of Similarity component to tackle a problem which is hard to solve without a linguistic-based technology, 
+  we introduce a content generation component:
+   RelatedSentenceFinder.java
+   
+   The entry point here is the function call
+   hits = f.generateContentAbout("Albert Einstein");
+   which writes a biography of Albert Einstein by finding sentences on the web about various kinds of his activities (such as 'born', 'graduate', 'invented' etc.).
+   The key here is to compute similarity between the seed expression like "Albert Einstein invented relativity theory" and search result like 
+   "Albert Einstein College of Medicine | Medical Education | Biomedical ...
+    www.einstein.yu.edu/Albert Einstein College of Medicine is one of the nation's premier institutions for medical education, ..."
+    and filter out irrelevant search results.
+   
+   This is done in function 
+   public HitBase augmentWithMinedSentencesAndVerifyRelevance(HitBase item, String originalSentence,
+			List<String> sentsAll)
+			
+   	  SentencePairMatchResult matchRes = sm.assessRelevance(pageSentence + " " + title, originalSentence);
+   You can consult the results in gen.txt, where an essay on Einstein bio is written.
+   
+   These are examples of generated articles, given the article title
+     http://www.allvoices.com/contributed-news/9423860/content/81937916-ichie-sings-jazz-blues-contemporary-tunes
+     http://www.allvoices.com/contributed-news/9415063-britney-spears-femme-fatale-in-north-sf-bay-area
+     
+  5. Solving a high-importance problem: filtering out meaningless speech recognition results.
+  Speech recognitions SDKs usually produce a number of phrases as results, such as 
+  			 "remember to buy milk tomorrow from trader joes",
+			 "remember to buy milk tomorrow from 3 to jones"
+  One can see that the former is meaningful, and the latter is meaningless (although similar in terms of how it is pronounced).
+  We use web mining and Similarity component to detect a meaningful option (a mistake caused by trying to interpret meaningless 
+  request by a query understanding system such as Siri for iPhone can be costly).
+ 
+  SpeechRecognitionResultsProcessor.java does the job:
+  public List<SentenceMeaningfullnessScore> runSearchAndScoreMeaningfulness(List<String> sents)
+  re-ranks the phrases in the order of decrease of meaningfulness.
+  
+  6. Similarity component internals
+  in the package   opennlp.tools.textsimilarity.chunker2matcher
+  ParserChunker2MatcherProcessor.java does parsing of two portions of text and matching the resultant parse trees to assess similarity between 
+  these portions of text.
+  To run ParserChunker2MatcherProcessor
+     private static String MODEL_DIR = "resources/models";
+  needs to be specified
+  
+  The key function
+  public SentencePairMatchResult assessRelevance(String para1, String para2)
+  takes two portions of text and does similarity assessment by finding the set of all maximum common subtrees 
+  of the set of parse trees for each portion of text
+  
+  It splits paragraphs into sentences, parses them, obtained chunking information and produces grouped phrases (noun, evrn, prepositional etc.):
+  public synchronized List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForPara(String para)
+  
+  and then attempts to find common subtrees:
+  in ParseTreeMatcherDeterministic.java
+		List<List<ParseTreeChunk>> res = md.matchTwoSentencesGroupedChunksDeterministic(sent1GrpLst, sent2GrpLst)
+  
+  Phrase matching functionality is in package opennlp.tools.textsimilarity;
+  ParseTreeMatcherDeterministic.java:
+  Here's the key matching function which takes two phrases, aligns them and finds a set of maximum common sub-phrase
+  public List<ParseTreeChunk> generalizeTwoGroupedPhrasesDeterministic
+  
+  7. Package structure
+  	opennlp.tools.similarity.apps : 3 main applications
+	opennlp.tools.similarity.apps.utils: utilities for above applications
+	
+	opennlp.tools.textsimilarity.chunker2matcher: parser which converts text into a form for matching parse trees
+	opennlp.tools.textsimilarity: parse tree matching functionality
+	
+
+
+
+Requirements
+------------
+Java 1.5 is required to run OpenNLP
+Maven 3.0.0 is required for building it
+
+Known OSGi Issues
+------------
+In an OSGi environment the following things are not supported:
+- The coreference resolution component
+- The ability to load a user provided feature generator class
+
+Note
+----
+The current API contains still many deprecated methods, these
+will be removed in one of our next releases, please
+migrate to our new API.
diff --git a/opennlp-similarity/RELEASE_NOTES.html b/opennlp-similarity/RELEASE_NOTES.html
index 7706367..447b27f 100644
--- a/opennlp-similarity/RELEASE_NOTES.html
+++ b/opennlp-similarity/RELEASE_NOTES.html
@@ -1,77 +1,77 @@
-<!--
-    ***************************************************************
-    * Licensed to the Apache Software Foundation (ASF) under one
-    * or more contributor license agreements.  See the NOTICE file
-    * distributed with this work for additional information
-    * regarding copyright ownership.  The ASF licenses this file
-    * to you under the Apache License, Version 2.0 (the
-    * "License"); you may not use this file except in compliance
-    * with the License.  You may obtain a copy of the License at
-    *
-    *   http://www.apache.org/licenses/LICENSE-2.0
-    * 
-    * Unless required by applicable law or agreed to in writing,
-    * software distributed under the License is distributed on an
-    * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-    * KIND, either express or implied.  See the License for the
-    * specific language governing permissions and limitations
-    * under the License.
-    ***************************************************************
---> 
-
-<html> 
-<head> 
-  <title>Apache OpenNLP ${pom.version} Release Notes</title> 
-</head> 
-<body> 
-<h1>Apache OpenNLP ${pom.version} Release Notes</h1> 
- 
-<h2>Contents</h2> 
-<p> 
-<a href="#what.is.opennlp">What is Similarity component of Apache OpenNLP?</a><br/> 
-<a href="#major.changes">This Release</a><br/> 
-<a href="#get.involved">How to Get Involved</a><br/> 
-<a href="#report.issues">How to Report Issues</a><br/> 
-<a href="#list.issues">List of JIRA Issues Fixed in this Release</a><br/> 
-</p>  
-   
-<h2><a name="what.is.opennlp">1. What is Apache OpenNLP?</a></h2> 
-<p>
-This component does text relevance assessment. It takes two portions of texts (phrases, sentences, paragraphs) and returns a similarity score.
-Similarity component can be used on top of search to improve relevance, computing similarity score between a question and all search results (snippets). 
-Also, this component is useful for web mining of images, videos, forums, blogs, and other media with textual descriptions. Such applications as content generation 
-and filtering meaningless speech recognition results are included in the sample applications of this component.
-   Relevance assessment is based on machine learning of syntactic parse trees (constituency trees, http://en.wikipedia.org/wiki/Parse_tree). 
-The similarity score is calculated as the size of all maximal common sub-trees for sentences from a pair of texts (
-www.aaai.org/ocs/index.php/WS/AAAIW11/paper/download/3971/4187, www.aaai.org/ocs/index.php/FLAIRS/FLAIRS11/paper/download/2573/3018,
-www.aaai.org/ocs/index.php/SSS/SSS10/paper/download/1146/1448).
-   The objective of Similarity component is to give an application engineer as tool for text relevance which can be used as a black box, no need to understand 
- computational linguistics or machine learning. 
-</p>
-
-<h2><a name="major.changes">This Release</a></h2> 
-<p> 
-Please see the <a href="README">README</a> for this information.
-</p> 
-  
-<h2><a name="get.involved">How to Get Involved</a></h2> 
-<p> 
-The Apache OpenNLP project really needs and appreciates any contributions, 
-including documentation help, source code and feedback.  If you are interested
-in contributing, please visit <a href="http://opennlp.apache.org/">http://opennlp.apache.org/</a>
-</p>
-  
-<h2><a name="report.issues">How to Report Issues</a></h2> 
-<p> 
-The Apache OpenNLP project uses JIRA for issue tracking.  Please report any 
-issues you find at 
-<a href="http://issues.apache.org/jira/browse/opennlp">http://issues.apache.org/jira/browse/opennlp</a> 
-</p> 
-  
-<h2><a name="list.issues">List of JIRA Issues Fixed in this Release</a></h2>
-<p>
-Click <a href="issuesFixed/jira-report.html">issuesFixed/jira-report.hmtl</a> for the list of 
-issues fixed in this release.
-</p>
-</body> 
+<!--
+    ***************************************************************
+    * Licensed to the Apache Software Foundation (ASF) under one
+    * or more contributor license agreements.  See the NOTICE file
+    * distributed with this work for additional information
+    * regarding copyright ownership.  The ASF licenses this file
+    * to you under the Apache License, Version 2.0 (the
+    * "License"); you may not use this file except in compliance
+    * with the License.  You may obtain a copy of the License at
+    *
+    *   http://www.apache.org/licenses/LICENSE-2.0
+    * 
+    * Unless required by applicable law or agreed to in writing,
+    * software distributed under the License is distributed on an
+    * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    * KIND, either express or implied.  See the License for the
+    * specific language governing permissions and limitations
+    * under the License.
+    ***************************************************************
+--> 
+
+<html> 
+<head> 
+  <title>Apache OpenNLP ${pom.version} Release Notes</title> 
+</head> 
+<body> 
+<h1>Apache OpenNLP ${pom.version} Release Notes</h1> 
+ 
+<h2>Contents</h2> 
+<p> 
+<a href="#what.is.opennlp">What is Similarity component of Apache OpenNLP?</a><br/> 
+<a href="#major.changes">This Release</a><br/> 
+<a href="#get.involved">How to Get Involved</a><br/> 
+<a href="#report.issues">How to Report Issues</a><br/> 
+<a href="#list.issues">List of JIRA Issues Fixed in this Release</a><br/> 
+</p>  
+   
+<h2><a name="what.is.opennlp">1. What is Apache OpenNLP?</a></h2> 
+<p>
+This component does text relevance assessment. It takes two portions of texts (phrases, sentences, paragraphs) and returns a similarity score.
+Similarity component can be used on top of search to improve relevance, computing similarity score between a question and all search results (snippets). 
+Also, this component is useful for web mining of images, videos, forums, blogs, and other media with textual descriptions. Such applications as content generation 
+and filtering meaningless speech recognition results are included in the sample applications of this component.
+   Relevance assessment is based on machine learning of syntactic parse trees (constituency trees, http://en.wikipedia.org/wiki/Parse_tree). 
+The similarity score is calculated as the size of all maximal common sub-trees for sentences from a pair of texts (
+www.aaai.org/ocs/index.php/WS/AAAIW11/paper/download/3971/4187, www.aaai.org/ocs/index.php/FLAIRS/FLAIRS11/paper/download/2573/3018,
+www.aaai.org/ocs/index.php/SSS/SSS10/paper/download/1146/1448).
+   The objective of Similarity component is to give an application engineer as tool for text relevance which can be used as a black box, no need to understand 
+ computational linguistics or machine learning. 
+</p>
+
+<h2><a name="major.changes">This Release</a></h2> 
+<p> 
+Please see the <a href="README">README</a> for this information.
+</p> 
+  
+<h2><a name="get.involved">How to Get Involved</a></h2> 
+<p> 
+The Apache OpenNLP project really needs and appreciates any contributions, 
+including documentation help, source code and feedback.  If you are interested
+in contributing, please visit <a href="http://opennlp.apache.org/">http://opennlp.apache.org/</a>
+</p>
+  
+<h2><a name="report.issues">How to Report Issues</a></h2> 
+<p> 
+The Apache OpenNLP project uses JIRA for issue tracking.  Please report any 
+issues you find at 
+<a href="http://issues.apache.org/jira/browse/opennlp">http://issues.apache.org/jira/browse/opennlp</a> 
+</p> 
+  
+<h2><a name="list.issues">List of JIRA Issues Fixed in this Release</a></h2>
+<p>
+Click <a href="issuesFixed/jira-report.html">issuesFixed/jira-report.hmtl</a> for the list of 
+issues fixed in this release.
+</p>
+</body> 
 </html>
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java
index b712847..b328759 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java
@@ -1,54 +1,54 @@
-package opennlp.tools.apps.contentgen.multithreaded;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import opennlp.tools.similarity.apps.BingQueryRunner;
-import opennlp.tools.similarity.apps.HitBase;
-
-public class BingWebQueryRunnerThread extends BingQueryRunner implements Runnable{
-	
-	private String query;
-	private List<HitBase> results= new ArrayList<HitBase>();
-	public BingWebQueryRunnerThread(String Query){
-		super();
-		this.query=Query;
-	}
-	public void run(){
-		results=runSearch(query);
-		fireMyEvent(new MyEvent(this));
-	}
-	public List<HitBase> getResults() {
-		return results;
-	}
-	
-	public String getQuery() {
-		return query;
-	}
-	
-	// Create the listener list
-    protected javax.swing.event.EventListenerList listenerList = new javax.swing.event.EventListenerList();
-    // This methods allows classes to register for MyEvents 
-
-    public void addMyEventListener(MyEventListener listener) {
-        listenerList.add(MyEventListener.class, listener);
-    }
-    // This methods allows classes to unregister for MyEvents
-
-    public void removeMyEventListener(MyEventListener listener) {
-        listenerList.remove(MyEventListener.class, listener);
-    }
-
-    void fireMyEvent(MyEvent evt) {
-        Object[] listeners = listenerList.getListenerList();
-        // Each listener occupies two elements - the first is the listener class
-        // and the second is the listener instance
-        for (int i = 0; i < listeners.length; i += 2) {
-            if (listeners[i] == MyEventListener.class) {
-                ((MyEventListener) listeners[i + 1]).MyEvent(evt);
-            }
-        }
-    }
-	
-
-}
+package opennlp.tools.apps.contentgen.multithreaded;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.similarity.apps.BingQueryRunner;
+import opennlp.tools.similarity.apps.HitBase;
+
+public class BingWebQueryRunnerThread extends BingQueryRunner implements Runnable{
+	
+	private String query;
+	private List<HitBase> results= new ArrayList<HitBase>();
+	public BingWebQueryRunnerThread(String Query){
+		super();
+		this.query=Query;
+	}
+	public void run(){
+		results=runSearch(query);
+		fireMyEvent(new MyEvent(this));
+	}
+	public List<HitBase> getResults() {
+		return results;
+	}
+	
+	public String getQuery() {
+		return query;
+	}
+	
+	// Create the listener list
+    protected javax.swing.event.EventListenerList listenerList = new javax.swing.event.EventListenerList();
+    // This methods allows classes to register for MyEvents 
+
+    public void addMyEventListener(MyEventListener listener) {
+        listenerList.add(MyEventListener.class, listener);
+    }
+    // This methods allows classes to unregister for MyEvents
+
+    public void removeMyEventListener(MyEventListener listener) {
+        listenerList.remove(MyEventListener.class, listener);
+    }
+
+    void fireMyEvent(MyEvent evt) {
+        Object[] listeners = listenerList.getListenerList();
+        // Each listener occupies two elements - the first is the listener class
+        // and the second is the listener instance
+        for (int i = 0; i < listeners.length; i += 2) {
+            if (listeners[i] == MyEventListener.class) {
+                ((MyEventListener) listeners[i + 1]).MyEvent(evt);
+            }
+        }
+    }
+	
+
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/Fragment.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/Fragment.java
index 328d95c..29d053b 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/Fragment.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/Fragment.java
@@ -1,88 +1,88 @@
-package opennlp.tools.apps.contentgen.multithreaded;
-
-import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;
-
-
-public class Fragment {
-	
-		public String resultText;      // result
-		public double score;
-		public String fragment; // original
-		public String sourceURL;
-
-		Fragment(String text, double score) {
-			this.resultText = text;
-			this.score = score;
-		}
-		
-			
-		public String getResultText() {
-			return resultText;
-		}
-
-		public void setResultText(String resultText) {
-			this.resultText = resultText;
-		}
-
-
-
-		public double getScore() {
-			return score;
-		}
-
-
-
-		public void setScore(double score) {
-			this.score = score;
-		}
-
-
-
-		public String getFragment() {
-			return fragment;
-		}
-
-
-
-		public void setFragment(String fragment) {
-			this.fragment = fragment;
-		}
-
-		
-
-		public String getSourceURL() {
-			return sourceURL;
-		}
-
-
-		public void setSourceURL(String sourceURL) {
-			this.sourceURL = sourceURL;
-		}
-
-
-		public String toString(){
-			return this.resultText;
-		}
-
-		@Override
-		public boolean equals(Object o) {
-			if (this == o) return true;
-			if (o == null || getClass() != o.getClass()) return false;
-
-			Fragment fragment = (Fragment) o;
-
-			if (resultText == null && fragment.resultText == null) {
-				return true;
-			} else if ((resultText == null && fragment.resultText != null) || (resultText != null && fragment.resultText == null)) {
-				return false;
-			}
-
-			StringDistanceMeasurer sdm = new StringDistanceMeasurer();
-			return sdm.measureStringDistance(resultText, fragment.resultText) > 0.8;
-		}
-
-		@Override
-		public int hashCode() {
-			return resultText != null ? resultText.hashCode() : 0;
-		}
-}
+package opennlp.tools.apps.contentgen.multithreaded;
+
+import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;
+
+
+public class Fragment {
+	
+		public String resultText;      // result
+		public double score;
+		public String fragment; // original
+		public String sourceURL;
+
+		Fragment(String text, double score) {
+			this.resultText = text;
+			this.score = score;
+		}
+		
+			
+		public String getResultText() {
+			return resultText;
+		}
+
+		public void setResultText(String resultText) {
+			this.resultText = resultText;
+		}
+
+
+
+		public double getScore() {
+			return score;
+		}
+
+
+
+		public void setScore(double score) {
+			this.score = score;
+		}
+
+
+
+		public String getFragment() {
+			return fragment;
+		}
+
+
+
+		public void setFragment(String fragment) {
+			this.fragment = fragment;
+		}
+
+		
+
+		public String getSourceURL() {
+			return sourceURL;
+		}
+
+
+		public void setSourceURL(String sourceURL) {
+			this.sourceURL = sourceURL;
+		}
+
+
+		public String toString(){
+			return this.resultText;
+		}
+
+		@Override
+		public boolean equals(Object o) {
+			if (this == o) return true;
+			if (o == null || getClass() != o.getClass()) return false;
+
+			Fragment fragment = (Fragment) o;
+
+			if (resultText == null && fragment.resultText == null) {
+				return true;
+			} else if ((resultText == null && fragment.resultText != null) || (resultText != null && fragment.resultText == null)) {
+				return false;
+			}
+
+			StringDistanceMeasurer sdm = new StringDistanceMeasurer();
+			return sdm.measureStringDistance(resultText, fragment.resultText) > 0.8;
+		}
+
+		@Override
+		public int hashCode() {
+			return resultText != null ? resultText.hashCode() : 0;
+		}
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEvent.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEvent.java
index 3305ad5..bd9ee1f 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEvent.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEvent.java
@@ -1,11 +1,11 @@
-package opennlp.tools.apps.contentgen.multithreaded;
-
-import java.util.EventObject;
-
-public class MyEvent extends EventObject {
-
-	public MyEvent(Object arg0) {
-		super(arg0);
-	}
-
-}
+package opennlp.tools.apps.contentgen.multithreaded;
+
+import java.util.EventObject;
+
+public class MyEvent extends EventObject {
+
+	public MyEvent(Object arg0) {
+		super(arg0);
+	}
+
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEventListener.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEventListener.java
index ecdced4..5a5c33c 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEventListener.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEventListener.java
@@ -1,8 +1,8 @@
-package opennlp.tools.apps.contentgen.multithreaded;
-
-import java.util.EventListener;
-
-
-public interface MyEventListener extends EventListener{
-	public void MyEvent(MyEvent evt);
-}
+package opennlp.tools.apps.contentgen.multithreaded;
+
+import java.util.EventListener;
+
+
+public interface MyEventListener extends EventListener{
+	public void MyEvent(MyEvent evt);
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/cgRequestForm.html b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/cgRequestForm.html
index 1c5dfb2..b352b20 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/cgRequestForm.html
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/cgRequestForm.html
@@ -1,37 +1,37 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
-"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
- 
-<html xmlns='http://www.w3.org/1999/xhtml'>
-   <head >
-      <meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>
-      <title >Submit Your Essay Writing request here</title>
-   </head>
-<body>
-<h1>Submit Your Essay Writing request here / Envie su solicitud ensayo escrito aqui</h1>
- 
-<form id='sampleform' method='post' action='http://173.255.254.250:8983/solr/contentgen/?resourceDir=/home/solr/solr-4.4.0/example/src/test/resources&workDir=/home/solr/solr-4.4.0/example/solr-webapp/webapp/WEB-INF/lib&relevanceThreshold=0.5&bingKey=e8ADxIjn9YyHx36EihdjH/tMqJJItUrrbPTUpKahiU0=' >
-   <p>
-   Topic for your essay/Tema de su ensayo: <input type='text' name='q' value='albert einstein' size='35' maxlength='100'/>
-   </p>
-   <p>
-   Email to receive your essay/para recibir su ensayo: <input type='text' name='email' />
-   </p>
-   
-   <p>
-   Select language/seleccionar el idioma: <select name="lang" >
-   		<option value="en-US"> English</option>
- 		<option value="es-US"> Espaniol</option>
- 		<option value="de-DE"> German</option>
-	</select>
-	</p>
-	<p>
-   Number of Bing calls to write a this essay: <input type='text' name='stepsNum' value='20' size='5' maxlength='10'/>
-   Number of Bing search results for each call to use for writing: <input type='text' name='searchResultsNum' value='100' size='5' maxlength='10'/>
-   </p>
-<p>
-   <input type='submit' name='Submit' value='Submit/presentar' />
-   </p>
-</form>
- 
-</body>
-</html>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+ 
+<html xmlns='http://www.w3.org/1999/xhtml'>
+   <head >
+      <meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>
+      <title >Submit Your Essay Writing request here</title>
+   </head>
+<body>
+<h1>Submit Your Essay Writing request here / Envie su solicitud ensayo escrito aqui</h1>
+ 
+<form id='sampleform' method='post' action='http://173.255.254.250:8983/solr/contentgen/?resourceDir=/home/solr/solr-4.4.0/example/src/test/resources&workDir=/home/solr/solr-4.4.0/example/solr-webapp/webapp/WEB-INF/lib&relevanceThreshold=0.5&bingKey=e8ADxIjn9YyHx36EihdjH/tMqJJItUrrbPTUpKahiU0=' >
+   <p>
+   Topic for your essay/Tema de su ensayo: <input type='text' name='q' value='albert einstein' size='35' maxlength='100'/>
+   </p>
+   <p>
+   Email to receive your essay/para recibir su ensayo: <input type='text' name='email' />
+   </p>
+   
+   <p>
+   Select language/seleccionar el idioma: <select name="lang" >
+   		<option value="en-US"> English</option>
+ 		<option value="es-US"> Espaniol</option>
+ 		<option value="de-DE"> German</option>
+	</select>
+	</p>
+	<p>
+   Number of Bing calls to write a this essay: <input type='text' name='stepsNum' value='20' size='5' maxlength='10'/>
+   Number of Bing search results for each call to use for writing: <input type='text' name='searchResultsNum' value='100' size='5' maxlength='10'/>
+   </p>
+<p>
+   <input type='submit' name='Submit' value='Submit/presentar' />
+   </p>
+</form>
+ 
+</body>
+</html>
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/nlProg2codeRequestForm.html b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/nlProg2codeRequestForm.html
index 2fbf1c9..a1be468 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/nlProg2codeRequestForm.html
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/nlProg2codeRequestForm.html
@@ -1,47 +1,47 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
-"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
- 
-<html xmlns='http://www.w3.org/1999/xhtml'>
-   <head >
-      <meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>
-      <title >Submit Your Code Writing request here</title>
-   </head>
-<body>
-<h1>Submit Your Code Writing request here</h1>
- 
-<form id='sampleform' method='post' action='http://173.255.254.250:8983/solr/nlprog2code/?' >
-   <p>
-   Write what you want your program to do in natural language <input type='text' name='line' value='define a class named ...' size='35' maxlength='120'/>
-   </p>
-    <p>
-    <input type='text' name='line' value='define a function taking a string s1 and an integer i2 ' size='35' maxlength='150'/>
-   </p>
-   <p>
-     <input type='text' name='line' size='35' maxlength='200'/>
-   </p>
-   <p>
-    <input type='text' name='line' size='35' maxlength='200'/>
-   </p>
-   <p>
-    <input type='text' name='line' size='35' maxlength='200'/>
-   </p>
-   <p>
-     <input type='text' name='line' size='35' maxlength='200'/>
-   </p>
-   <p>
-    <input type='text' name='line' size='35' maxlength='200'/>
-   </p>
-   <p>
-    <input type='text' name='line' size='35' maxlength='200'/>
-   </p>
-   <p>
-    <input type='text' name='line' size='35' maxlength='200'/>
-   </p>
-   
-<p>
-   <input type='submit' name='Submit' value='Submit' />
-   </p>
-</form>
- 
-</body>
-</html>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+ 
+<html xmlns='http://www.w3.org/1999/xhtml'>
+   <head >
+      <meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>
+      <title >Submit Your Code Writing request here</title>
+   </head>
+<body>
+<h1>Submit Your Code Writing request here</h1>
+ 
+<form id='sampleform' method='post' action='http://173.255.254.250:8983/solr/nlprog2code/?' >
+   <p>
+   Write what you want your program to do in natural language <input type='text' name='line' value='define a class named ...' size='35' maxlength='120'/>
+   </p>
+    <p>
+    <input type='text' name='line' value='define a function taking a string s1 and an integer i2 ' size='35' maxlength='150'/>
+   </p>
+   <p>
+     <input type='text' name='line' size='35' maxlength='200'/>
+   </p>
+   <p>
+    <input type='text' name='line' size='35' maxlength='200'/>
+   </p>
+   <p>
+    <input type='text' name='line' size='35' maxlength='200'/>
+   </p>
+   <p>
+     <input type='text' name='line' size='35' maxlength='200'/>
+   </p>
+   <p>
+    <input type='text' name='line' size='35' maxlength='200'/>
+   </p>
+   <p>
+    <input type='text' name='line' size='35' maxlength='200'/>
+   </p>
+   <p>
+    <input type='text' name='line' size='35' maxlength='200'/>
+   </p>
+   
+<p>
+   <input type='submit' name='Submit' value='Submit' />
+   </p>
+</form>
+ 
+</body>
+</html>
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java
index d2e77e3..1af52d9 100755
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java
@@ -1,738 +1,738 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.apps.object_dedup;
-
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Pattern;
-
-import opennlp.tools.similarity.apps.BingQueryRunner;
-import opennlp.tools.similarity.apps.HitBase;
-import opennlp.tools.similarity.apps.utils.LevensteinDistanceFinder;
-import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;
-import opennlp.tools.similarity.apps.utils.Utils;
-import opennlp.tools.textsimilarity.TextProcessor;
-
-import org.apache.commons.lang.StringUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/* This is a template class for deduplicator */
-
-public class SimilarityAccessorBase
-{
-	private static final Logger LOG = LoggerFactory.getLogger(SimilarityAccessorBase.class);
-
-	public static final int MAX_EV_TO_RECOMM = 6;
-
-	private List<String> namesBothSides;
-
-	protected static final String[] englishPrepositions = new String[] { "a", "aboard", "about", "above", "absent",
-		"across", "after", "against", "along", "alongside", "among", "around", "as", "at", "before", "behind", "below",
-		"beneath", "between", "beyond", "but", "by", "despite", "down", "during", "except", "excluding", "failing",
-		"following", "for", "from", "in", "including", "inside", "into", "like", "near", "next", "of", "off", "on",
-		"onto", "only", "opposite", "out", "outside", "over", "pace", "past", "per", "since", "than", "through", "and",
-		"thru", "till", "to", "toward", "under", "up", "upon", "versus", "with", "within", "you", "must", "know",
-		"when" };
-
-	protected List<String> commonWordsInEventTitles = Arrays.asList(new String[] { "community", "party", "film",
-		"music", "exhibition", "kareoke", "guitar", "quartet", "reggae", "r&b", "band", "dj ", "piano", "pray",
-		"worship", "god", "training", "class", "development", "training", "class", "course", "our", "comedy", ",fun",
-		"musical", "group", "alliance", "session", "feeding", "introduction", "school", "conversation", "learning",
-		"nursery", "unity", "trivia", "chat", "conference", "tuition", "technology", "teen", "communication",
-		"reception", "management", "beginner", "beginning", "collabora", "reuninon", "political", "course", "age",
-		"ages", "through", "grade", "networking", "workshop", "demonstration", "tuning", "program", "summit",
-		"convention", "day", "night", "one", "two", "outfest", "three", "online", "writing", "seminar", "coach",
-		",expo", "advanced", "beginner", "intermediate", "earn", "free", "ii", "iii", "skills", "skill", "artist",
-		"summer", "winter", "autumn", "spring", "camp", "vacation", "miscrosoft", "kid", "child", "kids", "children",
-		"every", "everyone", "dancer", "dancers", "senior", "seniors", "basic", "elementary", "outfest", "2008",
-		"2009", "2010", "2011", "2012", "monday", "tuesday", "wednesday", "thirsday", "friday", "saturday", "sunday",
-		"mondays", "tuesdays", "wednesdays", "thirsdays", "fridays", "saturdays", "sundays", "men" // ?
-	});
-
-	private BingQueryRunner webSearch = new BingQueryRunner();
-
-	private StringDistanceMeasurer stringDistanceMeasurer = new StringDistanceMeasurer();
-
-
-	public SimilarityAccessorBase()
-	{
-	}
-
-
-	public void init()
-	{
-		namesBothSides = getWordsThatShouldBeOnBothSidesEvents();
-	}
-
-	protected List<String> removeDollarWordAndNonAlphaFromList(List<String> list)
-	{
-		List<String> result = new ArrayList<String>();
-		Pattern p = Pattern.compile("^\\$(\\d{1,3}(\\,\\d{3})*|(\\d+))(\\.\\d{2})?$");
-		for (String w : list)
-		{
-			if (!(p.matcher(w).find()) && StringUtils.isAlphanumeric(w) && (w.length() >= 3 || !StringUtils.isAlpha(w)))
-				result.add(w);
-		}
-		return result;
-	}
-
-
-	public List<String> getWordsThatShouldBeOnBothSidesEvents()
-	{
-/*
-		names.addAll(Arrays.asList(new String[] { "woman", "man", "women", "men", "womans", "mans", "womens", "mens",
-			"boy", "girl", "boys", "girls", "men's", "women's", "woman's", "ice", // for disney
-			"flight", "intermediate", "advanced", "beginner",
-			// "tour", TODO special consideration
-			"helicopter", "sexual", "junior", "jr" }));
-			*/
-		return null;
-
-	}
-
-	protected Boolean applySemanticNameSimilarityRule(Object es1,
-		Object es2)
-	{
-		
-		//TODO check attributes of objects
-		/*
-		if (!(es1.getVenueName().endsWith(es2.getVenueName()) || es2.getVenueName().endsWith(es1.getVenueName())))
-			return false;
-		if (Math.abs(es1.getStarttime().getTime() - es2.getStarttime().getTime()) > 100000)
-			return false;
-			*/
-
-		return true;
-
-	}
-
-	// this rule extract "OF" part and treats it as a whole expression
-	protected void applySubPhraseExtractionRule(List<String> name1Tokens, List<String> name2Tokens)
-	{
-		if (name1Tokens.indexOf("of") > 0 && name2Tokens.indexOf("of") > 0)
-		{
-			name1Tokens = extractMainNounPhrase(name1Tokens);
-			name2Tokens = extractMainNounPhrase(name2Tokens);
-		}
-	}
-
-	private Boolean attemptShortTitlesSimilarityInWebSpace(String name1, String name2)
-	{
-
-		// first delimeter processing
-		String name1v = name1.replace("'", "").replace("-", " ");
-		String name2v = name2.replace("'", "").replace("-", " ");
-		String name1vv = name1.replace("'", "");
-		String name2vv = name2.replace("'", "");
-		String name1vvv = name1.replace("-", " ");
-		String name2vvv = name2.replace("-", " ");
-
-		if (name1.startsWith(name2) || name1vv.startsWith(name2) || name1.startsWith(name2v)
-			|| name1.startsWith(name2vv) || name1.startsWith(name2vvv) || name1v.startsWith(name2v)
-			|| name1v.startsWith(name2vv) || name2.startsWith(name1) || name2vv.startsWith(name1)
-			|| name2.startsWith(name1v) || name2vvv.startsWith(name1vv) || name2.startsWith(name1vvv)
-			|| name2v.startsWith(name1v) || name2v.startsWith(name1vv) || name1.endsWith(name2)
-			|| name1vv.endsWith(name2) || name1.endsWith(name2v) || name1.endsWith(name2vv) || name1.endsWith(name2vvv)
-			|| name1v.endsWith(name2v) || name1v.endsWith(name2vv) || name2.endsWith(name1) || name2vv.endsWith(name1)
-			|| name2.endsWith(name1v) || name1vvv.endsWith(name2vv) || name2.endsWith(name1vvv)
-			|| name2v.endsWith(name1v) || name2v.endsWith(name1vv))
-		{
-			LOG.info("Found fuzzy substring of name1 and name2");
-			return true;
-		}
-		if (name1.length() > 12 && name2.length() > 12)
-			return false;
-
-		return areNamesSemanticallyCloseInWebSearchSpace(name1, name2, 0.8f, false).isDecision();
-
-	}
-
-	public Boolean applyBothSidesRuleEvent(String name1, String name2)
-	{
-		List<String> name1Tokens = TextProcessor.fastTokenize(name1.toLowerCase(), false);
-		List<String> name2Tokens = TextProcessor.fastTokenize(name2.toLowerCase(), false);
-		// get unique names
-		List<String> name1TokensC = new ArrayList<String>(name1Tokens), name2TokensC = new ArrayList<String>(
-			name2Tokens);
-		;
-		name1TokensC.removeAll(name2Tokens);
-		name2TokensC.removeAll(name1Tokens);
-		// get all unique names
-		name1TokensC.addAll(name2TokensC);
-
-		name1TokensC.retainAll(namesBothSides);
-		name1Tokens.retainAll(name2Tokens);
-
-		if ((name1TokensC.size() > 0 && name1Tokens.size() < 3) || (name1TokensC.size() > 1 && name1Tokens.size() < 5))
-		{ // 'mens == men; case !(name1TokensC.size()==2 && (name1TokensC.get(0).indexOf(name1TokensC.get(1))>-1 ||
-			// name1TokensC.get(1).indexOf(name1TokensC.get(0))>-1 ))){
-			LOG.info("Found required common word present on one side and not on the other: " + name1TokensC.toString()
-				+ " and less than 3 keywords overlap (or >1 common words and less than 5 overl");
-			return false;
-		}
-		else
-			return true;
-	}
-
-	protected List<String> tokenizeAndStem(String input)
-	{
-
-		List<String> results = new ArrayList<String>();
-		List<String> toks = TextProcessor.fastTokenize(input.toLowerCase(), false);
-		for (String word : toks)
-		{
-			try
-			{
-				if (word.equals("theatre"))
-					word = "theater";
-				results.add(word);
-			}
-			catch (Exception e)
-			{
-				results.add(word);
-			}
-		}
-		return results;
-	}
-
-	protected List<String> stemList(List<String> toks)
-	{
-
-		List<String> results = new ArrayList<String>();
-		for (String word : toks)
-		{
-			try
-			{
-				if (word.equals("theatre"))
-					word = "theater";
-				results.add(word);
-			}
-			catch (Exception e)
-			{
-				results.add(word);
-			}
-		}
-		return results;
-	}
-
-	public List<String> removeVenuePart(ArrayList<String> toks)
-	{
-		List<String> results = new ArrayList<String>();
-		boolean bVenuePart = false;
-		for (String word : toks)
-		{
-			// beginning of venue part
-			if (word.equals("at") || word.equals("@"))
-				bVenuePart = true;
-			// end of venue part
-			if (!StringUtils.isAlphanumeric(word) || word.startsWith("<punc"))
-				bVenuePart = false;
-
-			if (!bVenuePart && !word.startsWith("<punc"))
-				results.add(word);
-
-		}
-		return results;
-	}
-
-	protected boolean isCapitalized(String lookup)
-	{
-		String[] titleWords = lookup.split(" ");
-		int count = 0;
-		for (String word : titleWords)
-		{
-			if (word.length() < 2) // '-', '|', ':'
-				break;
-
-			if (word.equals(word.toLowerCase()) && (!Arrays.asList(englishPrepositions).contains(word))
-				&& word.length() > 3 && StringUtils.isAlphanumeric(word))
-				continue; // was return false;
-			if (count > 3)
-				break;
-			count++;
-		}
-		return true;
-	}
-
-	protected List<String> extractMainNounPhrase(List<String> name1Tokens)
-	{
-		List<String> results = new ArrayList<String>();
-		int ofPos = name1Tokens.indexOf("of");
-		List<String> ofList = name1Tokens.subList(ofPos + 1, name1Tokens.size() - 1);
-		// now iterate till next preposition towards the end of noun phrase
-		for (String preposCand : ofList)
-		{
-			if (Arrays.asList(englishPrepositions).contains(preposCand))
-				break;
-			results.add(preposCand);
-		}
-		return results;
-
-	}
-
-	public boolean verifyEventAttributesPost(List<String> name1Tokens, List<String> name2Tokens)
-	{
-		String[] attributeNamesPost = { "age", "ages", "game", "games", "grade", "grades", "level", "levels", "vs",
-			"vs.", "versus", "pottery", "competition", "contest", "skill", "skills", "day", "only", "basic", "class",
-			"completed",
-			// "tour", ?
-			"advanced", "beginner", "intermediate", "flight", "workshop", "latin", "adobe", "ballet", "dinner",
-			"breakfast", "lunch", "summer", // "canyon"
-			"tfestival", "festival", "mfestival" };
-		try
-		{
-			for (String attr : attributeNamesPost)
-			{
-
-				int agePos1 = name1Tokens.indexOf(attr);
-				int agePos2 = name2Tokens.indexOf(attr);
-				if (agePos1 > -1 && agePos2 > -1 && agePos1 < name1Tokens.size() - 1
-					&& agePos2 < name2Tokens.size() - 1)
-				{
-					double dist = LevensteinDistanceFinder.levensteinDistance(name1Tokens.get(agePos1 + 1),
-						name2Tokens.get(agePos2 + 1), 1, 10, 1, 10);
-					if (!name1Tokens.get(agePos1 + 1).equalsIgnoreCase(name2Tokens.get(agePos2 + 1))
-						&& (dist > 2.99 || name1Tokens.get(agePos1 + 1).length() < 4))
-					{
-						LOG.info("Found disagreement in the attrib value for " + attr + " value = "
-							+ name1Tokens.get(agePos1 + 1) + " <=> " + name2Tokens.get(agePos2 + 1));
-						return false;
-					}
-				}
-			}
-		}
-		catch (Exception e)
-		{
-			e.printStackTrace();
-		}
-		return true;
-	}
-
-	public boolean verifyEventAttributesPre(List<String> name1Tokens, List<String> name2Tokens)
-	{
-
-		String[] attributeNamesPre = { "hour", "vs", "vs.", "versus", "pottery", "program", "day", "only",
-			// dance styles followed by a param
-			"swing", "rumba", "samba", "doble",
-			"violence", //
-			// "level",
-			"class", "classes", "kid", "kids", "test", "west", "summer_camp", "session", "tfestival", "festival",
-			"mfestival" };
-		try
-		{
-			for (String attr : attributeNamesPre)
-			{
-				int agePos1 = name1Tokens.indexOf(attr);
-				int agePos2 = name2Tokens.indexOf(attr);
-				if (agePos1 > 0 && agePos2 > 0)
-				{ // not the first word is attr name
-					if (!name1Tokens.get(agePos1 - 1).equalsIgnoreCase(name2Tokens.get(agePos2 - 1))
-						&& (agePos1 < 2 || !name1Tokens.get(agePos1 - 2).equalsIgnoreCase(name2Tokens.get(agePos2 - 1)))
-						&&
-						// ((agePos1<2 && agePos2 <2) || !name1Tokens.get(agePos1 -
-						// 2).equalsIgnoreCase(name2Tokens.get(agePos2 - 2 ))) &&
-						(agePos2 < 2 || !name1Tokens.get(agePos1 - 1).equalsIgnoreCase(name2Tokens.get(agePos2 - 2)))
-
-					)
-					{
-						LOG.info("Found disagreement in the attrib value for " + attr + " value = "
-							+ name1Tokens.get(agePos1 - 1) + " and " + name2Tokens.get(agePos2 - 1));
-						return false;
-					}
-				}
-			}
-		}
-		catch (Exception e)
-		{
-			e.printStackTrace();
-		}
-		return true;
-	}
-
-	protected boolean bDifferentGroupOneSubnameOfAnother(String name1, String name2)
-	{
-		// first check a special case that both name1 and name2 are DIFFERENT groups at last.fm
-		Map<String, Integer> map1 = null; //LastFM_APIManager.extractTagsForArtist(name1);
-		Map<String, Integer> map2 = null; //LastFM_APIManager.extractTagsForArtist(name2);
-		if (map1 != null && map2 != null && map1.size() > 0 && map2.size() > 0)
-			map1.entrySet().removeAll(map2.entrySet());
-		if (map1.size() > 0) // same or subset of tags => different groups
-			return true;
-
-		return false;
-	}
-
-	public boolean applyBothSidesRule(String name1, String name2)
-	{
-		List<String> name1Tokens = TextProcessor.fastTokenize(name1.toLowerCase(), false);
-		List<String> name2Tokens = TextProcessor.fastTokenize(name2.toLowerCase(), false);
-		// get unique names
-		List<String> name1TokensC = new ArrayList<String>(name1Tokens), name2TokensC = new ArrayList<String>(
-			name2Tokens);
-		;
-		name1TokensC.removeAll(name2Tokens);
-		name2TokensC.removeAll(name1Tokens);
-		// get all unique names
-		name1TokensC.addAll(name2TokensC);
-
-		name1TokensC.retainAll(namesBothSides);
-		if (name1TokensC.size() > 0)
-			return false;
-		else
-			return true;
-	}
-
-	private boolean succeededMenWomenSportsRule(String name1, String name2)
-	{
-		List<String> name1Tokens = TextProcessor.fastTokenize(name1.toLowerCase(), false);
-		List<String> name2Tokens = TextProcessor.fastTokenize(name2.toLowerCase(), false);
-		if (name1Tokens.contains("men") || name2Tokens.contains("men") || name1Tokens.contains("women")
-			|| name2Tokens.contains("women") || name1Tokens.contains("disney") || name2Tokens.contains("disney"))
-		{ // all words should be the
-			// same
-			name1Tokens.removeAll(name2Tokens);
-			name1Tokens.removeAll(Arrays.asList(englishPrepositions));
-			name1Tokens.removeAll(Arrays.asList(commonWordsInEventTitles));
-			if (name1Tokens.size() < 1)
-				return true;
-
-			return false;
-		}
-		else
-			return true;
-
-	}
-
-	private boolean succeededSpecialGroupsSymphoniesRule(String name1, String name2)
-	{
-		List<String> name1Tokens = TextProcessor.fastTokenize(name1.toLowerCase(), false);
-		List<String> name2Tokens = TextProcessor.fastTokenize(name2.toLowerCase(), false);
-		if (name1Tokens.contains("orchestra") || name2Tokens.contains("symphony") || name2Tokens.contains("orchestra")
-			|| name1Tokens.contains("symphony") || name2Tokens.contains("band") || name1Tokens.contains("band")
-			|| name2Tokens.contains("trio") || name1Tokens.contains("trio") || name1Tokens.contains("soleil")
-			|| name2Tokens.contains("soleil") || name1Tokens.contains("disney") || name2Tokens.contains("disney")
-			|| name1Tokens.contains("lang") || name2Tokens.contains("lang")) // special group 'lang lang'
-		{ // all words should be the
-			// same
-			List<String> name1TokensClone = new ArrayList<String>(name1Tokens);
-			name1Tokens.removeAll(name2Tokens);
-			name2Tokens.removeAll(name1TokensClone);
-			name1Tokens.addAll(name2Tokens);
-			name1Tokens.removeAll(Arrays.asList(this.englishPrepositions));
-			// name1Tokens.removeAll(Arrays.asList(this.commonWordsInEventTitles));
-			if (name1Tokens.size() < 1)
-				return true;
-
-			return false;
-		}
-		else
-			return true;
-
-	}
-
-	public int getAttemptedNameMerge(String name1, String name2)
-	{
-		name1 = name1.replaceAll("[a-z][A-Z]", "$0&$0").replaceAll(".&.", " ");
-		; // suspected word merge if higher case is in the middle of word
-		name2 = name2.replaceAll("[a-z][A-Z]", "$0&$0").replaceAll(".&.", " ");
-
-		name1 = name1.toLowerCase();
-		name2 = name2.toLowerCase();
-		if (name1.equals(name2) || name1.startsWith(name2) || name2.startsWith(name1) || name1.endsWith(name2)
-			|| name1.endsWith(name2) || name1.indexOf(name2) > -1 || name1.indexOf(name2) > -1) // ??
-			return 2;
-		String name2r = name2.replace(" ", "");
-		if (name1.equals(name2r) || name1.startsWith(name2r) || name1.startsWith(name2r) || name1.endsWith(name2r)
-			|| name1.endsWith(name2r))
-			return 1;
-		String name1r = name1.replace(" ", "");
-		if (name1r.equals(name2r) || name1r.startsWith(name2r) || name1r.startsWith(name2) || name1r.endsWith(name2r)
-			|| name1r.endsWith(name2r) || name2r.equals(name1r) || name2r.startsWith(name1r)
-			|| name2r.startsWith(name1) || name2r.endsWith(name1r) || name2r.endsWith(name2)
-
-		)
-			return 1;
-
-		if (stringDistanceMeasurer.measureStringDistance(name1, name2) > 0.95)
-			return 2;
-		if (stringDistanceMeasurer.measureStringDistance(name1, name2) > 0.70)
-			return 1;
-		return 0;
-	}
-
-	private String normalizeGenderAndOtherAttributes(String name1)
-	{
-		name1 = Utils.convertToASCII(name1.replace("/", " ").replace("w/", "with ")).replace('!', ' ').toLowerCase();
-
-		name1 = name1.replace("woman", "women").replace("womans", "women").replace("womens", "women")
-			.replace("women's", "women").replace("woman's", "women");
-		name1 = name1.replace(" man ", " men ").replace(" mans ", " men ").replace(" men's ", " men ")
-			.replace(" man's ", " men ").replace(" mens ", " men ").replace("summer camp", "summer_camp")
-			.replace("gaea theatre festival", "tfestival"); // need regexp for this
-		return name1;
-	}
-
-	/*
-	 * Main semantic similarity function which applies boundary cases rule and focus on web mining rule The main
-	 * criteria for a commonality between titles: to form an entity, searchable on the web
-	 */
-	public DedupResult areNamesSemanticallyCloseWebMineCommonPart(String name1, String name2, String venue)
-	{
-		// normalize gender
-		name1 = normalizeGenderAndOtherAttributes(name1);
-		name2 = normalizeGenderAndOtherAttributes(name2);
-
-		Boolean bShortTitlesSimilarInWebSpace = attemptShortTitlesSimilarityInWebSpace(name1, name2);
-		if (bShortTitlesSimilarInWebSpace)
-			return new DedupResult("Accepted as short title by web mining", 2, true);
-
-		StringBuffer reason = new StringBuffer();
-		List<String> venueToks = removeVenuePart(TextProcessor.fastTokenize(venue.toLowerCase(), false));
-
-		LOG.info("\nComputing similarity between name = '" + name1 + "' and name = '" + name2 + "'");
-		// convert titles into token lists
-		List<String> name1Tokens = removeVenuePart(TextProcessor.fastTokenize(name1.toLowerCase(), true));
-		List<String> name2Tokens = removeVenuePart(TextProcessor.fastTokenize(name2.toLowerCase(), true));
-		// applySubPhraseExtractionRule()
-		Boolean bSameAttrib = verifyEventAttributesPost(name1Tokens, name2Tokens)
-			&& verifyEventAttributesPre(name1Tokens, name2Tokens);
-		if (!bSameAttrib)
-		{
-			LOG.info("similar events but different attributes");
-			return new DedupResult("similar events but different attributes", 0, false);
-		}
-
-		boolean bothSodesSuccess = applyBothSidesRuleEvent(name1, name2);
-		if (!bothSodesSuccess)
-		{
-			return new DedupResult("Failed common words test for sports", 0, false);
-		}
-
-		float dist = (float) LevensteinDistanceFinder.levensteinDistance(name1, name2, 1, 10, 1, 10);
-		if (dist < 5.1)
-		{
-			LOG.info("Found low LevensteinDistance for name1 and name2");
-			return new DedupResult("Found low LevensteinDistance", 2, true);
-		}
-
-		int nameMergeScore = getAttemptedNameMerge(name1, name2);
-		if (nameMergeScore > 0)
-		{
-			LOG.info("Found low NameMerge Distance for name1 and name2");
-			return new DedupResult("Found low  NameMerge Distance", 2, true);
-		}
-
-		// todo take into account order
-		// form common sub-list of tokens
-		name1Tokens.retainAll(name2Tokens);
-		name1Tokens.removeAll(venueToks);
-
-		name1Tokens.removeAll(commonWordsInEventTitles);
-		name1Tokens.removeAll(Arrays.asList(englishPrepositions));
-		name1Tokens = removeDollarWordAndNonAlphaFromList(name1Tokens);
-		// todo : to use full string measure
-		// boundary case: too many words => just do counts
-		float commonPortion = (float) name1Tokens.size() / (float) name2Tokens.size();
-		if (commonPortion > 0.8 || name1Tokens.size() >= 4)
-		{ // after typical
-			// title words
-			// are revomed 4
-			// looks OK
-			LOG.info("Accepted since substantial common part");
-			return new DedupResult("Accepted since substantial common part", Math.max((int) (commonPortion * 5.0), 2),
-				true);
-		}
-		// boundary case: no overlap
-		if (name1Tokens.size() < 1)
-		{
-			LOG.info("Rejected since nothing in common");
-			return new DedupResult("Rejected since nothing in common", 0, false);
-		}
-		// get from list of tokens back to words to get search expression
-		String entityExpression = name1Tokens.toString().replace('[', ' ').replace(']', ' ').replace(',', ' ')
-			.replace("  ", " ").trim();
-		/*
-		 * // now try name merge reduced strings String entityExpression1 = name1TokensC.toString().replace('[',
-		 * ' ').replace(']', ' ').replace(',', ' ') .replace("  ", " ").trim(); String entityExpression2 =
-		 * name2Tokens.toString().replace('[', ' ').replace(']', ' ').replace(',', ' ') .replace("  ", " ").trim();
-		 * 
-		 * nameMergeScore = getAttemptedNameMerge(entityExpression1, entityExpression2); if (nameMergeScore>0){
-		 * LOG.info("Found low NameMerge Distance for REDUCED name1 and name2"); return new
-		 * DedupResult("Found low  NameMerge Distance REDUCED", 2, true);
-		 * 
-		 * }
-		 */
-
-		// Before doing web mining, make sure overlap between titles is NOT a
-		// set of common english words (use the vocabulary)
-		// if all words are common, then NOT an entity
-		if (name1Tokens.size() < 2)
-		{
-			boolean bCommonEnglishWord = false;
-			for (String word : name1Tokens)
-			{
-	//			if (stopList.isCommonWord(word) /*&& mostFrequent1000Words.isMostFrequent1000Word(word)*/)
-	//				bCommonEnglishWord = true;
-			}
-
-			if (bCommonEnglishWord)
-			{
-				LOG.info("Rejected common entity: common word = " + entityExpression);
-				return new DedupResult("Rejected since common entity is common English word = " + entityExpression, 0,
-					false);
-			}
-		}
-		// accept common expression
-		LOG.info("Formed common entity = " + entityExpression);
-		reason.append("Formed common entity = " + entityExpression + "\n");
-		// now go to the web / bing api with this common expression
-		List<HitBase> searchResult = webSearch.runSearch(entityExpression);
-		float entityScore = 0f;
-		if (searchResult != null)
-		{
-			int count = 0;
-			for (HitBase item : searchResult)
-			{
-				String lookup = item.getTitle();
-				LOG.info("Bing hit title = '" + lookup + "'");
-				reason.append("Bing hit title = '" + lookup + "'\n");
-				if (count > 4)
-					break;
-				count++;
-				// if occurrence is not capitalized then rejected, do not take
-				// into account in score
-				if (!isCapitalized(lookup))
-				{
-					LOG.info("Rejected hit title since not capitalized");
-					reason.append("Rejected hit title since not capitalized\n");
-					continue;
-				}
-
-				/*
-				 * if (lookup.indexOf('-')>0 ){ lookup = lookup.split("-")[0]; }
-				 */
-				// now compute overlap between what found on the web for hit's
-				// title and the common expression between events
-				List<String> lookupTokens = tokenizeAndStem(lookup);
-				lookupTokens.retainAll(stemList(name1Tokens));
-				if (lookupTokens.size() >= name1Tokens.size())
-					// increment score if found hit title is acceptable
-					entityScore += 1.0;
-				else
-				{
-					LOG.info("Found hit title " + lookupTokens + " does not cover comonality expr = " + name1Tokens);
-					entityScore += 0.25;
-
-				}
-
-			}
-		}
-		return new DedupResult(reason.toString(), (int) entityScore, entityScore > 1.0);
-	}
-
-	public DedupResult areNamesSemanticallyCloseInWebSearchSpace(String name1, String name2, Float thresh, boolean bStem)
-	{
-
-		if (thresh == null || thresh == 0f)
-		{
-			thresh = 0.8f;
-		}
-
-		// normalize gender
-		name1 = normalizeGenderAndOtherAttributes(name1);
-		name2 = normalizeGenderAndOtherAttributes(name2);
-
-		StringBuffer reason = new StringBuffer();
-
-		boolean bSportsOrOrchestra = !succeededMenWomenSportsRule(name1, name2);
-		if (bSportsOrOrchestra)
-			return new DedupResult("Sports rule: different teams or teams of different venues", 0, false);
-
-		bSportsOrOrchestra = !succeededSpecialGroupsSymphoniesRule(name1, name2);
-		if (bSportsOrOrchestra)
-			return new DedupResult("SpecialGroupsSymphoniesRule: different circus/band", 0, false);
-
-		LOG.info("\nComputing similarity between name = '" + name1 + "' and name = '" + name2 + "'");
-
-		List<String> name1Tokens = TextProcessor.fastTokenize(name1.toLowerCase(), true);
-		List<String> name2Tokens = TextProcessor.fastTokenize(name2.toLowerCase(), true);
-		Boolean bSameAttrib = verifyEventAttributesPost(name1Tokens, name2Tokens)
-			&& verifyEventAttributesPre(name1Tokens, name2Tokens);
-		if (!bSameAttrib)
-		{
-			LOG.info("similar events but different attributes");
-			return new DedupResult("similar events but different attributes", 0, false);
-		}
-
-		List<HitBase> searchResult1 = webSearch.runSearch(name1);
-		List<HitBase> searchResult2 = webSearch.runSearch(name2);
-		int score = 0;
-		if (searchResult1 != null && searchResult2 != null)
-		{
-			for (HitBase item1 : searchResult1)
-			{
-				if (item1.getUrl().indexOf("myspace") > -1 || item1.getUrl().indexOf("wiki") > -1)
-					continue;
-				for (HitBase item2 : searchResult2)
-				{
-					String lookup1 = item1.getTitle().replace("Facebook", "").replace("LinkedIn", "")
-						.replace("MySpace", "");
-					String lookup2 = item2.getTitle().replace("Facebook", "").replace("LinkedIn", "")
-						.replace("MySpace", "");
-					double d = 0;
-					if (bStem)
-						d = stringDistanceMeasurer.measureStringDistance(lookup1, lookup2);
-					else
-						d = stringDistanceMeasurer.measureStringDistanceNoStemming(lookup1, lookup2);
-					if (d > thresh) // 0.8)
-					{
-
-						reason.append("Found common search result title for group names '" + lookup1 + " < > "
-							+ lookup2 + " sim = " + d + "\n");
-						LOG.info(("Found common search result title for group names '" + lookup1 + " < > " + lookup2
-							+ " sim = " + d));
-						score++;
-					}
-
-				}
-			}
-		}
-
-		Boolean bothSidesSuccess = applyBothSidesRule(name1, name2);
-		if (!bothSidesSuccess)
-		{
-			score = 1;
-			reason.append("Failed common words test for sports");
-		}
-		if (score > 0)
-		{
-			Boolean bDifferentGroup = bDifferentGroupOneSubnameOfAnother(name1, name2);
-			if (bDifferentGroup)
-			{
-				score = 1;
-				reason.append("Failed common words test for sports");
-			}
-		}
-		return new DedupResult(reason.toString(), score, score > 1);
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.apps.object_dedup;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import opennlp.tools.similarity.apps.BingQueryRunner;
+import opennlp.tools.similarity.apps.HitBase;
+import opennlp.tools.similarity.apps.utils.LevensteinDistanceFinder;
+import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;
+import opennlp.tools.similarity.apps.utils.Utils;
+import opennlp.tools.textsimilarity.TextProcessor;
+
+import org.apache.commons.lang.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/* This is a template class for deduplicator */
+
+public class SimilarityAccessorBase
+{
+	private static final Logger LOG = LoggerFactory.getLogger(SimilarityAccessorBase.class);
+
+	public static final int MAX_EV_TO_RECOMM = 6;
+
+	private List<String> namesBothSides;
+
+	protected static final String[] englishPrepositions = new String[] { "a", "aboard", "about", "above", "absent",
+		"across", "after", "against", "along", "alongside", "among", "around", "as", "at", "before", "behind", "below",
+		"beneath", "between", "beyond", "but", "by", "despite", "down", "during", "except", "excluding", "failing",
+		"following", "for", "from", "in", "including", "inside", "into", "like", "near", "next", "of", "off", "on",
+		"onto", "only", "opposite", "out", "outside", "over", "pace", "past", "per", "since", "than", "through", "and",
+		"thru", "till", "to", "toward", "under", "up", "upon", "versus", "with", "within", "you", "must", "know",
+		"when" };
+
+	protected List<String> commonWordsInEventTitles = Arrays.asList(new String[] { "community", "party", "film",
+		"music", "exhibition", "kareoke", "guitar", "quartet", "reggae", "r&b", "band", "dj ", "piano", "pray",
+		"worship", "god", "training", "class", "development", "training", "class", "course", "our", "comedy", ",fun",
+		"musical", "group", "alliance", "session", "feeding", "introduction", "school", "conversation", "learning",
+		"nursery", "unity", "trivia", "chat", "conference", "tuition", "technology", "teen", "communication",
+		"reception", "management", "beginner", "beginning", "collabora", "reuninon", "political", "course", "age",
+		"ages", "through", "grade", "networking", "workshop", "demonstration", "tuning", "program", "summit",
+		"convention", "day", "night", "one", "two", "outfest", "three", "online", "writing", "seminar", "coach",
+		",expo", "advanced", "beginner", "intermediate", "earn", "free", "ii", "iii", "skills", "skill", "artist",
+		"summer", "winter", "autumn", "spring", "camp", "vacation", "miscrosoft", "kid", "child", "kids", "children",
+		"every", "everyone", "dancer", "dancers", "senior", "seniors", "basic", "elementary", "outfest", "2008",
+		"2009", "2010", "2011", "2012", "monday", "tuesday", "wednesday", "thirsday", "friday", "saturday", "sunday",
+		"mondays", "tuesdays", "wednesdays", "thirsdays", "fridays", "saturdays", "sundays", "men" // ?
+	});
+
+	private BingQueryRunner webSearch = new BingQueryRunner();
+
+	private StringDistanceMeasurer stringDistanceMeasurer = new StringDistanceMeasurer();
+
+
+	public SimilarityAccessorBase()
+	{
+	}
+
+
+	public void init()
+	{
+		namesBothSides = getWordsThatShouldBeOnBothSidesEvents();
+	}
+
+	protected List<String> removeDollarWordAndNonAlphaFromList(List<String> list)
+	{
+		List<String> result = new ArrayList<String>();
+		Pattern p = Pattern.compile("^\\$(\\d{1,3}(\\,\\d{3})*|(\\d+))(\\.\\d{2})?$");
+		for (String w : list)
+		{
+			if (!(p.matcher(w).find()) && StringUtils.isAlphanumeric(w) && (w.length() >= 3 || !StringUtils.isAlpha(w)))
+				result.add(w);
+		}
+		return result;
+	}
+
+
+	public List<String> getWordsThatShouldBeOnBothSidesEvents()
+	{
+/*
+		names.addAll(Arrays.asList(new String[] { "woman", "man", "women", "men", "womans", "mans", "womens", "mens",
+			"boy", "girl", "boys", "girls", "men's", "women's", "woman's", "ice", // for disney
+			"flight", "intermediate", "advanced", "beginner",
+			// "tour", TODO special consideration
+			"helicopter", "sexual", "junior", "jr" }));
+			*/
+		return null;
+
+	}
+
+	protected Boolean applySemanticNameSimilarityRule(Object es1,
+		Object es2)
+	{
+		
+		//TODO check attributes of objects
+		/*
+		if (!(es1.getVenueName().endsWith(es2.getVenueName()) || es2.getVenueName().endsWith(es1.getVenueName())))
+			return false;
+		if (Math.abs(es1.getStarttime().getTime() - es2.getStarttime().getTime()) > 100000)
+			return false;
+			*/
+
+		return true;
+
+	}
+
+	// this rule extract "OF" part and treats it as a whole expression
+	protected void applySubPhraseExtractionRule(List<String> name1Tokens, List<String> name2Tokens)
+	{
+		if (name1Tokens.indexOf("of") > 0 && name2Tokens.indexOf("of") > 0)
+		{
+			name1Tokens = extractMainNounPhrase(name1Tokens);
+			name2Tokens = extractMainNounPhrase(name2Tokens);
+		}
+	}
+
+	private Boolean attemptShortTitlesSimilarityInWebSpace(String name1, String name2)
+	{
+
+		// first delimeter processing
+		String name1v = name1.replace("'", "").replace("-", " ");
+		String name2v = name2.replace("'", "").replace("-", " ");
+		String name1vv = name1.replace("'", "");
+		String name2vv = name2.replace("'", "");
+		String name1vvv = name1.replace("-", " ");
+		String name2vvv = name2.replace("-", " ");
+
+		if (name1.startsWith(name2) || name1vv.startsWith(name2) || name1.startsWith(name2v)
+			|| name1.startsWith(name2vv) || name1.startsWith(name2vvv) || name1v.startsWith(name2v)
+			|| name1v.startsWith(name2vv) || name2.startsWith(name1) || name2vv.startsWith(name1)
+			|| name2.startsWith(name1v) || name2vvv.startsWith(name1vv) || name2.startsWith(name1vvv)
+			|| name2v.startsWith(name1v) || name2v.startsWith(name1vv) || name1.endsWith(name2)
+			|| name1vv.endsWith(name2) || name1.endsWith(name2v) || name1.endsWith(name2vv) || name1.endsWith(name2vvv)
+			|| name1v.endsWith(name2v) || name1v.endsWith(name2vv) || name2.endsWith(name1) || name2vv.endsWith(name1)
+			|| name2.endsWith(name1v) || name1vvv.endsWith(name2vv) || name2.endsWith(name1vvv)
+			|| name2v.endsWith(name1v) || name2v.endsWith(name1vv))
+		{
+			LOG.info("Found fuzzy substring of name1 and name2");
+			return true;
+		}
+		if (name1.length() > 12 && name2.length() > 12)
+			return false;
+
+		return areNamesSemanticallyCloseInWebSearchSpace(name1, name2, 0.8f, false).isDecision();
+
+	}
+
+	public Boolean applyBothSidesRuleEvent(String name1, String name2)
+	{
+		List<String> name1Tokens = TextProcessor.fastTokenize(name1.toLowerCase(), false);
+		List<String> name2Tokens = TextProcessor.fastTokenize(name2.toLowerCase(), false);
+		// get unique names
+		List<String> name1TokensC = new ArrayList<String>(name1Tokens), name2TokensC = new ArrayList<String>(
+			name2Tokens);
+		;
+		name1TokensC.removeAll(name2Tokens);
+		name2TokensC.removeAll(name1Tokens);
+		// get all unique names
+		name1TokensC.addAll(name2TokensC);
+
+		name1TokensC.retainAll(namesBothSides);
+		name1Tokens.retainAll(name2Tokens);
+
+		if ((name1TokensC.size() > 0 && name1Tokens.size() < 3) || (name1TokensC.size() > 1 && name1Tokens.size() < 5))
+		{ // 'mens == men; case !(name1TokensC.size()==2 && (name1TokensC.get(0).indexOf(name1TokensC.get(1))>-1 ||
+			// name1TokensC.get(1).indexOf(name1TokensC.get(0))>-1 ))){
+			LOG.info("Found required common word present on one side and not on the other: " + name1TokensC.toString()
+				+ " and less than 3 keywords overlap (or >1 common words and less than 5 overl");
+			return false;
+		}
+		else
+			return true;
+	}
+
+	protected List<String> tokenizeAndStem(String input)
+	{
+
+		List<String> results = new ArrayList<String>();
+		List<String> toks = TextProcessor.fastTokenize(input.toLowerCase(), false);
+		for (String word : toks)
+		{
+			try
+			{
+				if (word.equals("theatre"))
+					word = "theater";
+				results.add(word);
+			}
+			catch (Exception e)
+			{
+				results.add(word);
+			}
+		}
+		return results;
+	}
+
+	protected List<String> stemList(List<String> toks)
+	{
+
+		List<String> results = new ArrayList<String>();
+		for (String word : toks)
+		{
+			try
+			{
+				if (word.equals("theatre"))
+					word = "theater";
+				results.add(word);
+			}
+			catch (Exception e)
+			{
+				results.add(word);
+			}
+		}
+		return results;
+	}
+
+	public List<String> removeVenuePart(ArrayList<String> toks)
+	{
+		List<String> results = new ArrayList<String>();
+		boolean bVenuePart = false;
+		for (String word : toks)
+		{
+			// beginning of venue part
+			if (word.equals("at") || word.equals("@"))
+				bVenuePart = true;
+			// end of venue part
+			if (!StringUtils.isAlphanumeric(word) || word.startsWith("<punc"))
+				bVenuePart = false;
+
+			if (!bVenuePart && !word.startsWith("<punc"))
+				results.add(word);
+
+		}
+		return results;
+	}
+
+	protected boolean isCapitalized(String lookup)
+	{
+		String[] titleWords = lookup.split(" ");
+		int count = 0;
+		for (String word : titleWords)
+		{
+			if (word.length() < 2) // '-', '|', ':'
+				break;
+
+			if (word.equals(word.toLowerCase()) && (!Arrays.asList(englishPrepositions).contains(word))
+				&& word.length() > 3 && StringUtils.isAlphanumeric(word))
+				continue; // was return false;
+			if (count > 3)
+				break;
+			count++;
+		}
+		return true;
+	}
+
+	protected List<String> extractMainNounPhrase(List<String> name1Tokens)
+	{
+		List<String> results = new ArrayList<String>();
+		int ofPos = name1Tokens.indexOf("of");
+		List<String> ofList = name1Tokens.subList(ofPos + 1, name1Tokens.size() - 1);
+		// now iterate till next preposition towards the end of noun phrase
+		for (String preposCand : ofList)
+		{
+			if (Arrays.asList(englishPrepositions).contains(preposCand))
+				break;
+			results.add(preposCand);
+		}
+		return results;
+
+	}
+
+	public boolean verifyEventAttributesPost(List<String> name1Tokens, List<String> name2Tokens)
+	{
+		String[] attributeNamesPost = { "age", "ages", "game", "games", "grade", "grades", "level", "levels", "vs",
+			"vs.", "versus", "pottery", "competition", "contest", "skill", "skills", "day", "only", "basic", "class",
+			"completed",
+			// "tour", ?
+			"advanced", "beginner", "intermediate", "flight", "workshop", "latin", "adobe", "ballet", "dinner",
+			"breakfast", "lunch", "summer", // "canyon"
+			"tfestival", "festival", "mfestival" };
+		try
+		{
+			for (String attr : attributeNamesPost)
+			{
+
+				int agePos1 = name1Tokens.indexOf(attr);
+				int agePos2 = name2Tokens.indexOf(attr);
+				if (agePos1 > -1 && agePos2 > -1 && agePos1 < name1Tokens.size() - 1
+					&& agePos2 < name2Tokens.size() - 1)
+				{
+					double dist = LevensteinDistanceFinder.levensteinDistance(name1Tokens.get(agePos1 + 1),
+						name2Tokens.get(agePos2 + 1), 1, 10, 1, 10);
+					if (!name1Tokens.get(agePos1 + 1).equalsIgnoreCase(name2Tokens.get(agePos2 + 1))
+						&& (dist > 2.99 || name1Tokens.get(agePos1 + 1).length() < 4))
+					{
+						LOG.info("Found disagreement in the attrib value for " + attr + " value = "
+							+ name1Tokens.get(agePos1 + 1) + " <=> " + name2Tokens.get(agePos2 + 1));
+						return false;
+					}
+				}
+			}
+		}
+		catch (Exception e)
+		{
+			e.printStackTrace();
+		}
+		return true;
+	}
+
	/**
	 * Verifies that attribute values which PRECEDE a shared attribute keyword
	 * (e.g. "beginner class" vs "advanced class", "3 hour" vs "5 hour") agree
	 * between the two tokenized event titles.
	 * <p>
	 * The {@code agePos - 2} cross-comparisons tolerate one optional filler
	 * word before the keyword in either title. The boundary guards
	 * ({@code agePos < 2}) keep the look-behind indices in range.
	 *
	 * @param name1Tokens tokens of the first title (lower-cased)
	 * @param name2Tokens tokens of the second title (lower-cased)
	 * @return {@code false} when the word(s) before a shared keyword clearly
	 *         differ; {@code true} otherwise
	 */
	public boolean verifyEventAttributesPre(List<String> name1Tokens, List<String> name2Tokens)
	{

		String[] attributeNamesPre = { "hour", "vs", "vs.", "versus", "pottery", "program", "day", "only",
			// dance styles followed by a param
			"swing", "rumba", "samba", "doble",
			"violence", //
			// "level",
			"class", "classes", "kid", "kids", "test", "west", "summer_camp", "session", "tfestival", "festival",
			"mfestival" };
		try
		{
			for (String attr : attributeNamesPre)
			{
				int agePos1 = name1Tokens.indexOf(attr);
				int agePos2 = name2Tokens.indexOf(attr);
				if (agePos1 > 0 && agePos2 > 0)
				{ // not the first word is attr name
					// reject only when no pairing of the preceding word(s) matches
					if (!name1Tokens.get(agePos1 - 1).equalsIgnoreCase(name2Tokens.get(agePos2 - 1))
						&& (agePos1 < 2 || !name1Tokens.get(agePos1 - 2).equalsIgnoreCase(name2Tokens.get(agePos2 - 1)))
						&&
						// ((agePos1<2 && agePos2 <2) || !name1Tokens.get(agePos1 -
						// 2).equalsIgnoreCase(name2Tokens.get(agePos2 - 2 ))) &&
						(agePos2 < 2 || !name1Tokens.get(agePos1 - 1).equalsIgnoreCase(name2Tokens.get(agePos2 - 2)))

					)
					{
						LOG.info("Found disagreement in the attrib value for " + attr + " value = "
							+ name1Tokens.get(agePos1 - 1) + " and " + name2Tokens.get(agePos2 - 1));
						return false;
					}
				}
			}
		}
		catch (Exception e)
		{
			e.printStackTrace();
		}
		return true;
	}
+
+	protected boolean bDifferentGroupOneSubnameOfAnother(String name1, String name2)
+	{
+		// first check a special case that both name1 and name2 are DIFFERENT groups at last.fm
+		Map<String, Integer> map1 = null; //LastFM_APIManager.extractTagsForArtist(name1);
+		Map<String, Integer> map2 = null; //LastFM_APIManager.extractTagsForArtist(name2);
+		if (map1 != null && map2 != null && map1.size() > 0 && map2.size() > 0)
+			map1.entrySet().removeAll(map2.entrySet());
+		if (map1.size() > 0) // same or subset of tags => different groups
+			return true;
+
+		return false;
+	}
+
+	public boolean applyBothSidesRule(String name1, String name2)
+	{
+		List<String> name1Tokens = TextProcessor.fastTokenize(name1.toLowerCase(), false);
+		List<String> name2Tokens = TextProcessor.fastTokenize(name2.toLowerCase(), false);
+		// get unique names
+		List<String> name1TokensC = new ArrayList<String>(name1Tokens), name2TokensC = new ArrayList<String>(
+			name2Tokens);
+		;
+		name1TokensC.removeAll(name2Tokens);
+		name2TokensC.removeAll(name1Tokens);
+		// get all unique names
+		name1TokensC.addAll(name2TokensC);
+
+		name1TokensC.retainAll(namesBothSides);
+		if (name1TokensC.size() > 0)
+			return false;
+		else
+			return true;
+	}
+
+	private boolean succeededMenWomenSportsRule(String name1, String name2)
+	{
+		List<String> name1Tokens = TextProcessor.fastTokenize(name1.toLowerCase(), false);
+		List<String> name2Tokens = TextProcessor.fastTokenize(name2.toLowerCase(), false);
+		if (name1Tokens.contains("men") || name2Tokens.contains("men") || name1Tokens.contains("women")
+			|| name2Tokens.contains("women") || name1Tokens.contains("disney") || name2Tokens.contains("disney"))
+		{ // all words should be the
+			// same
+			name1Tokens.removeAll(name2Tokens);
+			name1Tokens.removeAll(Arrays.asList(englishPrepositions));
+			name1Tokens.removeAll(Arrays.asList(commonWordsInEventTitles));
+			if (name1Tokens.size() < 1)
+				return true;
+
+			return false;
+		}
+		else
+			return true;
+
+	}
+
+	private boolean succeededSpecialGroupsSymphoniesRule(String name1, String name2)
+	{
+		List<String> name1Tokens = TextProcessor.fastTokenize(name1.toLowerCase(), false);
+		List<String> name2Tokens = TextProcessor.fastTokenize(name2.toLowerCase(), false);
+		if (name1Tokens.contains("orchestra") || name2Tokens.contains("symphony") || name2Tokens.contains("orchestra")
+			|| name1Tokens.contains("symphony") || name2Tokens.contains("band") || name1Tokens.contains("band")
+			|| name2Tokens.contains("trio") || name1Tokens.contains("trio") || name1Tokens.contains("soleil")
+			|| name2Tokens.contains("soleil") || name1Tokens.contains("disney") || name2Tokens.contains("disney")
+			|| name1Tokens.contains("lang") || name2Tokens.contains("lang")) // special group 'lang lang'
+		{ // all words should be the
+			// same
+			List<String> name1TokensClone = new ArrayList<String>(name1Tokens);
+			name1Tokens.removeAll(name2Tokens);
+			name2Tokens.removeAll(name1TokensClone);
+			name1Tokens.addAll(name2Tokens);
+			name1Tokens.removeAll(Arrays.asList(this.englishPrepositions));
+			// name1Tokens.removeAll(Arrays.asList(this.commonWordsInEventTitles));
+			if (name1Tokens.size() < 1)
+				return true;
+
+			return false;
+		}
+		else
+			return true;
+
+	}
+
+	public int getAttemptedNameMerge(String name1, String name2)
+	{
+		name1 = name1.replaceAll("[a-z][A-Z]", "$0&$0").replaceAll(".&.", " ");
+		; // suspected word merge if higher case is in the middle of word
+		name2 = name2.replaceAll("[a-z][A-Z]", "$0&$0").replaceAll(".&.", " ");
+
+		name1 = name1.toLowerCase();
+		name2 = name2.toLowerCase();
+		if (name1.equals(name2) || name1.startsWith(name2) || name2.startsWith(name1) || name1.endsWith(name2)
+			|| name1.endsWith(name2) || name1.indexOf(name2) > -1 || name1.indexOf(name2) > -1) // ??
+			return 2;
+		String name2r = name2.replace(" ", "");
+		if (name1.equals(name2r) || name1.startsWith(name2r) || name1.startsWith(name2r) || name1.endsWith(name2r)
+			|| name1.endsWith(name2r))
+			return 1;
+		String name1r = name1.replace(" ", "");
+		if (name1r.equals(name2r) || name1r.startsWith(name2r) || name1r.startsWith(name2) || name1r.endsWith(name2r)
+			|| name1r.endsWith(name2r) || name2r.equals(name1r) || name2r.startsWith(name1r)
+			|| name2r.startsWith(name1) || name2r.endsWith(name1r) || name2r.endsWith(name2)
+
+		)
+			return 1;
+
+		if (stringDistanceMeasurer.measureStringDistance(name1, name2) > 0.95)
+			return 2;
+		if (stringDistanceMeasurer.measureStringDistance(name1, name2) > 0.70)
+			return 1;
+		return 0;
+	}
+
+	private String normalizeGenderAndOtherAttributes(String name1)
+	{
+		name1 = Utils.convertToASCII(name1.replace("/", " ").replace("w/", "with ")).replace('!', ' ').toLowerCase();
+
+		name1 = name1.replace("woman", "women").replace("womans", "women").replace("womens", "women")
+			.replace("women's", "women").replace("woman's", "women");
+		name1 = name1.replace(" man ", " men ").replace(" mans ", " men ").replace(" men's ", " men ")
+			.replace(" man's ", " men ").replace(" mens ", " men ").replace("summer camp", "summer_camp")
+			.replace("gaea theatre festival", "tfestival"); // need regexp for this
+		return name1;
+	}
+
	/**
	 * Main semantic similarity function: applies boundary-case rules first and
	 * then a web-mining rule. The central criterion for commonality between two
	 * titles is whether their shared part forms an entity that is searchable on
	 * the web.
	 *
	 * @param name1 first event title
	 * @param name2 second event title
	 * @param venue venue string whose tokens are excluded from the common part
	 * @return a {@link DedupResult} with the textual reason, an integer score
	 *         and the boolean duplicate verdict
	 */
	public DedupResult areNamesSemanticallyCloseWebMineCommonPart(String name1, String name2, String venue)
	{
		// normalize gender words and known attribute spellings
		name1 = normalizeGenderAndOtherAttributes(name1);
		name2 = normalizeGenderAndOtherAttributes(name2);

		// short titles get their own web-space similarity check (helper defined elsewhere)
		Boolean bShortTitlesSimilarInWebSpace = attemptShortTitlesSimilarityInWebSpace(name1, name2);
		if (bShortTitlesSimilarInWebSpace)
			return new DedupResult("Accepted as short title by web mining", 2, true);

		StringBuffer reason = new StringBuffer();
		List<String> venueToks = removeVenuePart(TextProcessor.fastTokenize(venue.toLowerCase(), false));

		LOG.info("\nComputing similarity between name = '" + name1 + "' and name = '" + name2 + "'");
		// convert titles into token lists with venue parts removed
		List<String> name1Tokens = removeVenuePart(TextProcessor.fastTokenize(name1.toLowerCase(), true));
		List<String> name2Tokens = removeVenuePart(TextProcessor.fastTokenize(name2.toLowerCase(), true));
		// attribute values around shared keywords must agree in both directions
		Boolean bSameAttrib = verifyEventAttributesPost(name1Tokens, name2Tokens)
			&& verifyEventAttributesPre(name1Tokens, name2Tokens);
		if (!bSameAttrib)
		{
			LOG.info("similar events but different attributes");
			return new DedupResult("similar events but different attributes", 0, false);
		}

		// sports-specific common-word rule (event variant, helper defined elsewhere)
		boolean bothSodesSuccess = applyBothSidesRuleEvent(name1, name2);
		if (!bothSodesSuccess)
		{
			return new DedupResult("Failed common words test for sports", 0, false);
		}

		// near-identical spellings are accepted outright
		float dist = (float) LevensteinDistanceFinder.levensteinDistance(name1, name2, 1, 10, 1, 10);
		if (dist < 5.1)
		{
			LOG.info("Found low LevensteinDistance for name1 and name2");
			return new DedupResult("Found low LevensteinDistance", 2, true);
		}

		// titles that look like whitespace-mangled merges of each other
		int nameMergeScore = getAttemptedNameMerge(name1, name2);
		if (nameMergeScore > 0)
		{
			LOG.info("Found low NameMerge Distance for name1 and name2");
			return new DedupResult("Found low  NameMerge Distance", 2, true);
		}

		// TODO take token order into account
		// form the common sub-list of tokens, minus venue tokens and stop words
		name1Tokens.retainAll(name2Tokens);
		name1Tokens.removeAll(venueToks);

		name1Tokens.removeAll(commonWordsInEventTitles);
		name1Tokens.removeAll(Arrays.asList(englishPrepositions));
		name1Tokens = removeDollarWordAndNonAlphaFromList(name1Tokens);
		// TODO use a full string measure
		// boundary case: large overlap => accept on counts alone
		float commonPortion = (float) name1Tokens.size() / (float) name2Tokens.size();
		if (commonPortion > 0.8 || name1Tokens.size() >= 4)
		{ // after the typical title words are removed, 4 common words look OK
			LOG.info("Accepted since substantial common part");
			return new DedupResult("Accepted since substantial common part", Math.max((int) (commonPortion * 5.0), 2),
				true);
		}
		// boundary case: no overlap at all
		if (name1Tokens.size() < 1)
		{
			LOG.info("Rejected since nothing in common");
			return new DedupResult("Rejected since nothing in common", 0, false);
		}
		// turn the token list back into a phrase to use as the search expression
		String entityExpression = name1Tokens.toString().replace('[', ' ').replace(']', ' ').replace(',', ' ')
			.replace("  ", " ").trim();
		/*
		 * // now try name merge reduced strings String entityExpression1 = name1TokensC.toString().replace('[',
		 * ' ').replace(']', ' ').replace(',', ' ') .replace("  ", " ").trim(); String entityExpression2 =
		 * name2Tokens.toString().replace('[', ' ').replace(']', ' ').replace(',', ' ') .replace("  ", " ").trim();
		 * 
		 * nameMergeScore = getAttemptedNameMerge(entityExpression1, entityExpression2); if (nameMergeScore>0){
		 * LOG.info("Found low NameMerge Distance for REDUCED name1 and name2"); return new
		 * DedupResult("Found low  NameMerge Distance REDUCED", 2, true);
		 * 
		 * }
		 */

		// Before doing web mining, make sure the overlap between the titles is
		// NOT just a set of common English words; NOTE(review): the vocabulary
		// check below is commented out, so bCommonEnglishWord never becomes true
		// and this rejection path is currently dead.
		if (name1Tokens.size() < 2)
		{
			boolean bCommonEnglishWord = false;
			for (String word : name1Tokens)
			{
	//			if (stopList.isCommonWord(word) /*&& mostFrequent1000Words.isMostFrequent1000Word(word)*/)
	//				bCommonEnglishWord = true;
			}

			if (bCommonEnglishWord)
			{
				LOG.info("Rejected common entity: common word = " + entityExpression);
				return new DedupResult("Rejected since common entity is common English word = " + entityExpression, 0,
					false);
			}
		}
		// accept the common expression and validate it on the web
		LOG.info("Formed common entity = " + entityExpression);
		reason.append("Formed common entity = " + entityExpression + "\n");
		// query the search engine with the common expression
		List<HitBase> searchResult = webSearch.runSearch(entityExpression);
		float entityScore = 0f;
		if (searchResult != null)
		{
			int count = 0;
			for (HitBase item : searchResult)
			{
				String lookup = item.getTitle();
				LOG.info("Bing hit title = '" + lookup + "'");
				reason.append("Bing hit title = '" + lookup + "'\n");
				if (count > 4) // only the top hits are scored
					break;
				count++;
				// if occurrence is not capitalized then rejected, do not take
				// into account in score
				if (!isCapitalized(lookup))
				{
					LOG.info("Rejected hit title since not capitalized");
					reason.append("Rejected hit title since not capitalized\n");
					continue;
				}

				/*
				 * if (lookup.indexOf('-')>0 ){ lookup = lookup.split("-")[0]; }
				 */
				// compute the overlap between the hit's title and the common
				// expression between the events (stemmed on both sides)
				List<String> lookupTokens = tokenizeAndStem(lookup);
				lookupTokens.retainAll(stemList(name1Tokens));
				if (lookupTokens.size() >= name1Tokens.size())
					// increment score if found hit title is acceptable
					entityScore += 1.0;
				else
				{
					LOG.info("Found hit title " + lookupTokens + " does not cover comonality expr = " + name1Tokens);
					entityScore += 0.25;

				}

			}
		}
		return new DedupResult(reason.toString(), (int) entityScore, entityScore > 1.0);
	}
+
	/**
	 * Compares two group/event names by searching the web for EACH name and
	 * counting how many pairs of result titles are nearly identical: matching
	 * result titles suggest the two names denote the same entity.
	 *
	 * @param name1  first name
	 * @param name2  second name
	 * @param thresh string-distance threshold for two hit titles to count as a
	 *               match; defaults to 0.8 when {@code null} or 0
	 * @param bStem  whether the string distance is computed with stemming
	 * @return a {@link DedupResult}; duplicate verdict requires score &gt; 1
	 */
	public DedupResult areNamesSemanticallyCloseInWebSearchSpace(String name1, String name2, Float thresh, boolean bStem)
	{

		if (thresh == null || thresh == 0f)
		{
			thresh = 0.8f;
		}

		// normalize gender words and known attribute spellings
		name1 = normalizeGenderAndOtherAttributes(name1);
		name2 = normalizeGenderAndOtherAttributes(name2);

		StringBuffer reason = new StringBuffer();

		// hard rejection rules that do not need web search
		boolean bSportsOrOrchestra = !succeededMenWomenSportsRule(name1, name2);
		if (bSportsOrOrchestra)
			return new DedupResult("Sports rule: different teams or teams of different venues", 0, false);

		bSportsOrOrchestra = !succeededSpecialGroupsSymphoniesRule(name1, name2);
		if (bSportsOrOrchestra)
			return new DedupResult("SpecialGroupsSymphoniesRule: different circus/band", 0, false);

		LOG.info("\nComputing similarity between name = '" + name1 + "' and name = '" + name2 + "'");

		// attribute values around shared keywords must agree
		List<String> name1Tokens = TextProcessor.fastTokenize(name1.toLowerCase(), true);
		List<String> name2Tokens = TextProcessor.fastTokenize(name2.toLowerCase(), true);
		Boolean bSameAttrib = verifyEventAttributesPost(name1Tokens, name2Tokens)
			&& verifyEventAttributesPre(name1Tokens, name2Tokens);
		if (!bSameAttrib)
		{
			LOG.info("similar events but different attributes");
			return new DedupResult("similar events but different attributes", 0, false);
		}

		// search the web for each name and cross-compare the result titles
		List<HitBase> searchResult1 = webSearch.runSearch(name1);
		List<HitBase> searchResult2 = webSearch.runSearch(name2);
		int score = 0;
		if (searchResult1 != null && searchResult2 != null)
		{
			for (HitBase item1 : searchResult1)
			{
				// skip social/wiki pages, which repeat any queried name verbatim
				if (item1.getUrl().indexOf("myspace") > -1 || item1.getUrl().indexOf("wiki") > -1)
					continue;
				for (HitBase item2 : searchResult2)
				{
					// drop social-network site names so they do not inflate similarity
					String lookup1 = item1.getTitle().replace("Facebook", "").replace("LinkedIn", "")
						.replace("MySpace", "");
					String lookup2 = item2.getTitle().replace("Facebook", "").replace("LinkedIn", "")
						.replace("MySpace", "");
					double d = 0;
					if (bStem)
						d = stringDistanceMeasurer.measureStringDistance(lookup1, lookup2);
					else
						d = stringDistanceMeasurer.measureStringDistanceNoStemming(lookup1, lookup2);
					if (d > thresh) // 0.8)
					{

						reason.append("Found common search result title for group names '" + lookup1 + " < > "
							+ lookup2 + " sim = " + d + "\n");
						LOG.info(("Found common search result title for group names '" + lookup1 + " < > " + lookup2
							+ " sim = " + d));
						score++;
					}

				}
			}
		}

		// demote the score when side-marker or sub-name rules fail
		Boolean bothSidesSuccess = applyBothSidesRule(name1, name2);
		if (!bothSidesSuccess)
		{
			score = 1;
			reason.append("Failed common words test for sports");
		}
		if (score > 0)
		{
			Boolean bDifferentGroup = bDifferentGroupOneSubnameOfAnother(name1, name2);
			if (bDifferentGroup)
			{
				score = 1;
				reason.append("Failed common words test for sports");
			}
		}
		return new DedupResult(reason.toString(), score, score > 1);
	}
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java
index 0d2ba00..33b4b00 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java
@@ -1,232 +1,232 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.apps.relevanceVocabs;
-
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.List;
-
-import org.apache.commons.lang.StringUtils;
-
-import opennlp.tools.parser.Parse;
-import opennlp.tools.textsimilarity.ParseTreeChunk;
-import opennlp.tools.textsimilarity.TextProcessor;
-import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;
-import opennlp.tools.util.Span;
-
-public class PhraseProcessor {
-	
-	private ParserChunker2MatcherProcessor nlProc = ParserChunker2MatcherProcessor.getInstance() ;
-	
-	public static boolean allChildNodesArePOSTags(Parse p)
-	{
-		Parse[] subParses = p.getChildren();
-		for (int pi = 0; pi < subParses.length; pi++)
-			if (!((Parse) subParses[pi]).isPosTag())
-				return false;
-		return true;
-	}
-	
-	public ArrayList<String> getNounPhrases(Parse p)
-	{
-		ArrayList<String> nounphrases = new ArrayList<String>();
-
-		Parse[] subparses = p.getChildren();
-		for (int pi = 0; pi < subparses.length; pi++)
-		{
-
-			if (subparses[pi].getType().equals("NP") && allChildNodesArePOSTags(subparses[pi]))
-			{
-				Span _span = subparses[pi].getSpan();
-				nounphrases.add(p.getText().substring(_span.getStart(), _span.getEnd()));
-			}
-			else if (!((Parse) subparses[pi]).isPosTag())
-				nounphrases.addAll(getNounPhrases(subparses[pi]));
-		}
-
-		return nounphrases;
-	}
-	
-	public ArrayList<String> getVerbPhrases(Parse p)
-	{
-		ArrayList<String> verbPhrases = new ArrayList<String>();
-
-		Parse[] subparses = p.getChildren();
-		for (int pi = 0; pi < subparses.length; pi++)
-		{
-
-			if (subparses[pi].getType().startsWith("VB") && allChildNodesArePOSTags(subparses[pi]))
-			{
-				Span _span = subparses[pi].getSpan();
-				verbPhrases.add(p.getText().substring(_span.getStart(), _span.getEnd()));
-			}
-			else if (!((Parse) subparses[pi]).isPosTag())
-				verbPhrases.addAll(getNounPhrases(subparses[pi]));
-		}
-
-		return verbPhrases;
-	}
-	
-	// forms phrases from text which are candidate expressions for events lookup
-			public List<ParseTreeChunk> getVerbPhrases(String sentence) {
-				if (sentence==null)
-					return null;
-				if (sentence.split(" ").length ==1) { // this is a word, return empty
-					//queryArrayStr.add( sentence);
-					return null;
-				}
-				if (sentence.length()>100)
-					return null ; // too long of a sentence to parse
-				
-				System.out.println("About to parse: "+sentence);
-				List<List<ParseTreeChunk>> groupedChunks = nlProc.formGroupedPhrasesFromChunksForPara(sentence); 
-				if (groupedChunks.size()<1)
-					return null;
-
-				List<ParseTreeChunk> vPhrases = groupedChunks.get(1);
-				
-				return vPhrases;
-			}
-
-			public List<List<ParseTreeChunk>> getPhrasesOfAllTypes(String sentence) {
-				if (sentence==null)
-					return null;
-				if (sentence.split(" ").length ==1) { // this is a word, return empty
-					//queryArrayStr.add( sentence);
-					return null;
-				}
-				if (sentence.length()>200)
-					return null ; // too long of a sentence to parse
-				
-				System.out.println("About to parse: "+sentence);
-				List<List<ParseTreeChunk>> groupedChunks = nlProc.formGroupedPhrasesFromChunksForPara(sentence); 
-				if (groupedChunks.size()<1)
-					return null;
-
-				return groupedChunks;
-			}
-	
-	// forms phrases from text which are candidate expressions for events lookup
-		public List<String> extractNounPhraseProductNameCandidate(String sentence) {
-			
-			List<String> queryArrayStr = new ArrayList<String>();
-			
-			if (sentence.split(" ").length ==1) { // this is a word, return empty
-				//queryArrayStr.add( sentence);
-				return queryArrayStr;
-			}
-			String quoted1 = StringUtils.substringBetween(sentence, "\"", "\"");
-			String quoted2 = StringUtils.substringBetween(sentence, "\'", "\'");
-			List<List<ParseTreeChunk>> groupedChunks = nlProc.formGroupedPhrasesFromChunksForPara(sentence); 
-			if (groupedChunks.size()<1)
-				return queryArrayStr;
-
-			List<ParseTreeChunk> nPhrases = groupedChunks.get(0);
-
-			for (ParseTreeChunk ch : nPhrases) {
-				String query = "";
-				int size = ch.getLemmas().size();
-				boolean phraseBeingFormed = false;
-				for (int i = 0; i < size; i++) {
-					if ((ch.getPOSs().get(i).startsWith("N") || ch.getPOSs().get(i)
-							.startsWith("J") || ch.getPOSs().get(i).startsWith("CD") ) )
-					//		&& StringUtils.isAlpha(ch.getLemmas().get(i)))
-					{
-						query += ch.getLemmas().get(i) + " ";
-						phraseBeingFormed = true;
-					} else 
-						if ((ch.getPOSs().get(i).startsWith("PR") || ch.getPOSs().get(i).startsWith("IN") || ch.getPOSs().get(i).startsWith("TO")  ) 
-								&& phraseBeingFormed )
-							break;
-						else if (ch.getPOSs().get(i).startsWith("DT") || ch.getPOSs().get(i).startsWith("CC"))
-						continue;
-				}
-				query = query.trim();
-				int len = query.split(" ").length;
-				if (len > 5 || len < 2) // too long or too short
-					continue;
-				
-	/*				
-				if (len < 4 && len>1) { // every word should start with capital
-					String[] qs = query.split(" ");
-					boolean bAccept = true;
-					for (String w : qs) {
-						if (w.toLowerCase().equals(w)) // idf only two words then
-														// has to be person name,
-														// title or geo
-														// location
-							bAccept = false;
-					}
-					if (!bAccept)
-						continue;
-				}
-		*/		
-				 // individual word, possibly a frequent word
-				// if len==1 do nothing
-
-				query = query.trim();
-				queryArrayStr.add(query);
-
-			}
-	/*		
-			if (queryArrayStr.size() < 1) { // release constraints on NP down to 2
-											// keywords
-				for (ParseTreeChunk ch : nPhrases) {
-					String query = "";
-					int size = ch.getLemmas().size();
-
-					for (int i = 0; i < size; i++) {
-						if (ch.getPOSs().get(i).startsWith("N")
-								|| ch.getPOSs().get(i).startsWith("J")) {
-							query += ch.getLemmas().get(i) + " ";
-						}
-					}
-					query = query.trim();
-					int len = query.split(" ").length;
-					if (len < 2)
-						continue;
-
-					query = TextProcessor.fastTokenize(query.toLowerCase(), false)
-							.toString().replace('[', ' ').replace(']', ' ').trim();
-					if (query.length() > 6)
-						queryArrayStr.add(query);
-				}
-			}
-			//queryArrayStr = Utils
-			//		.removeDuplicatesFromQueries(queryArrayStr);
-			if (quoted1 != null
-					&& ((quoted1.length() > 5 && !stopList.isCommonWord(quoted1)) || quoted1
-							.length() > 10))
-				queryArrayStr.add(quoted1);
-			if (quoted2 != null
-					&& ((quoted2.length() > 5 && !stopList.isCommonWord(quoted2)) || quoted2
-							.length() > 10))
-				queryArrayStr.add(quoted2);
-		*/	return queryArrayStr;
-		}
-		
-
-	
-		
-		public static void main(String[] args){
-			String sent = "Appliances and Kitchen Gadgets - CNET Blogs";
-					//"The tablet phenomenon turns Silicon Valley upside down - SiliconValley.com";
-			List<String> res = new PhraseProcessor().extractNounPhraseProductNameCandidate(sent);
-			System.out.println(res);
-		}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.commons.lang.StringUtils;
+
+import opennlp.tools.parser.Parse;
+import opennlp.tools.textsimilarity.ParseTreeChunk;
+import opennlp.tools.textsimilarity.TextProcessor;
+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;
+import opennlp.tools.util.Span;
+
/**
 * Extracts noun and verb phrases from parse trees and raw sentences, using the
 * OpenNLP-based {@link ParserChunker2MatcherProcessor}. Also builds candidate
 * product-name expressions from the noun phrases of a sentence.
 */
public class PhraseProcessor {
	
	// Parser/chunker used to split sentences into grouped phrase chunks.
	// NOTE(review): getInstance() suggests a singleton; fetched per instance here.
	private ParserChunker2MatcherProcessor nlProc = ParserChunker2MatcherProcessor.getInstance() ;
	
	/**
	 * Returns {@code true} when every direct child of the given parse node is a
	 * POS tag, i.e. the node is a lowest-level phrase.
	 */
	public static boolean allChildNodesArePOSTags(Parse p)
	{
		Parse[] subParses = p.getChildren();
		for (int pi = 0; pi < subParses.length; pi++)
			if (!((Parse) subParses[pi]).isPosTag())
				return false;
		return true;
	}
	
	/**
	 * Recursively collects the text of all lowest-level NP (noun phrase) nodes
	 * of the given parse tree.
	 */
	public ArrayList<String> getNounPhrases(Parse p)
	{
		ArrayList<String> nounphrases = new ArrayList<String>();

		Parse[] subparses = p.getChildren();
		for (int pi = 0; pi < subparses.length; pi++)
		{

			if (subparses[pi].getType().equals("NP") && allChildNodesArePOSTags(subparses[pi]))
			{
				Span _span = subparses[pi].getSpan();
				nounphrases.add(p.getText().substring(_span.getStart(), _span.getEnd()));
			}
			else if (!((Parse) subparses[pi]).isPosTag())
				nounphrases.addAll(getNounPhrases(subparses[pi]));
		}

		return nounphrases;
	}
	
	/**
	 * Recursively collects the text of all lowest-level VB* (verb) nodes of the
	 * given parse tree.
	 * <p>
	 * NOTE(review): the recursive branch accumulates {@code getNounPhrases(...)}
	 * rather than verb phrases — this looks like a copy-paste slip from
	 * {@link #getNounPhrases(Parse)}; confirm the intended behavior before use.
	 */
	public ArrayList<String> getVerbPhrases(Parse p)
	{
		ArrayList<String> verbPhrases = new ArrayList<String>();

		Parse[] subparses = p.getChildren();
		for (int pi = 0; pi < subparses.length; pi++)
		{

			if (subparses[pi].getType().startsWith("VB") && allChildNodesArePOSTags(subparses[pi]))
			{
				Span _span = subparses[pi].getSpan();
				verbPhrases.add(p.getText().substring(_span.getStart(), _span.getEnd()));
			}
			else if (!((Parse) subparses[pi]).isPosTag())
				verbPhrases.addAll(getNounPhrases(subparses[pi]));
		}

		return verbPhrases;
	}
	
	/**
	 * Parses a sentence and returns its verb-phrase chunks (group index 1 of
	 * the grouped-chunk result), or {@code null} for single words, overly long
	 * sentences (&gt; 100 chars) or empty parse results.
	 * <p>
	 * NOTE(review): the guard checks {@code size() < 1} but then reads
	 * {@code get(1)} — a result with exactly one group would throw
	 * IndexOutOfBoundsException; the guard should probably be {@code size() < 2}.
	 */
	public List<ParseTreeChunk> getVerbPhrases(String sentence) {
		if (sentence==null)
			return null;
		if (sentence.split(" ").length ==1) { // this is a word, return empty
			//queryArrayStr.add( sentence);
			return null;
		}
		if (sentence.length()>100)
			return null ; // too long of a sentence to parse
		
		System.out.println("About to parse: "+sentence);
		List<List<ParseTreeChunk>> groupedChunks = nlProc.formGroupedPhrasesFromChunksForPara(sentence); 
		if (groupedChunks.size()<1)
			return null;

		List<ParseTreeChunk> vPhrases = groupedChunks.get(1);
		
		return vPhrases;
	}

	/**
	 * Parses a sentence and returns all grouped phrase chunks (noun, verb, ...),
	 * or {@code null} for single words, overly long sentences (&gt; 200 chars)
	 * or empty parse results.
	 */
	public List<List<ParseTreeChunk>> getPhrasesOfAllTypes(String sentence) {
		if (sentence==null)
			return null;
		if (sentence.split(" ").length ==1) { // this is a word, return empty
			//queryArrayStr.add( sentence);
			return null;
		}
		if (sentence.length()>200)
			return null ; // too long of a sentence to parse
		
		System.out.println("About to parse: "+sentence);
		List<List<ParseTreeChunk>> groupedChunks = nlProc.formGroupedPhrasesFromChunksForPara(sentence); 
		if (groupedChunks.size()<1)
			return null;

		return groupedChunks;
	}
	
	/**
	 * Forms candidate product-name expressions from the noun phrases of a
	 * sentence: consecutive noun/adjective/number tokens are joined until a
	 * preposition ends the phrase; determiners and conjunctions are skipped.
	 * Phrases of fewer than 2 or more than 5 words are discarded.
	 *
	 * @param sentence the sentence to mine (single words yield an empty list)
	 * @return the candidate phrases, possibly empty, never {@code null}
	 */
	public List<String> extractNounPhraseProductNameCandidate(String sentence) {
		
		List<String> queryArrayStr = new ArrayList<String>();
		
		if (sentence.split(" ").length ==1) { // this is a word, return empty
			//queryArrayStr.add( sentence);
			return queryArrayStr;
		}
		// quoted substrings were used by the disabled fallback below
		String quoted1 = StringUtils.substringBetween(sentence, "\"", "\"");
		String quoted2 = StringUtils.substringBetween(sentence, "\'", "\'");
		List<List<ParseTreeChunk>> groupedChunks = nlProc.formGroupedPhrasesFromChunksForPara(sentence); 
		if (groupedChunks.size()<1)
			return queryArrayStr;

		// group 0 holds the noun-phrase chunks
		List<ParseTreeChunk> nPhrases = groupedChunks.get(0);

		for (ParseTreeChunk ch : nPhrases) {
			String query = "";
			int size = ch.getLemmas().size();
			boolean phraseBeingFormed = false;
			for (int i = 0; i < size; i++) {
				// nouns, adjectives and numbers extend the phrase
				if ((ch.getPOSs().get(i).startsWith("N") || ch.getPOSs().get(i)
						.startsWith("J") || ch.getPOSs().get(i).startsWith("CD") ) )
				//		&& StringUtils.isAlpha(ch.getLemmas().get(i)))
				{
					query += ch.getLemmas().get(i) + " ";
					phraseBeingFormed = true;
				} else 
					// a preposition/particle ends a phrase already being formed
					if ((ch.getPOSs().get(i).startsWith("PR") || ch.getPOSs().get(i).startsWith("IN") || ch.getPOSs().get(i).startsWith("TO")  ) 
							&& phraseBeingFormed )
						break;
					else if (ch.getPOSs().get(i).startsWith("DT") || ch.getPOSs().get(i).startsWith("CC"))
					continue; // determiners and conjunctions are skipped
			}
			query = query.trim();
			int len = query.split(" ").length;
			if (len > 5 || len < 2) // too long or too short
				continue;
			
/*				
			if (len < 4 && len>1) { // every word should start with capital
				String[] qs = query.split(" ");
				boolean bAccept = true;
				for (String w : qs) {
					if (w.toLowerCase().equals(w)) // idf only two words then
													// has to be person name,
													// title or geo
													// location
						bAccept = false;
				}
				if (!bAccept)
					continue;
			}
	*/		
			 // individual word, possibly a frequent word
			// if len==1 do nothing

			query = query.trim();
			queryArrayStr.add(query);

		}
/*		
		if (queryArrayStr.size() < 1) { // release constraints on NP down to 2
										// keywords
			for (ParseTreeChunk ch : nPhrases) {
				String query = "";
				int size = ch.getLemmas().size();

				for (int i = 0; i < size; i++) {
					if (ch.getPOSs().get(i).startsWith("N")
							|| ch.getPOSs().get(i).startsWith("J")) {
						query += ch.getLemmas().get(i) + " ";
					}
				}
				query = query.trim();
				int len = query.split(" ").length;
				if (len < 2)
					continue;

				query = TextProcessor.fastTokenize(query.toLowerCase(), false)
						.toString().replace('[', ' ').replace(']', ' ').trim();
				if (query.length() > 6)
					queryArrayStr.add(query);
			}
		}
		//queryArrayStr = Utils
		//		.removeDuplicatesFromQueries(queryArrayStr);
		if (quoted1 != null
				&& ((quoted1.length() > 5 && !stopList.isCommonWord(quoted1)) || quoted1
						.length() > 10))
			queryArrayStr.add(quoted1);
		if (quoted2 != null
				&& ((quoted2.length() > 5 && !stopList.isCommonWord(quoted2)) || quoted2
						.length() > 10))
			queryArrayStr.add(quoted2);
	*/	return queryArrayStr;
	}
	

	// Simple manual smoke test for extractNounPhraseProductNameCandidate.
	public static void main(String[] args){
		String sent = "Appliances and Kitchen Gadgets - CNET Blogs";
				//"The tablet phenomenon turns Silicon Valley upside down - SiliconValley.com";
		List<String> res = new PhraseProcessor().extractNounPhraseProductNameCandidate(sent);
		System.out.println(res);
	}
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java
index 09d90b1..e71fbd6 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java
@@ -1,103 +1,103 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.apps.relevanceVocabs;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
-import java.io.Serializable;
-import java.net.URL;
-import java.net.URLConnection;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-
-
-public class SynonymListFilter {
-	SynonymMap map=null;
-	
-	public SynonymListFilter(String dir){
-		dir = dir.replace("maps/analytics","");
-		try {
-			map = new SynonymMap( new FileInputStream(dir+"wn_s.pl"));
-		} catch (IOException e) {
-			e.printStackTrace();
-		}
-	}
-	
-	protected static Map<String, List<String>> filteredKeyword_synonyms = new HashMap<String, List<String>>();
-
-	static public List<String> getFileLines(File aFile) {
-
-		List<String> items = new ArrayList<String>();
-
-		StringBuilder contents = new StringBuilder();		    
-		try {
-
-			BufferedReader input =  new BufferedReader(new FileReader(aFile));
-			try {
-				String line = null; //not declared within while loop
-				while (( line = input.readLine()) != null){
-					int endOfWord = line.indexOf(';');
-					if (endOfWord>2)
-						line = line.substring(1, endOfWord -1 );
-
-					items.add(line);
-
-				}
-			}
-			finally {
-				input.close();
-			}
-		}
-		catch (IOException ex){
-			ex.printStackTrace();
-		}
-
-		return items;
-	}
-	public String getSynonym (String word){
-			String[] synonyms = map.getSynonyms(word);
-			if (synonyms==null || synonyms.length<1)
-				return null;
-			Random rand = new Random();
-			int index = (int) Math.floor(rand.nextDouble()*(double)synonyms.length);
-			System.out.println("Found synonyms "+Arrays.asList(synonyms).toString()+ " | selected synonym = "+synonyms[index] +" | for the input = "+ word);
-			return synonyms[index];
-			
-	}	
-	public static void main(String[] args){
-		SynonymListFilter filter = new  SynonymListFilter("/src/test/resources");
-		String syn = filter.getSynonym("bring");
-		syn = filter.getSynonym("yell");
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+
+public class SynonymListFilter {
+	SynonymMap map=null;
+	
+	public SynonymListFilter(String dir){
+		dir = dir.replace("maps/analytics","");
+		try {
+			map = new SynonymMap( new FileInputStream(dir+"wn_s.pl"));
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+	}
+	
+	protected static Map<String, List<String>> filteredKeyword_synonyms = new HashMap<String, List<String>>();
+
+	static public List<String> getFileLines(File aFile) {
+
+		List<String> items = new ArrayList<String>();
+
+		StringBuilder contents = new StringBuilder();		    
+		try {
+
+			BufferedReader input =  new BufferedReader(new FileReader(aFile));
+			try {
+				String line = null; //not declared within while loop
+				while (( line = input.readLine()) != null){
+					int endOfWord = line.indexOf(';');
+					if (endOfWord>2)
+						line = line.substring(1, endOfWord -1 );
+
+					items.add(line);
+
+				}
+			}
+			finally {
+				input.close();
+			}
+		}
+		catch (IOException ex){
+			ex.printStackTrace();
+		}
+
+		return items;
+	}
+	public String getSynonym (String word){
+			String[] synonyms = map.getSynonyms(word);
+			if (synonyms==null || synonyms.length<1)
+				return null;
+			Random rand = new Random();
+			int index = (int) Math.floor(rand.nextDouble()*(double)synonyms.length);
+			System.out.println("Found synonyms "+Arrays.asList(synonyms).toString()+ " | selected synonym = "+synonyms[index] +" | for the input = "+ word);
+			return synonyms[index];
+			
+	}	
+	public static void main(String[] args){
+		SynonymListFilter filter = new  SynonymListFilter("/src/test/resources");
+		String syn = filter.getSynonym("bring");
+		syn = filter.getSynonym("yell");
+	}
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java
index 7e680de..283435a 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java
@@ -1,353 +1,353 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.apps.relevanceVocabs;
-
-import java.io.IOException;
-  import java.io.InputStream;
-   import java.nio.ByteBuffer;
-   import java.nio.charset.Charset;
-   import java.util.ArrayList;
-   import java.util.Arrays;
-   import java.util.HashMap;
-   import java.util.Iterator;
-   import java.util.Map;
-   import java.util.TreeMap;
-   import java.util.TreeSet;
-   
-   
-   public class SynonymMap {
-   
-     /** the index data; Map<String word, String[] synonyms> */
-     private final HashMap<String,String[]> table;
-     
-     private static final String[] EMPTY = new String[0];
-     
-     private static final boolean DEBUG = false;
-   
-     /**
-      * Constructs an instance, loading WordNet synonym data from the given input
-      * stream. Finally closes the stream. The words in the stream must be in
-      * UTF-8 or a compatible subset (for example ASCII, MacRoman, etc.).
-      * 
-      * @param input
-      *            the stream to read from (null indicates an empty synonym map)
-      * @throws IOException
-      *             if an error occurred while reading the stream.
-      */
-     public SynonymMap(InputStream input) throws IOException {
-       this.table = input == null ? new HashMap<String,String[]>(0) : read(toByteArray(input));
-     }
-     
-     /**
-      * Returns the synonym set for the given word, sorted ascending.
-      * 
-      * @param word
-      *            the word to lookup (must be in lowercase).
-      * @return the synonyms; a set of zero or more words, sorted ascending, each
-      *         word containing lowercase characters that satisfy
-      *         <code>Character.isLetter()</code>.
-      */
-     public String[] getSynonyms(String word) {
-       String[] synonyms = table.get(word);
-       if (synonyms == null) return EMPTY;
-       String[] copy = new String[synonyms.length]; // copy for guaranteed immutability
-       System.arraycopy(synonyms, 0, copy, 0, synonyms.length);
-       return copy;
-     }
-     
-     /**
-      * Returns a String representation of the index data for debugging purposes.
-      * 
-      * @return a String representation
-      */
-     @Override
-     public String toString() {
-       StringBuilder buf = new StringBuilder();
-       Iterator<String> iter = new TreeMap<String,String[]>(table).keySet().iterator();
-       int count = 0;
-       int f0 = 0;
-       int f1 = 0;
-       int f2 = 0;
-       int f3 = 0;
-       
-       while (iter.hasNext()) {
-         String word = iter.next();
-         buf.append(word + ":");
-         String[] synonyms = getSynonyms(word);
-         buf.append(Arrays.asList(synonyms));
-         buf.append("\n");
-         count += synonyms.length;
-         if (synonyms.length == 0) f0++;
-         if (synonyms.length == 1) f1++;
-         if (synonyms.length == 2) f2++;
-         if (synonyms.length == 3) f3++;
-       }
-       
-       buf.append("\n\nkeys=" + table.size() + ", synonyms=" + count + ", f0=" + f0 +", f1=" + f1 + ", f2=" + f2 + ", f3=" + f3);
-       return buf.toString();
-     }
-     
-     /**
-      * Analyzes/transforms the given word on input stream loading. This default implementation simply
-      * lowercases the word. Override this method with a custom stemming
-      * algorithm or similar, if desired.
-      * 
-      * @param word
-      *            the word to analyze
-      * @return the same word, or a different word (or null to indicate that the
-      *         word should be ignored)
-      */
-     protected String analyze(String word) {
-       return word.toLowerCase();
-     }
-   
-     private static boolean isValid(String str) {
-       for (int i=str.length(); --i >= 0; ) {
-         if (!Character.isLetter(str.charAt(i))) return false;
-       }
-       return true;
-     }
-   
-     private HashMap<String,String[]> read(byte[] data) {
-       int WORDS  = (int) (76401 / 0.7); // presizing
-       int GROUPS = (int) (88022 / 0.7); // presizing
-       HashMap<String,ArrayList<Integer>> word2Groups = new HashMap<String,ArrayList<Integer>>(WORDS);  // Map<String word, int[] groups>
-       HashMap<Integer,ArrayList<String>> group2Words = new HashMap<Integer,ArrayList<String>>(GROUPS); // Map<int group, String[] words>
-       HashMap<String,String> internedWords = new HashMap<String,String>(WORDS);// Map<String word, String word>
-   
-       Charset charset = Charset.forName("UTF-8");
-       int lastNum = -1;
-       Integer lastGroup = null;
-       int len = data.length;
-       int i=0;
-       
-       while (i < len) { // until EOF
-         /* Part A: Parse a line */
-         
-         // scan to beginning of group
-         while (i < len && data[i] != '(') i++;
-         if (i >= len) break; // EOF
-         i++;
-         
-         // parse group
-         int num = 0;
-         while (i < len && data[i] != ',') {
-           num = 10*num + (data[i] - 48);
-           i++;
-         }
-         i++;
-   //      if (DEBUG) System.err.println("num="+ num);
-         
-         // scan to beginning of word
-         while (i < len && data[i] != '\'') i++;
-         i++;
-     
-         // scan to end of word
-         int start = i;
-         do {
-           while (i < len && data[i] != '\'') i++;
-           i++;
-         } while (i < len && data[i] != ','); // word must end with "',"
-         
-         if (i >= len) break; // EOF
-         String word = charset.decode(ByteBuffer.wrap(data, start, i-start-1)).toString();
-   //      String word = new String(data, 0, start, i-start-1); // ASCII
-         
-         /*
-          * Part B: ignore phrases (with spaces and hyphens) and
-          * non-alphabetic words, and let user customize word (e.g. do some
-          * stemming)
-          */
-         if (!isValid(word)) continue; // ignore
-         word = analyze(word);
-         if (word == null || word.length() == 0) continue; // ignore
-         
-         
-         /* Part C: Add (group,word) to tables */
-         
-         // ensure compact string representation, minimizing memory overhead
-         String w = internedWords.get(word);
-         if (w == null) {
-           word = new String(word); // ensure compact string
-           internedWords.put(word, word);
-         } else {
-           word = w;
-         }
-         
-         Integer group = lastGroup;
-         if (num != lastNum) {
-           group = Integer.valueOf(num);
-           lastGroup = group;
-           lastNum = num;
-         }
-         
-         // add word --> group
-         ArrayList<Integer> groups =  word2Groups.get(word);
-         if (groups == null) {
-           groups = new ArrayList<Integer>(1);
-           word2Groups.put(word, groups);
-         }
-         groups.add(group);
-   
-         // add group --> word
-         ArrayList<String> words = group2Words.get(group);
-         if (words == null) {
-           words = new ArrayList<String>(1);
-           group2Words.put(group, words);
-         } 
-         words.add(word);
-       }
-       
-       
-       /* Part D: compute index data structure */
-       HashMap<String,String[]> word2Syns = createIndex(word2Groups, group2Words);    
-           
-       /* Part E: minimize memory consumption by a factor 3 (or so) */
-   //    if (true) return word2Syns;
-       word2Groups = null; // help gc
-       //TODO: word2Groups.clear(); would be more appropriate  ? 
-       group2Words = null; // help gc
-       //TODO: group2Words.clear(); would be more appropriate  ? 
-       
-       return optimize(word2Syns, internedWords);
-     }
-     
-    private HashMap<String,String[]> createIndex(Map<String,ArrayList<Integer>> word2Groups, Map<Integer,ArrayList<String>> group2Words) {
-       HashMap<String,String[]> word2Syns = new HashMap<String,String[]>();
-       
-       for (final Map.Entry<String,ArrayList<Integer>> entry : word2Groups.entrySet()) { // for each word
-         ArrayList<Integer> group = entry.getValue();     
-         String word = entry.getKey();
-         
-   //      HashSet synonyms = new HashSet();
-         TreeSet<String> synonyms = new TreeSet<String>();
-         for (int i=group.size(); --i >= 0; ) { // for each groupID of word
-           ArrayList<String> words = group2Words.get(group.get(i));
-           for (int j=words.size(); --j >= 0; ) { // add all words       
-             String synonym = words.get(j); // note that w and word are interned
-             if (synonym != word) { // a word is implicitly it's own synonym
-               synonyms.add(synonym);
-             }
-           }
-         }
-   
-         int size = synonyms.size();
-         if (size > 0) {
-           String[] syns = new String[size];
-           if (size == 1)  
-             syns[0] = synonyms.first();
-           else
-             synonyms.toArray(syns);
-   //        if (syns.length > 1) Arrays.sort(syns);
-   //        if (DEBUG) System.err.println("word=" + word + ":" + Arrays.asList(syns));
-           word2Syns.put(word, syns);
-         }
-       }
-     
-       return word2Syns;
-     }
-   
-     private HashMap<String,String[]> optimize(HashMap<String,String[]> word2Syns, HashMap<String,String> internedWords) {
-       if (DEBUG) {
-         System.err.println("before gc");
-         for (int i=0; i < 10; i++) System.gc();
-         System.err.println("after gc");
-       }
-       
-       // collect entries
-       int len = 0;
-       int size = word2Syns.size();
-       String[][] allSynonyms = new String[size][];
-       String[] words = new String[size];
-       Iterator<Map.Entry<String,String[]>> iter = word2Syns.entrySet().iterator();
-       for (int j=0; j < size; j++) {
-         Map.Entry<String,String[]> entry = iter.next();
-         allSynonyms[j] = entry.getValue(); 
-         words[j] = entry.getKey();
-         len += words[j].length();
-       }
-       
-       // assemble large string containing all words
-       StringBuilder buf = new StringBuilder(len);
-       for (int j=0; j < size; j++) buf.append(words[j]);
-       String allWords = new String(buf.toString()); // ensure compact string across JDK versions
-       buf = null;
-       
-       // intern words at app level via memory-overlaid substrings
-       for (int p=0, j=0; j < size; j++) {
-         String word = words[j];
-         internedWords.put(word, allWords.substring(p, p + word.length()));
-         p += word.length();
-       }
-       
-       // replace words with interned words
-       for (int j=0; j < size; j++) {
-         String[] syns = allSynonyms[j];
-         for (int k=syns.length; --k >= 0; ) {
-           syns[k] = internedWords.get(syns[k]);
-         }
-         word2Syns.remove(words[j]);
-         word2Syns.put(internedWords.get(words[j]), syns);
-      }
-       
-       if (DEBUG) {
-         words = null;
-         allSynonyms = null;
-         internedWords = null;
-         allWords = null;
-         System.err.println("before gc");
-         for (int i=0; i < 10; i++) System.gc();
-         System.err.println("after gc");
-       }
-       return word2Syns;
-     }
-     
-     // the following utility methods below are copied from Apache style Nux library - see http://dsd.lbl.gov/nux
-     private static byte[] toByteArray(InputStream input) throws IOException {
-       try {
-         // safe and fast even if input.available() behaves weird or buggy
-         int len = Math.max(256, input.available());
-         byte[] buffer = new byte[len];
-         byte[] output = new byte[len];
-         
-         len = 0;
-         int n;
-         while ((n = input.read(buffer)) >= 0) {
-           if (len + n > output.length) { // grow capacity
-             byte tmp[] = new byte[Math.max(output.length << 1, len + n)];
-             System.arraycopy(output, 0, tmp, 0, len);
-             System.arraycopy(buffer, 0, tmp, len, n);
-             buffer = output; // use larger buffer for future larger bulk reads
-             output = tmp;
-           } else {
-             System.arraycopy(buffer, 0, output, len, n);
-           }
-           len += n;
-         }
-   
-         if (len == output.length) return output;
-         buffer = null; // help gc
-         buffer = new byte[len];
-         System.arraycopy(output, 0, buffer, 0, len);
-         return buffer;
-       } finally {
-         if (input != null) input.close();
-       }
-     }
-     
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.io.IOException;
+  import java.io.InputStream;
+   import java.nio.ByteBuffer;
+   import java.nio.charset.Charset;
+   import java.util.ArrayList;
+   import java.util.Arrays;
+   import java.util.HashMap;
+   import java.util.Iterator;
+   import java.util.Map;
+   import java.util.TreeMap;
+   import java.util.TreeSet;
+   
+   
+   public class SynonymMap {
+   
+     /** the index data; Map<String word, String[] synonyms> */
+     private final HashMap<String,String[]> table;
+     
+     private static final String[] EMPTY = new String[0];
+     
+     private static final boolean DEBUG = false;
+   
+     /**
+      * Constructs an instance, loading WordNet synonym data from the given input
+      * stream. Finally closes the stream. The words in the stream must be in
+      * UTF-8 or a compatible subset (for example ASCII, MacRoman, etc.).
+      * 
+      * @param input
+      *            the stream to read from (null indicates an empty synonym map)
+      * @throws IOException
+      *             if an error occurred while reading the stream.
+      */
+     public SynonymMap(InputStream input) throws IOException {
+       this.table = input == null ? new HashMap<String,String[]>(0) : read(toByteArray(input));
+     }
+     
+     /**
+      * Returns the synonym set for the given word, sorted ascending.
+      * 
+      * @param word
+      *            the word to lookup (must be in lowercase).
+      * @return the synonyms; a set of zero or more words, sorted ascending, each
+      *         word containing lowercase characters that satisfy
+      *         <code>Character.isLetter()</code>.
+      */
+     public String[] getSynonyms(String word) {
+       String[] synonyms = table.get(word);
+       if (synonyms == null) return EMPTY;
+       String[] copy = new String[synonyms.length]; // copy for guaranteed immutability
+       System.arraycopy(synonyms, 0, copy, 0, synonyms.length);
+       return copy;
+     }
+     
+     /**
+      * Returns a String representation of the index data for debugging purposes.
+      * 
+      * @return a String representation
+      */
+     @Override
+     public String toString() {
+       StringBuilder buf = new StringBuilder();
+       Iterator<String> iter = new TreeMap<String,String[]>(table).keySet().iterator();
+       int count = 0;
+       int f0 = 0;
+       int f1 = 0;
+       int f2 = 0;
+       int f3 = 0;
+       
+       while (iter.hasNext()) {
+         String word = iter.next();
+         buf.append(word + ":");
+         String[] synonyms = getSynonyms(word);
+         buf.append(Arrays.asList(synonyms));
+         buf.append("\n");
+         count += synonyms.length;
+         if (synonyms.length == 0) f0++;
+         if (synonyms.length == 1) f1++;
+         if (synonyms.length == 2) f2++;
+         if (synonyms.length == 3) f3++;
+       }
+       
+       buf.append("\n\nkeys=" + table.size() + ", synonyms=" + count + ", f0=" + f0 +", f1=" + f1 + ", f2=" + f2 + ", f3=" + f3);
+       return buf.toString();
+     }
+     
+     /**
+      * Analyzes/transforms the given word on input stream loading. This default implementation simply
+      * lowercases the word. Override this method with a custom stemming
+      * algorithm or similar, if desired.
+      * 
+      * @param word
+      *            the word to analyze
+      * @return the same word, or a different word (or null to indicate that the
+      *         word should be ignored)
+      */
+     protected String analyze(String word) {
+       return word.toLowerCase();
+     }
+   
+     private static boolean isValid(String str) {
+       for (int i=str.length(); --i >= 0; ) {
+         if (!Character.isLetter(str.charAt(i))) return false;
+       }
+       return true;
+     }
+   
+     private HashMap<String,String[]> read(byte[] data) {
+       int WORDS  = (int) (76401 / 0.7); // presizing
+       int GROUPS = (int) (88022 / 0.7); // presizing
+       HashMap<String,ArrayList<Integer>> word2Groups = new HashMap<String,ArrayList<Integer>>(WORDS);  // Map<String word, int[] groups>
+       HashMap<Integer,ArrayList<String>> group2Words = new HashMap<Integer,ArrayList<String>>(GROUPS); // Map<int group, String[] words>
+       HashMap<String,String> internedWords = new HashMap<String,String>(WORDS);// Map<String word, String word>
+   
+       Charset charset = Charset.forName("UTF-8");
+       int lastNum = -1;
+       Integer lastGroup = null;
+       int len = data.length;
+       int i=0;
+       
+       while (i < len) { // until EOF
+         /* Part A: Parse a line */
+         
+         // scan to beginning of group
+         while (i < len && data[i] != '(') i++;
+         if (i >= len) break; // EOF
+         i++;
+         
+         // parse group
+         int num = 0;
+         while (i < len && data[i] != ',') {
+           num = 10*num + (data[i] - 48);
+           i++;
+         }
+         i++;
+   //      if (DEBUG) System.err.println("num="+ num);
+         
+         // scan to beginning of word
+         while (i < len && data[i] != '\'') i++;
+         i++;
+     
+         // scan to end of word
+         int start = i;
+         do {
+           while (i < len && data[i] != '\'') i++;
+           i++;
+         } while (i < len && data[i] != ','); // word must end with "',"
+         
+         if (i >= len) break; // EOF
+         String word = charset.decode(ByteBuffer.wrap(data, start, i-start-1)).toString();
+   //      String word = new String(data, 0, start, i-start-1); // ASCII
+         
+         /*
+          * Part B: ignore phrases (with spaces and hyphens) and
+          * non-alphabetic words, and let user customize word (e.g. do some
+          * stemming)
+          */
+         if (!isValid(word)) continue; // ignore
+         word = analyze(word);
+         if (word == null || word.length() == 0) continue; // ignore
+         
+         
+         /* Part C: Add (group,word) to tables */
+         
+         // ensure compact string representation, minimizing memory overhead
+         String w = internedWords.get(word);
+         if (w == null) {
+           word = new String(word); // ensure compact string
+           internedWords.put(word, word);
+         } else {
+           word = w;
+         }
+         
+         Integer group = lastGroup;
+         if (num != lastNum) {
+           group = Integer.valueOf(num);
+           lastGroup = group;
+           lastNum = num;
+         }
+         
+         // add word --> group
+         ArrayList<Integer> groups =  word2Groups.get(word);
+         if (groups == null) {
+           groups = new ArrayList<Integer>(1);
+           word2Groups.put(word, groups);
+         }
+         groups.add(group);
+   
+         // add group --> word
+         ArrayList<String> words = group2Words.get(group);
+         if (words == null) {
+           words = new ArrayList<String>(1);
+           group2Words.put(group, words);
+         } 
+         words.add(word);
+       }
+       
+       
+       /* Part D: compute index data structure */
+       HashMap<String,String[]> word2Syns = createIndex(word2Groups, group2Words);    
+           
+       /* Part E: minimize memory consumption by a factor 3 (or so) */
+   //    if (true) return word2Syns;
+       word2Groups = null; // help gc
+       //TODO: word2Groups.clear(); would be more appropriate  ? 
+       group2Words = null; // help gc
+       //TODO: group2Words.clear(); would be more appropriate  ? 
+       
+       return optimize(word2Syns, internedWords);
+     }
+     
+    private HashMap<String,String[]> createIndex(Map<String,ArrayList<Integer>> word2Groups, Map<Integer,ArrayList<String>> group2Words) {
+       HashMap<String,String[]> word2Syns = new HashMap<String,String[]>();
+       
+       for (final Map.Entry<String,ArrayList<Integer>> entry : word2Groups.entrySet()) { // for each word
+         ArrayList<Integer> group = entry.getValue();     
+         String word = entry.getKey();
+         
+   //      HashSet synonyms = new HashSet();
+         TreeSet<String> synonyms = new TreeSet<String>();
+         for (int i=group.size(); --i >= 0; ) { // for each groupID of word
+           ArrayList<String> words = group2Words.get(group.get(i));
+           for (int j=words.size(); --j >= 0; ) { // add all words       
+             String synonym = words.get(j); // note that w and word are interned
+             if (synonym != word) { // a word is implicitly it's own synonym
+               synonyms.add(synonym);
+             }
+           }
+         }
+   
+         int size = synonyms.size();
+         if (size > 0) {
+           String[] syns = new String[size];
+           if (size == 1)  
+             syns[0] = synonyms.first();
+           else
+             synonyms.toArray(syns);
+   //        if (syns.length > 1) Arrays.sort(syns);
+   //        if (DEBUG) System.err.println("word=" + word + ":" + Arrays.asList(syns));
+           word2Syns.put(word, syns);
+         }
+       }
+     
+       return word2Syns;
+     }
+   
+     private HashMap<String,String[]> optimize(HashMap<String,String[]> word2Syns, HashMap<String,String> internedWords) {
+       if (DEBUG) {
+         System.err.println("before gc");
+         for (int i=0; i < 10; i++) System.gc();
+         System.err.println("after gc");
+       }
+       
+       // collect entries
+       int len = 0;
+       int size = word2Syns.size();
+       String[][] allSynonyms = new String[size][];
+       String[] words = new String[size];
+       Iterator<Map.Entry<String,String[]>> iter = word2Syns.entrySet().iterator();
+       for (int j=0; j < size; j++) {
+         Map.Entry<String,String[]> entry = iter.next();
+         allSynonyms[j] = entry.getValue(); 
+         words[j] = entry.getKey();
+         len += words[j].length();
+       }
+       
+       // assemble large string containing all words
+       StringBuilder buf = new StringBuilder(len);
+       for (int j=0; j < size; j++) buf.append(words[j]);
+       String allWords = new String(buf.toString()); // ensure compact string across JDK versions
+       buf = null;
+       
+       // intern words at app level via memory-overlaid substrings
+       for (int p=0, j=0; j < size; j++) {
+         String word = words[j];
+         internedWords.put(word, allWords.substring(p, p + word.length()));
+         p += word.length();
+       }
+       
+       // replace words with interned words
+       for (int j=0; j < size; j++) {
+         String[] syns = allSynonyms[j];
+         for (int k=syns.length; --k >= 0; ) {
+           syns[k] = internedWords.get(syns[k]);
+         }
+         word2Syns.remove(words[j]);
+         word2Syns.put(internedWords.get(words[j]), syns);
+      }
+       
+       if (DEBUG) {
+         words = null;
+         allSynonyms = null;
+         internedWords = null;
+         allWords = null;
+         System.err.println("before gc");
+         for (int i=0; i < 10; i++) System.gc();
+         System.err.println("after gc");
+       }
+       return word2Syns;
+     }
+     
+     // the following utility methods below are copied from Apache style Nux library - see http://dsd.lbl.gov/nux
+     private static byte[] toByteArray(InputStream input) throws IOException {
+       try {
+         // safe and fast even if input.available() behaves weird or buggy
+         int len = Math.max(256, input.available());
+         byte[] buffer = new byte[len];
+         byte[] output = new byte[len];
+         
+         len = 0;
+         int n;
+         while ((n = input.read(buffer)) >= 0) {
+           if (len + n > output.length) { // grow capacity
+             byte tmp[] = new byte[Math.max(output.length << 1, len + n)];
+             System.arraycopy(output, 0, tmp, 0, len);
+             System.arraycopy(buffer, 0, tmp, len, n);
+             buffer = output; // use larger buffer for future larger bulk reads
+             output = tmp;
+           } else {
+             System.arraycopy(buffer, 0, output, len, n);
+           }
+           len += n;
+         }
+   
+         if (len == output.length) return output;
+         buffer = null; // help gc
+         buffer = new byte[len];
+         System.arraycopy(output, 0, buffer, 0, len);
+         return buffer;
+       } finally {
+         if (input != null) input.close();
+       }
+     }
+     
 }
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/BingAPIProductSearchManager.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/BingAPIProductSearchManager.java
index b1afe09..08f0ac1 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/BingAPIProductSearchManager.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/BingAPIProductSearchManager.java
@@ -1,68 +1,68 @@
-package opennlp.tools.apps.review_builder;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import opennlp.tools.similarity.apps.BingQueryRunner;
-import opennlp.tools.similarity.apps.HitBase;
-
-import org.apache.commons.lang.StringUtils;
-
-public class BingAPIProductSearchManager {
-	BingQueryRunner search = new BingQueryRunner();
-
-	public List<HitBase> findProductByName(String name, int count){
-		List<HitBase> foundFBPages = search.runSearch("site:amazon.com"+" "+name + " reviews"
-				, 10);
-		List<HitBase> results = new ArrayList<HitBase>();
-		int ct=0;
-		for(HitBase h: foundFBPages){
-			if (ct>=count) break; ct++; 
-			String title = h.getTitle().toLowerCase();
-			if (h.getUrl().indexOf("amazon.com")<0)
-				continue;
-			String[] merchantWords = name.toLowerCase().split(" ");
-			int overlapCount=0;
-/*			for(String commonWord:merchantWords){
-				if (title.indexOf(commonWord+" ")>-1 || title.indexOf(" "+commonWord)>-1){
-					overlapCount++;
-					System.out.println(" found word "+ commonWord + " in title = "+title);
-				}
-			}
-			float coverage = (float)overlapCount/(float) (merchantWords.length);
-			if ((coverage>0.4 || (coverage>0.5f && merchantWords.length <4 )))
-*/				results.add(h);
-		}
-		return results;
-	}
-	
-	public List<HitBase> findProductByNameNoReview(String name, int count){
-		List<HitBase> foundFBPages = search.runSearch(name, count);
-		List<HitBase> results = new ArrayList<HitBase>();
-		int ct=0;
-		for(HitBase h: foundFBPages){
-			if (ct>=count) break; ct++; 
-			String title = h.getTitle().toLowerCase();
-			String[] merchantWords = name.toLowerCase().split(" ");
-			int overlapCount=0;
-			for(String commonWord:merchantWords){
-				if (title.indexOf(commonWord+" ")>-1 || title.indexOf(" "+commonWord)>-1){
-					overlapCount++;
-					System.out.println(" found word "+ commonWord + " in title = "+title);
-				}
-			}
-			float coverage = (float)overlapCount/(float) (merchantWords.length);
-			if ((coverage>0.4 || (coverage>0.5f && merchantWords.length <4 )))
-				results.add(h);
-		}
-		return results;
-	}
-
-	
-
-	public static void main(String[] args){
-		BingAPIProductSearchManager man = new BingAPIProductSearchManager ();
-		List<HitBase> res = man.findProductByName("chain saw", 5);
-		System.out.println(res);  	
-	}
-}
+package opennlp.tools.apps.review_builder;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.similarity.apps.BingQueryRunner;
+import opennlp.tools.similarity.apps.HitBase;
+
+import org.apache.commons.lang.StringUtils;
+
+public class BingAPIProductSearchManager {
+	BingQueryRunner search = new BingQueryRunner();
+
+	public List<HitBase> findProductByName(String name, int count){
+		List<HitBase> foundFBPages = search.runSearch("site:amazon.com"+" "+name + " reviews"
+				, 10);
+		List<HitBase> results = new ArrayList<HitBase>();
+		int ct=0;
+		for(HitBase h: foundFBPages){
+			if (ct>=count) break; ct++; 
+			String title = h.getTitle().toLowerCase();
+			if (h.getUrl().indexOf("amazon.com")<0)
+				continue;
+			String[] merchantWords = name.toLowerCase().split(" ");
+			int overlapCount=0;
+/*			for(String commonWord:merchantWords){
+				if (title.indexOf(commonWord+" ")>-1 || title.indexOf(" "+commonWord)>-1){
+					overlapCount++;
+					System.out.println(" found word "+ commonWord + " in title = "+title);
+				}
+			}
+			float coverage = (float)overlapCount/(float) (merchantWords.length);
+			if ((coverage>0.4 || (coverage>0.5f && merchantWords.length <4 )))
+*/				results.add(h);
+		}
+		return results;
+	}
+	
+	public List<HitBase> findProductByNameNoReview(String name, int count){
+		List<HitBase> foundFBPages = search.runSearch(name, count);
+		List<HitBase> results = new ArrayList<HitBase>();
+		int ct=0;
+		for(HitBase h: foundFBPages){
+			if (ct>=count) break; ct++; 
+			String title = h.getTitle().toLowerCase();
+			String[] merchantWords = name.toLowerCase().split(" ");
+			int overlapCount=0;
+			for(String commonWord:merchantWords){
+				if (title.indexOf(commonWord+" ")>-1 || title.indexOf(" "+commonWord)>-1){
+					overlapCount++;
+					System.out.println(" found word "+ commonWord + " in title = "+title);
+				}
+			}
+			float coverage = (float)overlapCount/(float) (merchantWords.length);
+			if ((coverage>0.4 || (coverage>0.5f && merchantWords.length <4 )))
+				results.add(h);
+		}
+		return results;
+	}
+
+	
+
+	public static void main(String[] args){
+		BingAPIProductSearchManager man = new BingAPIProductSearchManager ();
+		List<HitBase> res = man.findProductByName("chain saw", 5);
+		System.out.println(res);  	
+	}
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/FBOpenGraphSearchManager.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/FBOpenGraphSearchManager.java
index 1cfd7e6..4d6e0dd 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/FBOpenGraphSearchManager.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/FBOpenGraphSearchManager.java
@@ -1,141 +1,141 @@
-package opennlp.tools.apps.review_builder;
-
-import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.List;
-import org.apache.commons.lang.StringUtils;
-
-import opennlp.tools.jsmlearning.ProfileReaderWriter;
-import opennlp.tools.similarity.apps.utils.PageFetcher;
-import com.restfb.Connection;
-import com.restfb.DefaultFacebookClient;
-import com.restfb.FacebookClient;
-import com.restfb.Parameter;
-import com.restfb.exception.FacebookException;
-import com.restfb.types.Event;
-import com.restfb.types.Page;
-
-
-public class FBOpenGraphSearchManager {
-
-	public List<String[]> profiles = null;
-	protected FacebookClient mFBClient;
-	protected PageFetcher pageFetcher = new PageFetcher();
-	protected static final int NUM_TRIES = 5;
-	protected static final long WAIT_BTW_TRIES=1000; //milliseconds between re-tries
-	
-		
-	public FBOpenGraphSearchManager(){
-		profiles = ProfileReaderWriter.readProfiles("C:\\nc\\features\\analytics\\dealanalyzer\\sweetjack-localcoupon-may12012tooct302012.csv");
-		
-	}
-	
-		
-	public void setFacebookClient(FacebookClient c){
-		this.mFBClient=c;
-	}
-	
-	public List<Event> getFBEventsByName(String event)
-	{
-	    List<Event> events = new ArrayList<Event>();
-	    
-	    for(int i=0; i < NUM_TRIES; i++)
-	    {
-    	    try
-    	    {
-        	    Connection<Event> publicSearch =
-        	            mFBClient.fetchConnection("search", Event.class,
-        	              Parameter.with("q", event), Parameter.with("type", "event"),Parameter.with("limit", 100));
-        	    System.out.println("Searching FB events for " + event);
-        	    events= publicSearch.getData();
-        	    break;
-    	    }
-    	    catch(FacebookException e)
-    	    {
-    	    	System.out.println("FBError "+e);
-    	        try
-                {
-                    Thread.sleep(WAIT_BTW_TRIES);
-                }
-                catch (InterruptedException e1)
-                {
-                	System.out.println("Error "+e1);
-                }
-    	    }
-	    }
-	    return events;
-	}
-	
-	public Long getFBPageLikes(String merchant)
-	{
-        List<Page> groups = new ArrayList<Page>();
-        
-        for(int i=0; i < NUM_TRIES; i++)
-        {
-            try
-            {
-                Connection<Page> publicSearch =
-                        mFBClient.fetchConnection("search", Page.class,
-                          Parameter.with("q", merchant), Parameter.with("type", "page"),Parameter.with("limit", 100));
-                System.out.println("Searching FB Pages for " + merchant);
-                groups= publicSearch.getData();
-                break;
-            }
-            catch(FacebookException e)
-            {
-            	System.out.println("FBError "+e);
-                try
-                {
-                    Thread.sleep(WAIT_BTW_TRIES);
-                }
-                catch (InterruptedException e1)
-                {
-                	System.out.println("Error "+e1);
-                }
-            }
-        }
-        
-        for (Page p: groups){
-        	if (p!=null && p.getLikes()!=null && p.getLikes()>0) 
-        		return p.getLikes();
-        }
-        
-        //stats fwb">235</span>
-        
-        for (Page p: groups){
-        	if (p.getId()==null)
-        		continue;
-        	String content = pageFetcher.fetchOrigHTML("http://www.facebook.com/"+p.getId());
-        
-        	String likes = StringUtils.substringBetween(content, "stats fwb\">", "<" );
-        	if (likes==null)
-        		continue;
-        	Integer nLikes =0;
-        	try {
-        	nLikes = Integer.parseInt(likes);
-        	} catch (Exception e){
-        		
-        	}
-        	if (nLikes>0){
-        		return (long)nLikes;
-        	}
-        	
-        }
-        
-        
-        return null;
-	}
-	
-
-    // 
-    
-    public static void main(String[] args){
-    	FBOpenGraphSearchManager man = new FBOpenGraphSearchManager ();
-    	man.setFacebookClient(new DefaultFacebookClient());
-       	
-    	
-    	long res = man.getFBPageLikes("chain saw");
-    	System.out.println(res);
-    	    	
-    }
-}
+package opennlp.tools.apps.review_builder;
+
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.List;
+import org.apache.commons.lang.StringUtils;
+
+import opennlp.tools.jsmlearning.ProfileReaderWriter;
+import opennlp.tools.similarity.apps.utils.PageFetcher;
+import com.restfb.Connection;
+import com.restfb.DefaultFacebookClient;
+import com.restfb.FacebookClient;
+import com.restfb.Parameter;
+import com.restfb.exception.FacebookException;
+import com.restfb.types.Event;
+import com.restfb.types.Page;
+
+
+public class FBOpenGraphSearchManager {
+
+	public List<String[]> profiles = null;
+	protected FacebookClient mFBClient;
+	protected PageFetcher pageFetcher = new PageFetcher();
+	protected static final int NUM_TRIES = 5;
+	protected static final long WAIT_BTW_TRIES=1000; //milliseconds between re-tries
+	
+		
+	public FBOpenGraphSearchManager(){
+		profiles = ProfileReaderWriter.readProfiles("C:\\nc\\features\\analytics\\dealanalyzer\\sweetjack-localcoupon-may12012tooct302012.csv");
+		
+	}
+	
+		
+	public void setFacebookClient(FacebookClient c){
+		this.mFBClient=c;
+	}
+	
+	public List<Event> getFBEventsByName(String event)
+	{
+	    List<Event> events = new ArrayList<Event>();
+	    
+	    for(int i=0; i < NUM_TRIES; i++)
+	    {
+    	    try
+    	    {
+        	    Connection<Event> publicSearch =
+        	            mFBClient.fetchConnection("search", Event.class,
+        	              Parameter.with("q", event), Parameter.with("type", "event"),Parameter.with("limit", 100));
+        	    System.out.println("Searching FB events for " + event);
+        	    events= publicSearch.getData();
+        	    break;
+    	    }
+    	    catch(FacebookException e)
+    	    {
+    	    	System.out.println("FBError "+e);
+    	        try
+                {
+                    Thread.sleep(WAIT_BTW_TRIES);
+                }
+                catch (InterruptedException e1)
+                {
+                	System.out.println("Error "+e1);
+                }
+    	    }
+	    }
+	    return events;
+	}
+	
+	public Long getFBPageLikes(String merchant)
+	{
+        List<Page> groups = new ArrayList<Page>();
+        
+        for(int i=0; i < NUM_TRIES; i++)
+        {
+            try
+            {
+                Connection<Page> publicSearch =
+                        mFBClient.fetchConnection("search", Page.class,
+                          Parameter.with("q", merchant), Parameter.with("type", "page"),Parameter.with("limit", 100));
+                System.out.println("Searching FB Pages for " + merchant);
+                groups= publicSearch.getData();
+                break;
+            }
+            catch(FacebookException e)
+            {
+            	System.out.println("FBError "+e);
+                try
+                {
+                    Thread.sleep(WAIT_BTW_TRIES);
+                }
+                catch (InterruptedException e1)
+                {
+                	System.out.println("Error "+e1);
+                }
+            }
+        }
+        
+        for (Page p: groups){
+        	if (p!=null && p.getLikes()!=null && p.getLikes()>0) 
+        		return p.getLikes();
+        }
+        
+        //stats fwb">235</span>
+        
+        for (Page p: groups){
+        	if (p.getId()==null)
+        		continue;
+        	String content = pageFetcher.fetchOrigHTML("http://www.facebook.com/"+p.getId());
+        
+        	String likes = StringUtils.substringBetween(content, "stats fwb\">", "<" );
+        	if (likes==null)
+        		continue;
+        	Integer nLikes =0;
+        	try {
+        	nLikes = Integer.parseInt(likes);
+        	} catch (Exception e){
+        		
+        	}
+        	if (nLikes>0){
+        		return (long)nLikes;
+        	}
+        	
+        }
+        
+        
+        return null;
+	}
+	
+
+    // 
+    
+    public static void main(String[] args){
+    	FBOpenGraphSearchManager man = new FBOpenGraphSearchManager ();
+    	man.setFacebookClient(new DefaultFacebookClient());
+       	
+    	
+    	long res = man.getFBPageLikes("chain saw");
+    	System.out.println(res);
+    	    	
+    }
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
index 8ddf502..30ed7e3 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
@@ -1,86 +1,86 @@
-package opennlp.tools.apps.review_builder;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.net.URLConnection;
-import java.net.URLDecoder;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.commons.lang.StringUtils;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class MachineTranslationWrapper  {
-	private String translatorURL = "http://mymemory.translated.net/api/get?q=";
-	
-	public String translate(String sentence, String lang2lang){
-		if (sentence==null)
-			return null;
-		String request = translatorURL + sentence.replace(' ','+') + "&langpair="+lang2lang;//"en|es";
-		JSONArray arr=null, prodArr = null, searchURLviewArr = null;
-		try {
-			URL urlC = new URL(request);
-			URLConnection connection = urlC.openConnection();
-
-			String line;
-			String result = "";
-			BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
-			int count = 0;
-			while ((line = reader.readLine()) != null)
-			{
-				result+=line;
-				count++;
-			}
-			JSONObject rootObject = new JSONObject(result);
-			JSONObject  findObject = rootObject.getJSONObject("responseData");
-			String transl = findObject.getString("translatedText");
-			try {
-				transl = URLDecoder.decode(transl, "UTF-8");
-			} catch (Exception e) {
-				
-			}
-			
-			return transl;
-			
-		} catch (MalformedURLException e) {
-			
-			e.printStackTrace();
-			return null;
-		} catch (JSONException e) {
-			e.printStackTrace();
-			return null;			
-		} catch (IOException e) {
-			e.printStackTrace();
-			return null;			
-		}	
-		
-	}
-	
-	public String rePhrase(String sentence){
-		System.out.println("orig = "+ sentence);
-		String transl = translate(sentence, "en|es");
-		System.out.println("tranls = "+transl);
-		String inverseTransl = translate(transl, "es|en");
-		if (!(inverseTransl.indexOf("NO QUERY SPECIFIED")>-1) && !(inverseTransl.indexOf("INVALID LANGUAGE")>-1) && !(inverseTransl.indexOf("MYMEMORY WARNING")>-1))
-			return inverseTransl;
-		else 
-			return sentence;
-	}
-	
-	
-	
-	public static void main(String[] args){
-		MachineTranslationWrapper rePhraser = new MachineTranslationWrapper();
-		
-		System.out.println(rePhraser.translate("I went to the nearest bookstore to buy a book written by my friend and his aunt", "en|ru"));
-		
-		System.out.println(rePhraser.rePhrase("I went to the nearest bookstore to buy a book written by my friend and his aunt"));
-
-	}
-		
-}
+package opennlp.tools.apps.review_builder;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.net.URLDecoder;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.lang.StringUtils;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+public class MachineTranslationWrapper  {
+	private String translatorURL = "http://mymemory.translated.net/api/get?q=";
+	
+	public String translate(String sentence, String lang2lang){
+		if (sentence==null)
+			return null;
+		String request = translatorURL + sentence.replace(' ','+') + "&langpair="+lang2lang;//"en|es";
+		JSONArray arr=null, prodArr = null, searchURLviewArr = null;
+		try {
+			URL urlC = new URL(request);
+			URLConnection connection = urlC.openConnection();
+
+			String line;
+			String result = "";
+			BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
+			int count = 0;
+			while ((line = reader.readLine()) != null)
+			{
+				result+=line;
+				count++;
+			}
+			JSONObject rootObject = new JSONObject(result);
+			JSONObject  findObject = rootObject.getJSONObject("responseData");
+			String transl = findObject.getString("translatedText");
+			try {
+				transl = URLDecoder.decode(transl, "UTF-8");
+			} catch (Exception e) {
+				
+			}
+			
+			return transl;
+			
+		} catch (MalformedURLException e) {
+			
+			e.printStackTrace();
+			return null;
+		} catch (JSONException e) {
+			e.printStackTrace();
+			return null;			
+		} catch (IOException e) {
+			e.printStackTrace();
+			return null;			
+		}	
+		
+	}
+	
+	public String rePhrase(String sentence){
+		System.out.println("orig = "+ sentence);
+		String transl = translate(sentence, "en|es");
+		System.out.println("tranls = "+transl);
+		String inverseTransl = translate(transl, "es|en");
+		if (!(inverseTransl.indexOf("NO QUERY SPECIFIED")>-1) && !(inverseTransl.indexOf("INVALID LANGUAGE")>-1) && !(inverseTransl.indexOf("MYMEMORY WARNING")>-1))
+			return inverseTransl;
+		else 
+			return sentence;
+	}
+	
+	
+	
+	public static void main(String[] args){
+		MachineTranslationWrapper rePhraser = new MachineTranslationWrapper();
+		
+		System.out.println(rePhraser.translate("I went to the nearest bookstore to buy a book written by my friend and his aunt", "en|ru"));
+		
+		System.out.println(rePhraser.rePhrase("I went to the nearest bookstore to buy a book written by my friend and his aunt"));
+
+	}
+		
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MinedSentenceProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MinedSentenceProcessor.java
index 73d8417..b0f0362 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MinedSentenceProcessor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MinedSentenceProcessor.java
@@ -1,210 +1,210 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.apps.review_builder;
-
-import java.util.Arrays;
-import java.util.List;
-
-import opennlp.tools.similarity.apps.utils.Utils;
-
-import org.apache.commons.lang.StringUtils;
-
-public class MinedSentenceProcessor {
-  public static String acceptableMinedSentence(String sent) {
-    // if too many commas => seo text
-
-    String[] commas = StringUtils.split(sent, ',');
-    String[] spaces = StringUtils.split(sent, ' ');
-    if ((float) commas.length / (float) spaces.length > 0.7) {
-      System.out.println("Rejection: too many commas");
-      return null;
-    }
-    
-    String[] otherDelimiters = StringUtils.split(sent, '/');
-    if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {
-        System.out.println("Rejection: too many delimiters");
-        return null;
-    }
-    
-    otherDelimiters = StringUtils.split(sent, '.');
-    if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {
-        System.out.println("Rejection: too many delimiters");
-        return null;
-    }
-    otherDelimiters = StringUtils.split(sent, '!');
-    if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {
-        System.out.println("Rejection: too many delimiters");
-        return null;
-    }
-    otherDelimiters = StringUtils.split(sent, '=');
-    if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {
-        System.out.println("Rejection: too many delimiters");
-        return null;
-    }
-    
-    String[] pipes = StringUtils.split(sent, '|');
-    if (StringUtils.split(sent, '|').length > 2
-        || StringUtils.split(sent, '>').length > 2) {
-      System.out.println("Rejection: too many |s or >s ");
-      return null;
-    }
-    String sentTry = sent.toLowerCase();
-    // if too many long spaces
-    String sentSpaces = sentTry.replace("   ", "");
-    if (sentSpaces.length() - sentTry.length() > 10) // too many spaces -
-      // suspicious
-      return null;
-
-    if (sentTry.indexOf("click here") > -1 || sentTry.indexOf(" wikip") > -1
-        || sentTry.indexOf("copyright") > -1
-        || sentTry.indexOf("operating hours") > -1
-        || sentTry.indexOf("days per week") > -1
-        || sentTry.indexOf("click for") > -1 || sentTry.indexOf("photos") > -1
-        || sentTry.indexOf("find the latest") > -1
-        || sentTry.startsWith("subscribe")
-        || sentTry.indexOf("Terms of Service") > -1
-        || sentTry.indexOf("clicking here") > -1
-        || sentTry.indexOf("skip to") > -1 || sentTry.indexOf("sidebar") > -1
-        || sentTry.indexOf("Tags:") > -1 || sentTry.startsWith("Posted by")
-        || sentTry.indexOf("available online") > -1
-        || sentTry.indexOf("get online") > -1
-        || sentTry.indexOf("buy online") > -1
-        || sentTry.indexOf("not valid") > -1 || sentTry.indexOf("discount") > -1
-        || sentTry.indexOf("official site") > -1
-        || sentTry.indexOf("this video") > -1
-        || sentTry.indexOf("this book") > -1
-        || sentTry.indexOf("this product") > -1
-        || sentTry.indexOf("paperback") > -1 || sentTry.indexOf("hardcover") > -1
-        || sentTry.indexOf("audio cd") > -1
-        || sentTry.indexOf("related searches") > -1
-        || sentTry.indexOf("permission is granted") > -1
-        || sentTry.indexOf("[edit") > -1
-        || sentTry.indexOf("edit categories") > -1
-        || sentTry.indexOf("free license") > -1
-        || sentTry.indexOf("permission is granted") > -1
-        || sentTry.indexOf("under the terms") > -1
-        || sentTry.indexOf("rights reserved") > -1
-        || sentTry.indexOf("wikipedia") > -1 || sentTry.endsWith("the")
-        || sentTry.endsWith("the.") || sentTry.startsWith("below") 
-        || sentTry.indexOf("recipient of")>-1 || sentTry.indexOf("this message")>-1 
-        ||sentTry.indexOf( "mailing list")>-1 ||sentTry.indexOf( "purchase order")>-1
-        ||sentTry.indexOf( "mon-fri")>-1 ||sentTry.indexOf( "email us")>-1 ||sentTry.indexOf( "privacy pol")>-1 ||sentTry.indexOf( "back to top")>-1 
-        ||sentTry.indexOf( "click here")>-1 ||sentTry.indexOf( "for details")>-1 ||sentTry.indexOf( "assistance?")>-1 ||sentTry.indexOf( "chat live")>-1
-        ||sentTry.indexOf( "free shipping")>-1 ||sentTry.indexOf( "company info")>-1 ||sentTry.indexOf( "satisfaction g")>-1 ||sentTry.indexOf( "contact us")>-1
-        ||sentTry.startsWith( "fax") ||sentTry.startsWith( "write") || sentTry.startsWith( "email")||sentTry.indexOf( "conditions")>-1 ||sentTry.indexOf( "chat live")>-1
-        ||sentTry.startsWith( "we ") ||sentTry.indexOf( "the recipient")>-1 ||sentTry.indexOf( "day return")>-1 ||sentTry.indexOf( "days return")>-1
-        
-        ||sentTry.startsWith( "fax") ||sentTry.indexOf( "refund it")>-1 || sentTry.indexOf( "your money")>-1
-        ||sentTry.startsWith( "free") ||sentTry.indexOf( "purchase orders")>-1
-        ||sentTry.startsWith( "exchange it ") ||sentTry.indexOf( "return it")>-1 ||sentTry.indexOf( "credit card")>-1 
-        
-        ||sentTry.indexOf( "storeshop")>-1 || sentTry.startsWith( "find") || sentTry.startsWith( "shop") || sentTry.startsWith( "unlimited") 
-        ||sentTry.indexOf( "for a limited time")>-1 ||sentTry.indexOf( "prime members")>-1 ||sentTry.indexOf( "amazon members")>-1 ||sentTry.indexOf( "unlimited free")>-1 
-        ||sentTry.indexOf( "shipping")>-1 || sentTry.startsWith( "amazon")
-// not a script text
-        ||sentTry.indexOf( "document.body")>-1 ||sentTry.indexOf( " var ")>-1         ||sentTry.indexOf( "search suggestions")>-1 ||sentTry.startsWith( "Search") 
-        
-    		)
-      return null;
-    
-    //Millions of Amazon Prime members enjoy instant videos, free Kindle books and unlimited free two-day shipping.
-
-    // count symbols indicating wrong parts of page to mine for text
-    // if short and contains too many symbols indicating wrong area: reject
-    String sentWrongSym = sentTry.replace(">", "&&&").replace("�", "&&&")
-        .replace("|", "&&&").replace(":", "&&&").replace("/", "&&&")
-        .replace("-", "&&&").replace("%", "&&&");
-    if ((sentWrongSym.length() - sentTry.length()) >= 4
-        && sentTry.length() < 200) // twice ot more
-      return null;
-
-    sent = sent.replace('[', ' ').replace(']', ' ')
-        .replace("_should_find_orig_", "").replace(".   .", ". ")
-        .replace("amp;", " ").replace("1.", " ").replace("2.", " ")
-        .replace("3.", " ").replace("4.", " ").replace("2009", "2011")
-        .replace("2008", "2011").replace("2006", "2011")
-        .replace("2007", "2011").replace("VIDEO:", " ").replace("Video:", " ")
-        .replace("no comments", " ").replace("  ", " ").replace("  ", " ")
-        .replace("(more.)", "").replace("more.", "").replace("<more>", "")
-        .replace("[more]", "").replace(".,", ".").replace("&lt;", "")
-        .replace("p&gt;", "").replace("product description", "");
-
-    // TODO .replace("a.", ".");
-
-    int endIndex = sent.indexOf(" posted");
-    if (endIndex > 0)
-      sent = sent.substring(0, endIndex);
-
-    return sent;
-  }
-
-  public static String processSentence(String pageSentence) {
-    if (pageSentence == null)
-      return "";
-    pageSentence = Utils.fullStripHTML(pageSentence);
-    pageSentence = StringUtils.chomp(pageSentence, "..");
-    pageSentence = StringUtils.chomp(pageSentence, ". .");
-    pageSentence = StringUtils.chomp(pageSentence, " .");
-    pageSentence = StringUtils.chomp(pageSentence, ".");
-    pageSentence = StringUtils.chomp(pageSentence, "...");
-    pageSentence = StringUtils.chomp(pageSentence, " ....");
-    pageSentence = pageSentence.replace("::", ":").replace(".,", ". ")
-        .replace("(.)", "");
-
-    pageSentence = pageSentence.trim();
-    pageSentence = pageSentence.replaceAll("\\s+", " "); // make single
-    // spaces
-    // everywhere
-
-    String[] pipes = StringUtils.split(pageSentence, '|'); // removed
-    // shorter part
-    // of sentence
-    // at the end
-    // after pipe
-    if (pipes.length == 2
-        && ((float) pipes[0].length() / (float) pipes[1].length() > 3.0)) {
-      int pipePos = pageSentence.indexOf("|");
-      if (pipePos > -1)
-        pageSentence = pageSentence.substring(0, pipePos - 1).trim();
-
-    }
-
-    if (!StringUtils.contains(pageSentence, '.')
-        && !StringUtils.contains(pageSentence, '?')
-        && !StringUtils.contains(pageSentence, '!'))
-      pageSentence = pageSentence + ". ";
-
-    pageSentence = pageSentence.replace(" .", ".").replace("..", ".").trim();
-    if (!pageSentence.endsWith("."))
-      pageSentence += ". ";
-    return pageSentence;
-  }
-
-  
-  public static String normalizeForSentenceSplitting(String pageContent) {
-    pageContent.replace("Jan.", "January").replace("Feb.", "February")
-        .replace("Mar.", "March").replace("Apr.", "April")
-        .replace("Jun.", "June").replace("Jul.", "July")
-        .replace("Aug.", "August").replace("Sep.", "September")
-        .replace("Oct.", "October").replace("Nov.", "November")
-        .replace("Dec.", "December");
-
-    return pageContent;
-
-  }
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.apps.review_builder;
+
+import java.util.Arrays;
+import java.util.List;
+
+import opennlp.tools.similarity.apps.utils.Utils;
+
+import org.apache.commons.lang.StringUtils;
+
+public class MinedSentenceProcessor {
+  public static String acceptableMinedSentence(String sent) {
+    // if too many commas => seo text
+
+    String[] commas = StringUtils.split(sent, ',');
+    String[] spaces = StringUtils.split(sent, ' ');
+    if ((float) commas.length / (float) spaces.length > 0.7) {
+      System.out.println("Rejection: too many commas");
+      return null;
+    }
+    
+    String[] otherDelimiters = StringUtils.split(sent, '/');
+    if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {
+        System.out.println("Rejection: too many delimiters");
+        return null;
+    }
+    
+    otherDelimiters = StringUtils.split(sent, '.');
+    if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {
+        System.out.println("Rejection: too many delimiters");
+        return null;
+    }
+    otherDelimiters = StringUtils.split(sent, '!');
+    if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {
+        System.out.println("Rejection: too many delimiters");
+        return null;
+    }
+    otherDelimiters = StringUtils.split(sent, '=');
+    if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {
+        System.out.println("Rejection: too many delimiters");
+        return null;
+    }
+    
+    String[] pipes = StringUtils.split(sent, '|');
+    if (StringUtils.split(sent, '|').length > 2
+        || StringUtils.split(sent, '>').length > 2) {
+      System.out.println("Rejection: too many |s or >s ");
+      return null;
+    }
+    String sentTry = sent.toLowerCase();
+    // if too many long spaces
+    String sentSpaces = sentTry.replace("   ", "");
+    if (sentSpaces.length() - sentTry.length() > 10) // too many spaces -
+      // suspicious
+      return null;
+
+    if (sentTry.indexOf("click here") > -1 || sentTry.indexOf(" wikip") > -1
+        || sentTry.indexOf("copyright") > -1
+        || sentTry.indexOf("operating hours") > -1
+        || sentTry.indexOf("days per week") > -1
+        || sentTry.indexOf("click for") > -1 || sentTry.indexOf("photos") > -1
+        || sentTry.indexOf("find the latest") > -1
+        || sentTry.startsWith("subscribe")
+        || sentTry.indexOf("Terms of Service") > -1
+        || sentTry.indexOf("clicking here") > -1
+        || sentTry.indexOf("skip to") > -1 || sentTry.indexOf("sidebar") > -1
+        || sentTry.indexOf("Tags:") > -1 || sentTry.startsWith("Posted by")
+        || sentTry.indexOf("available online") > -1
+        || sentTry.indexOf("get online") > -1
+        || sentTry.indexOf("buy online") > -1
+        || sentTry.indexOf("not valid") > -1 || sentTry.indexOf("discount") > -1
+        || sentTry.indexOf("official site") > -1
+        || sentTry.indexOf("this video") > -1
+        || sentTry.indexOf("this book") > -1
+        || sentTry.indexOf("this product") > -1
+        || sentTry.indexOf("paperback") > -1 || sentTry.indexOf("hardcover") > -1
+        || sentTry.indexOf("audio cd") > -1
+        || sentTry.indexOf("related searches") > -1
+        || sentTry.indexOf("permission is granted") > -1
+        || sentTry.indexOf("[edit") > -1
+        || sentTry.indexOf("edit categories") > -1
+        || sentTry.indexOf("free license") > -1
+        || sentTry.indexOf("permission is granted") > -1
+        || sentTry.indexOf("under the terms") > -1
+        || sentTry.indexOf("rights reserved") > -1
+        || sentTry.indexOf("wikipedia") > -1 || sentTry.endsWith("the")
+        || sentTry.endsWith("the.") || sentTry.startsWith("below") 
+        || sentTry.indexOf("recipient of")>-1 || sentTry.indexOf("this message")>-1 
+        ||sentTry.indexOf( "mailing list")>-1 ||sentTry.indexOf( "purchase order")>-1
+        ||sentTry.indexOf( "mon-fri")>-1 ||sentTry.indexOf( "email us")>-1 ||sentTry.indexOf( "privacy pol")>-1 ||sentTry.indexOf( "back to top")>-1 
+        ||sentTry.indexOf( "click here")>-1 ||sentTry.indexOf( "for details")>-1 ||sentTry.indexOf( "assistance?")>-1 ||sentTry.indexOf( "chat live")>-1
+        ||sentTry.indexOf( "free shipping")>-1 ||sentTry.indexOf( "company info")>-1 ||sentTry.indexOf( "satisfaction g")>-1 ||sentTry.indexOf( "contact us")>-1
+        ||sentTry.startsWith( "fax") ||sentTry.startsWith( "write") || sentTry.startsWith( "email")||sentTry.indexOf( "conditions")>-1 ||sentTry.indexOf( "chat live")>-1
+        ||sentTry.startsWith( "we ") ||sentTry.indexOf( "the recipient")>-1 ||sentTry.indexOf( "day return")>-1 ||sentTry.indexOf( "days return")>-1
+        
+        ||sentTry.startsWith( "fax") ||sentTry.indexOf( "refund it")>-1 || sentTry.indexOf( "your money")>-1
+        ||sentTry.startsWith( "free") ||sentTry.indexOf( "purchase orders")>-1
+        ||sentTry.startsWith( "exchange it ") ||sentTry.indexOf( "return it")>-1 ||sentTry.indexOf( "credit card")>-1 
+        
+        ||sentTry.indexOf( "storeshop")>-1 || sentTry.startsWith( "find") || sentTry.startsWith( "shop") || sentTry.startsWith( "unlimited") 
+        ||sentTry.indexOf( "for a limited time")>-1 ||sentTry.indexOf( "prime members")>-1 ||sentTry.indexOf( "amazon members")>-1 ||sentTry.indexOf( "unlimited free")>-1 
+        ||sentTry.indexOf( "shipping")>-1 || sentTry.startsWith( "amazon")
+// not a script text
+        ||sentTry.indexOf( "document.body")>-1 ||sentTry.indexOf( " var ")>-1         ||sentTry.indexOf( "search suggestions")>-1 ||sentTry.startsWith( "Search") 
+        
+    		)
+      return null;
+    
+    //Millions of Amazon Prime members enjoy instant videos, free Kindle books and unlimited free two-day shipping.
+
+    // count symbols indicating wrong parts of page to mine for text
+    // if short and contains too many symbols indicating wrong area: reject
+    String sentWrongSym = sentTry.replace(">", "&&&").replace("�", "&&&")
+        .replace("|", "&&&").replace(":", "&&&").replace("/", "&&&")
+        .replace("-", "&&&").replace("%", "&&&");
+    if ((sentWrongSym.length() - sentTry.length()) >= 4
+        && sentTry.length() < 200) // twice ot more
+      return null;
+
+    sent = sent.replace('[', ' ').replace(']', ' ')
+        .replace("_should_find_orig_", "").replace(".   .", ". ")
+        .replace("amp;", " ").replace("1.", " ").replace("2.", " ")
+        .replace("3.", " ").replace("4.", " ").replace("2009", "2011")
+        .replace("2008", "2011").replace("2006", "2011")
+        .replace("2007", "2011").replace("VIDEO:", " ").replace("Video:", " ")
+        .replace("no comments", " ").replace("  ", " ").replace("  ", " ")
+        .replace("(more.)", "").replace("more.", "").replace("<more>", "")
+        .replace("[more]", "").replace(".,", ".").replace("&lt;", "")
+        .replace("p&gt;", "").replace("product description", "");
+
+    // TODO .replace("a.", ".");
+
+    int endIndex = sent.indexOf(" posted");
+    if (endIndex > 0)
+      sent = sent.substring(0, endIndex);
+
+    return sent;
+  }
+
+  public static String processSentence(String pageSentence) {
+    if (pageSentence == null)
+      return "";
+    pageSentence = Utils.fullStripHTML(pageSentence);
+    pageSentence = StringUtils.chomp(pageSentence, "..");
+    pageSentence = StringUtils.chomp(pageSentence, ". .");
+    pageSentence = StringUtils.chomp(pageSentence, " .");
+    pageSentence = StringUtils.chomp(pageSentence, ".");
+    pageSentence = StringUtils.chomp(pageSentence, "...");
+    pageSentence = StringUtils.chomp(pageSentence, " ....");
+    pageSentence = pageSentence.replace("::", ":").replace(".,", ". ")
+        .replace("(.)", "");
+
+    pageSentence = pageSentence.trim();
+    pageSentence = pageSentence.replaceAll("\\s+", " "); // make single
+    // spaces
+    // everywhere
+
+    String[] pipes = StringUtils.split(pageSentence, '|'); // removed
+    // shorter part
+    // of sentence
+    // at the end
+    // after pipe
+    if (pipes.length == 2
+        && ((float) pipes[0].length() / (float) pipes[1].length() > 3.0)) {
+      int pipePos = pageSentence.indexOf("|");
+      if (pipePos > -1)
+        pageSentence = pageSentence.substring(0, pipePos - 1).trim();
+
+    }
+
+    if (!StringUtils.contains(pageSentence, '.')
+        && !StringUtils.contains(pageSentence, '?')
+        && !StringUtils.contains(pageSentence, '!'))
+      pageSentence = pageSentence + ". ";
+
+    pageSentence = pageSentence.replace(" .", ".").replace("..", ".").trim();
+    if (!pageSentence.endsWith("."))
+      pageSentence += ". ";
+    return pageSentence;
+  }
+
+  
+  public static String normalizeForSentenceSplitting(String pageContent) {
+    pageContent.replace("Jan.", "January").replace("Feb.", "February")
+        .replace("Mar.", "March").replace("Apr.", "April")
+        .replace("Jun.", "June").replace("Jul.", "July")
+        .replace("Aug.", "August").replace("Sep.", "September")
+        .replace("Oct.", "October").replace("Nov.", "November")
+        .replace("Dec.", "December");
+
+    return pageContent;
+
+  }
 }
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java
index 956640f..f8dfaa8 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java
@@ -1,166 +1,166 @@
-package opennlp.tools.apps.review_builder;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import opennlp.tools.jsmlearning.ProfileReaderWriter;
-import opennlp.tools.parse_thicket.Triple;
-
-public class ReviewBuilderRunner {
-
-	private List<Triple> input = new ArrayList<Triple>(); 
-
-	public ReviewBuilderRunner(){
-
-		/*	input.add( new Pair<String, Integer>("chief architect portable mobile tv", 204973051));
-
-	input.add( new Pair<String, Integer>("lg plasma tv", 215734562));
-	input.add( new Pair<String, Integer>("magnavox lcd hdtv", 215415652));
-	input.add( new Pair<String, Integer>("yamaha aventage home theater receiver", 215742271));
-	input.add( new Pair<String, Integer>("panasonic 24inch lcd tv", 215742233));
-	input.add( new Pair<String, Integer>("otterbox barnes and noble nook commuter case", 215572161));
-	input.add( new Pair<String, Integer>("sony kdl32ex340 led tv", 215743925));
-	input.add( new Pair<String, Integer>("alpine waterfall tabletop fountain lighting", 215135546));
-    input.add( new Pair<String, Integer>("ihome rechargeable speaker system", 215363231 ));
-	input.add( new Pair<String, Integer>("ion slide film scanner", 212088884));
-
-		 input.add( new Pair<String, Integer>("mens dr martens shoes black nappa", 210813142));
-		 input.add( new Pair<String, Integer>("calvin klein seamless thong panty", 201984853));
-		 input.add( new Pair<String, Integer>("mens clarks shoes wallabee beeswax leather", 210808477));
-		//? input.add( new Pair<String, Integer>("mens sperry topsider shoes", 210809238));
-		 input.add( new Pair<String, Integer>("mens giorgio brutini shoes italian calf", 210809508));
-
-		input.add( new Pair<String, Integer>("halo portable backup battery", 1640825398));
-input.add( new Pair<String, Integer>("kenwood pkgmp18 cd receiver  coaxial speakers",1642712915));
-input.add( new Pair<String, Integer>("element ultraslim hdtv",1643167865));
-input.add( new Pair<String, Integer>("westinghouse  dled hdtv black",1641930013));
-input.add( new Pair<String, Integer>("boss audio receiver speaker package system",1643532459));
-input.add( new Pair<String, Integer>("kenwood  cd receiver coaxial speakers bundle",1646566070));
-input.add( new Pair<String, Integer>("element electronics lcd tv black ",1637163018));
-input.add( new Pair<String, Integer>("stunt copter rechargeable battery pack",1636937811));
-input.add( new Pair<String, Integer>("element led ultraslim hdtv  soundbar",1637572596));
-input.add( new Pair<String, Integer>("boss  receiver speaker package system bundle",1646566067));
-input.add( new Pair<String, Integer>("coby  hd tv",1638746307));
-input.add( new Pair<String, Integer>("vizio  diag led smart hdtv",1660162001));
-input.add( new Pair<String, Integer>("sony dock for ipad ipod and iphone",1646826284));
-input.add( new Pair<String, Integer>("vizio  led  ultraslim hdtv",1642018249));
-input.add( new Pair<String, Integer>("lcd kula tv multimedia player",1640265845));
-
-input.add(new Pair<String, Integer>("liz and co alex tall leather boots",1630836375));
-input.add( new Pair<String, Integer>("total girl silvia sequin moccasin", 1630828314));
-input.add( new Pair<String, Integer>("new england patriots new era nfl sport sideline knit", 1588531904));
-input.add( new Pair<String, Integer>("betseyville sequin backpack", 1630825375));
-input.add( new Pair<String, Integer>("the north face womens osito jacket mojito", 1639791775));
-input.add( new Pair<String, Integer>("misty harbor raincoat trench removable liner", 903542613));
-input.add(new Pair<String, Integer>("ae womens camo jacket ", 1229070780));
-input.add(new Pair<String, Integer>("indianapolis colts sideline knit", 1588531896));
-input.add(new Pair<String, Integer>("b o c korah boot", 1622401738));
-input.add(new Pair<String, Integer>("adidas mens speed cut track suit", 920744865));
-input.add(new Pair<String, Integer>("liz and co lulu zipper boots", 1630836380));
-input.add(new Pair<String, Integer>("black navy  lightweight oxford shoes", 906123996));
-input.add(new Pair<String, Integer>("liz and co farley tall boots", 1639960280));
-input.add(new Pair<String, Integer>("call it spring karpin  pullon boots", 1629938981));
-input.add(new Pair<String, Integer>("ugg australia bailey bow boots", 1594029054));
-input.add(new Pair<String, Integer>("dream chasers  jacket", 1631247949));
-input.add(new Pair<String, Integer>("guess military  tiewaist coat", 1629993909));
-input.add(new Pair<String, Integer>("madden girl allstaar womens zip boots", 1581506993));
-input.add(new Pair<String, Integer>("michael womens shoes", 1590598743));
-input.add(new Pair<String, Integer>("sonoma life style suede midcalf boots women", 1617302927));
-
-		input.add(new Pair<String, Integer>("absolute pnf300 power noise filterground loop isolator with adjustable controls", 1521965454));
-		input.add(new Pair<String, Integer>("sennheiser ie8 stereo earbuds", 211969101));
-		input.add(new Pair<String, Integer>("sanus vlmf109 motorized full motion mount for tvs 37 60 up to 110 lbs", 214893385));
-		input.add(new Pair<String, Integer>("s2fmcy003 earset stereo earbud binaural open miniphone black", 214972916));
-		input.add(new Pair<String, Integer>("boconi bags and leather bryant safari bag carry on luggage brown", 1646568995));
-		input.add(new Pair<String, Integer>("diesel derik pant jyt mens pajama gray", 1645725530));
-		input.add(new Pair<String, Integer>("sole society gina sandal", 1633021283));
-		input.add(new Pair<String, Integer>("toms bimini stitchout slipon women", 1633012540));
-		input.add(new Pair<String, Integer>("the north face womens p r tka 100 microvelour glacier 14 zip tnf blackjk3 medium", 1618022193));
-		input.add(new Pair<String, Integer>("robert graham manuel dress shirt mens long sleeve button up blue", 1631119485));
-
-		input.add(new Pair<String, Integer>("b o c leesa", 1584193288));
-			input.add(new Pair<String, Integer>("blair stirrup pants", 1525621516));
-			input.add(new Pair<String, Integer>("donna karan shirtdress", 1463793963));
-			input.add(new Pair<String, Integer>("columbia sportswear terminal tackle shirt", 1661238030));
-			input.add(new Pair<String, Integer>("carters jersey pajamas", 1573999243));
-			input.add(new Pair<String, Integer>("vince camuto dena", 1626272001));
-			input.add(new Pair<String, Integer>("pistil hudson knit hats", 1660874149));
-			input.add(new Pair<String, Integer>("naturalizer trinity wide shaft womens zip", 1569191459));
-			input.add(new Pair<String, Integer>("bare traps chelby womens sandals", 1513387756));
-			input.add(new Pair<String, Integer>("overland storage hard drive 1 tb hotswap", 212107374));
-			input.add(new Pair<String, Integer>("humminbird indash depth finder", 1616650484));
-			input.add(new Pair<String, Integer>("grepsr800 gre dig scanner", 215723895));
-			input.add(new Pair<String, Integer>("humminbird kayak transducer", 215392426));
-			input.add(new Pair<String, Integer>("garmin nuvi suction cup mount ", 215728710));
-			input.add(new Pair<String, Integer>("crosley radio black", 215662289));
-
-		    input.add(new Triple<String, Integer, String >("avaya ip telephone", 1440488008, "lucent phone system"));
-			input.add(new Triple<String, Integer, String>("clarks trolley womens shoes", 1581854074, "clark womens shoes"));
-			input.add(new Triple<String, Integer, String>("mens evans shoes imperial deer", 210808400, "lb evans slippers"));
-			input.add(new Triple<String, Integer, String>("ugg classic bow shorty gloves", 1665094898, "leather gloves women"));
-			input.add(new Triple<String, Integer, String>("jumping beans man tee baby", 1667155332, "jumping beans clothing"));
-			input.add(new Triple<String, Integer, String>("asics mens shoes", 1630208773, "asics mens running shoes"));
-			input.add(new Triple<String, Integer, String>("oakley hoodie mens fleece", 1656661466, "hoodies for men"));
-			input.add(new Triple<String, Integer, String>("usb sound control digital voice recorder", 1654662662, "digital voice recorder with usb"));
-			input.add(new Triple<String, Integer, String>("motorola bluetooth headset", 215376254, "motorola oasis bluetooth headset"));
-			input.add(new Triple<String, Integer, String>("sony sound bar home theater system", 215450833, "sony sound bar"));
-			input.add(new Triple<String, Integer, String>("jvc full hd everio camcorder", 1664479999, "jvc everio camcorder"));
-		 */
-		
-		 input.add(new Triple<String, Integer, String>("dr martens beckett laceup boots", 1651452641, "doc martin shoes"));
-		 input.add(new Triple<String, Integer, String>("pioneer cd changer",204654672, "pioneer cd player"));
-		 input.add(new Triple<String, Integer, String>("tablet handler strap and desk mount", 1634326303, "tablet holder"));
-		 input.add(new Triple<String, Integer, String>("sockwell loden womens overthecalf socks", 1644572708, "compression stockings, support stockings"));
-		 input.add(new Triple<String, Integer, String>("nike eclipse womens shoes", 1657807048, "nike eclipse ii women s shoe"));
-		 input.add(new Triple<String, Integer, String>("cherokee workwear womens scrub pant black stall",211643295, "cherokee workwear scrubs"));
-		 input.add(new Triple<String, Integer, String>("columbia sportswear jacket ", 1667381935, "columbia omni heat"));
-		 input.add(new Triple<String, Integer, String>("adidas adipure jacket", 1040124787, "adidas track jacket"));
-		 input.add(new Triple<String, Integer, String>("clarks may orchid womens shoes", 1585805688, "clarks loafers"));
-		 input.add(new Triple<String, Integer, String>("levis pants empire blue", 1670283141, "skinny jeans for guys"));
-		 input.add(new Triple<String, Integer, String>("nike jordan black cat tee", 1653598764, "jordan black cat"));
-		 input.add(new Triple<String, Integer, String>("obermeyer womens kassandra down coat", 1670629180, "down winter coats"));
-/*
-		 input.add(new Triple<String, Integer, String>("paramax  surround sound", 835422569, "paramax im3"));
-		 input.add(new Triple<String, Integer, String>("mia quincy wedge", 1285886230, "mia quincy wedge"));
-		 input.add(new Triple<String, Integer, String>("able planet headphones", 1648522886, "able planet nc210g"));
-		 input.add(new Triple<String, Integer, String>("samsung replacement lamp", 695793593, "lamp code bp96"));
-		 input.add(new Triple<String, Integer, String>("paul green emerson boot castagno", 1313967918, "paul green emerson boot"));
-		 input.add(new Triple<String, Integer, String>("bandolino caresse boots", 1448643623, "bandolino caresse boots"));
-		 input.add(new Triple<String, Integer, String>("nine west modiley", 1365998968, "nine west modiley"));
-		 input.add(new Triple<String, Integer, String>("converse chuck taylor  bisay", 1555900934, "turquoise chuck taylors"));
-		 input.add(new Triple<String, Integer, String>("gentle souls bay leaf flats", 1436175162, "gentle souls bay leaf"));
-		 input.add(new Triple<String, Integer, String>("sauce hockey  back hat", 1644440355, "sauce hockey discount code"));
-		 input.add(new Triple<String, Integer, String>("aravon farren oxford shoes", 1644573438, "aravon wef07sh"));
-	*/	 input.add(new Triple<String, Integer, String>("kooba crosby hobo handbags", 1326503038, "kooba crosby"));
-		 input.add(new Triple<String, Integer, String>("bcbgmaxazria sheath dress", 1313949777, "bcbgmaxazria illusion bodice ruched sheath dress"));
-		 input.add(new Triple<String, Integer, String>("billabong boardshorts trunks", 1316823074, "la siesta boardshorts"));
-		 input.add(new Triple<String, Integer, String>("mootsies tootsies boot", 1503727310, "mootsies tootsies draker"));
-		 input.add(new Triple<String, Integer, String>("nine west bootie", 1503730060, "nine west drina"));
-		 input.add(new Triple<String, Integer, String>("playtex support cotton ", 1331026244, "playtex t723"));
-		 input.add(new Triple<String, Integer, String>("fossil morgan satchel taupe", 1355165745, "fossil morgan satchel"));
-		 input.add(new Triple<String, Integer, String>("katonah womens boots brown", 1420057844, "boc katonah boots"));
-		 input.add(new Triple<String, Integer, String>("boot cut jeans supernova", 1363356262, "levis 527 supernova"));
-		 input.add(new Triple<String, Integer, String>("steve madden buckie boot", 1313965918, "steve madden buckie boot"));
-		 input.add(new Triple<String, Integer, String>("charlies horse tshirt", 1428490587, "charlie s horse shirt"));
-		 input.add(new Triple<String, Integer, String>("igloo little playmate ice chest", 205421625, "igloo little playmate"));
-		 input.add(new Triple<String, Integer, String>("mark nason boot", 1313951044, "mark nason rudd"));
-
-
-
-	}
-
-	public static void main(String[] args){
-		//ProductFinderInAWebPage init = new ProductFinderInAWebPage("C:/workspace/relevanceEngine/src/test/resources");
-		ReviewBuilderRunner r = new ReviewBuilderRunner();
-		WebPageReviewExtractor extractor = new WebPageReviewExtractor("C:/workspace/relevanceEngine/src/test/resources");
-		for(Triple query_ID : r.input ){
-			String query = (String) query_ID.getFirst();
-			List<String> res = extractor.formReviewsForAProduct(query);
-
-			ProfileReaderWriter.writeReportListStr(res, "formedReviewSentences"+ query +".csv");
-		}
-
-
-
-	}
+package opennlp.tools.apps.review_builder;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.jsmlearning.ProfileReaderWriter;
+import opennlp.tools.parse_thicket.Triple;
+
+public class ReviewBuilderRunner {
+
+	private List<Triple> input = new ArrayList<Triple>(); 
+
+	public ReviewBuilderRunner(){
+
+		/*	input.add( new Pair<String, Integer>("chief architect portable mobile tv", 204973051));
+
+	input.add( new Pair<String, Integer>("lg plasma tv", 215734562));
+	input.add( new Pair<String, Integer>("magnavox lcd hdtv", 215415652));
+	input.add( new Pair<String, Integer>("yamaha aventage home theater receiver", 215742271));
+	input.add( new Pair<String, Integer>("panasonic 24inch lcd tv", 215742233));
+	input.add( new Pair<String, Integer>("otterbox barnes and noble nook commuter case", 215572161));
+	input.add( new Pair<String, Integer>("sony kdl32ex340 led tv", 215743925));
+	input.add( new Pair<String, Integer>("alpine waterfall tabletop fountain lighting", 215135546));
+    input.add( new Pair<String, Integer>("ihome rechargeable speaker system", 215363231 ));
+	input.add( new Pair<String, Integer>("ion slide film scanner", 212088884));
+
+		 input.add( new Pair<String, Integer>("mens dr martens shoes black nappa", 210813142));
+		 input.add( new Pair<String, Integer>("calvin klein seamless thong panty", 201984853));
+		 input.add( new Pair<String, Integer>("mens clarks shoes wallabee beeswax leather", 210808477));
+		//? input.add( new Pair<String, Integer>("mens sperry topsider shoes", 210809238));
+		 input.add( new Pair<String, Integer>("mens giorgio brutini shoes italian calf", 210809508));
+
+		input.add( new Pair<String, Integer>("halo portable backup battery", 1640825398));
+input.add( new Pair<String, Integer>("kenwood pkgmp18 cd receiver  coaxial speakers",1642712915));
+input.add( new Pair<String, Integer>("element ultraslim hdtv",1643167865));
+input.add( new Pair<String, Integer>("westinghouse  dled hdtv black",1641930013));
+input.add( new Pair<String, Integer>("boss audio receiver speaker package system",1643532459));
+input.add( new Pair<String, Integer>("kenwood  cd receiver coaxial speakers bundle",1646566070));
+input.add( new Pair<String, Integer>("element electronics lcd tv black ",1637163018));
+input.add( new Pair<String, Integer>("stunt copter rechargeable battery pack",1636937811));
+input.add( new Pair<String, Integer>("element led ultraslim hdtv  soundbar",1637572596));
+input.add( new Pair<String, Integer>("boss  receiver speaker package system bundle",1646566067));
+input.add( new Pair<String, Integer>("coby  hd tv",1638746307));
+input.add( new Pair<String, Integer>("vizio  diag led smart hdtv",1660162001));
+input.add( new Pair<String, Integer>("sony dock for ipad ipod and iphone",1646826284));
+input.add( new Pair<String, Integer>("vizio  led  ultraslim hdtv",1642018249));
+input.add( new Pair<String, Integer>("lcd kula tv multimedia player",1640265845));
+
+input.add(new Pair<String, Integer>("liz and co alex tall leather boots",1630836375));
+input.add( new Pair<String, Integer>("total girl silvia sequin moccasin", 1630828314));
+input.add( new Pair<String, Integer>("new england patriots new era nfl sport sideline knit", 1588531904));
+input.add( new Pair<String, Integer>("betseyville sequin backpack", 1630825375));
+input.add( new Pair<String, Integer>("the north face womens osito jacket mojito", 1639791775));
+input.add( new Pair<String, Integer>("misty harbor raincoat trench removable liner", 903542613));
+input.add(new Pair<String, Integer>("ae womens camo jacket ", 1229070780));
+input.add(new Pair<String, Integer>("indianapolis colts sideline knit", 1588531896));
+input.add(new Pair<String, Integer>("b o c korah boot", 1622401738));
+input.add(new Pair<String, Integer>("adidas mens speed cut track suit", 920744865));
+input.add(new Pair<String, Integer>("liz and co lulu zipper boots", 1630836380));
+input.add(new Pair<String, Integer>("black navy  lightweight oxford shoes", 906123996));
+input.add(new Pair<String, Integer>("liz and co farley tall boots", 1639960280));
+input.add(new Pair<String, Integer>("call it spring karpin  pullon boots", 1629938981));
+input.add(new Pair<String, Integer>("ugg australia bailey bow boots", 1594029054));
+input.add(new Pair<String, Integer>("dream chasers  jacket", 1631247949));
+input.add(new Pair<String, Integer>("guess military  tiewaist coat", 1629993909));
+input.add(new Pair<String, Integer>("madden girl allstaar womens zip boots", 1581506993));
+input.add(new Pair<String, Integer>("michael womens shoes", 1590598743));
+input.add(new Pair<String, Integer>("sonoma life style suede midcalf boots women", 1617302927));
+
+		input.add(new Pair<String, Integer>("absolute pnf300 power noise filterground loop isolator with adjustable controls", 1521965454));
+		input.add(new Pair<String, Integer>("sennheiser ie8 stereo earbuds", 211969101));
+		input.add(new Pair<String, Integer>("sanus vlmf109 motorized full motion mount for tvs 37 60 up to 110 lbs", 214893385));
+		input.add(new Pair<String, Integer>("s2fmcy003 earset stereo earbud binaural open miniphone black", 214972916));
+		input.add(new Pair<String, Integer>("boconi bags and leather bryant safari bag carry on luggage brown", 1646568995));
+		input.add(new Pair<String, Integer>("diesel derik pant jyt mens pajama gray", 1645725530));
+		input.add(new Pair<String, Integer>("sole society gina sandal", 1633021283));
+		input.add(new Pair<String, Integer>("toms bimini stitchout slipon women", 1633012540));
+		input.add(new Pair<String, Integer>("the north face womens p r tka 100 microvelour glacier 14 zip tnf blackjk3 medium", 1618022193));
+		input.add(new Pair<String, Integer>("robert graham manuel dress shirt mens long sleeve button up blue", 1631119485));
+
+		input.add(new Pair<String, Integer>("b o c leesa", 1584193288));
+			input.add(new Pair<String, Integer>("blair stirrup pants", 1525621516));
+			input.add(new Pair<String, Integer>("donna karan shirtdress", 1463793963));
+			input.add(new Pair<String, Integer>("columbia sportswear terminal tackle shirt", 1661238030));
+			input.add(new Pair<String, Integer>("carters jersey pajamas", 1573999243));
+			input.add(new Pair<String, Integer>("vince camuto dena", 1626272001));
+			input.add(new Pair<String, Integer>("pistil hudson knit hats", 1660874149));
+			input.add(new Pair<String, Integer>("naturalizer trinity wide shaft womens zip", 1569191459));
+			input.add(new Pair<String, Integer>("bare traps chelby womens sandals", 1513387756));
+			input.add(new Pair<String, Integer>("overland storage hard drive 1 tb hotswap", 212107374));
+			input.add(new Pair<String, Integer>("humminbird indash depth finder", 1616650484));
+			input.add(new Pair<String, Integer>("grepsr800 gre dig scanner", 215723895));
+			input.add(new Pair<String, Integer>("humminbird kayak transducer", 215392426));
+			input.add(new Pair<String, Integer>("garmin nuvi suction cup mount ", 215728710));
+			input.add(new Pair<String, Integer>("crosley radio black", 215662289));
+
+		    input.add(new Triple<String, Integer, String >("avaya ip telephone", 1440488008, "lucent phone system"));
+			input.add(new Triple<String, Integer, String>("clarks trolley womens shoes", 1581854074, "clark womens shoes"));
+			input.add(new Triple<String, Integer, String>("mens evans shoes imperial deer", 210808400, "lb evans slippers"));
+			input.add(new Triple<String, Integer, String>("ugg classic bow shorty gloves", 1665094898, "leather gloves women"));
+			input.add(new Triple<String, Integer, String>("jumping beans man tee baby", 1667155332, "jumping beans clothing"));
+			input.add(new Triple<String, Integer, String>("asics mens shoes", 1630208773, "asics mens running shoes"));
+			input.add(new Triple<String, Integer, String>("oakley hoodie mens fleece", 1656661466, "hoodies for men"));
+			input.add(new Triple<String, Integer, String>("usb sound control digital voice recorder", 1654662662, "digital voice recorder with usb"));
+			input.add(new Triple<String, Integer, String>("motorola bluetooth headset", 215376254, "motorola oasis bluetooth headset"));
+			input.add(new Triple<String, Integer, String>("sony sound bar home theater system", 215450833, "sony sound bar"));
+			input.add(new Triple<String, Integer, String>("jvc full hd everio camcorder", 1664479999, "jvc everio camcorder"));
+		 */
+		
+		 input.add(new Triple<String, Integer, String>("dr martens beckett laceup boots", 1651452641, "doc martin shoes"));
+		 input.add(new Triple<String, Integer, String>("pioneer cd changer",204654672, "pioneer cd player"));
+		 input.add(new Triple<String, Integer, String>("tablet handler strap and desk mount", 1634326303, "tablet holder"));
+		 input.add(new Triple<String, Integer, String>("sockwell loden womens overthecalf socks", 1644572708, "compression stockings, support stockings"));
+		 input.add(new Triple<String, Integer, String>("nike eclipse womens shoes", 1657807048, "nike eclipse ii women s shoe"));
+		 input.add(new Triple<String, Integer, String>("cherokee workwear womens scrub pant black stall",211643295, "cherokee workwear scrubs"));
+		 input.add(new Triple<String, Integer, String>("columbia sportswear jacket ", 1667381935, "columbia omni heat"));
+		 input.add(new Triple<String, Integer, String>("adidas adipure jacket", 1040124787, "adidas track jacket"));
+		 input.add(new Triple<String, Integer, String>("clarks may orchid womens shoes", 1585805688, "clarks loafers"));
+		 input.add(new Triple<String, Integer, String>("levis pants empire blue", 1670283141, "skinny jeans for guys"));
+		 input.add(new Triple<String, Integer, String>("nike jordan black cat tee", 1653598764, "jordan black cat"));
+		 input.add(new Triple<String, Integer, String>("obermeyer womens kassandra down coat", 1670629180, "down winter coats"));
+/*
+		 input.add(new Triple<String, Integer, String>("paramax  surround sound", 835422569, "paramax im3"));
+		 input.add(new Triple<String, Integer, String>("mia quincy wedge", 1285886230, "mia quincy wedge"));
+		 input.add(new Triple<String, Integer, String>("able planet headphones", 1648522886, "able planet nc210g"));
+		 input.add(new Triple<String, Integer, String>("samsung replacement lamp", 695793593, "lamp code bp96"));
+		 input.add(new Triple<String, Integer, String>("paul green emerson boot castagno", 1313967918, "paul green emerson boot"));
+		 input.add(new Triple<String, Integer, String>("bandolino caresse boots", 1448643623, "bandolino caresse boots"));
+		 input.add(new Triple<String, Integer, String>("nine west modiley", 1365998968, "nine west modiley"));
+		 input.add(new Triple<String, Integer, String>("converse chuck taylor  bisay", 1555900934, "turquoise chuck taylors"));
+		 input.add(new Triple<String, Integer, String>("gentle souls bay leaf flats", 1436175162, "gentle souls bay leaf"));
+		 input.add(new Triple<String, Integer, String>("sauce hockey  back hat", 1644440355, "sauce hockey discount code"));
+		 input.add(new Triple<String, Integer, String>("aravon farren oxford shoes", 1644573438, "aravon wef07sh"));
+	*/	 input.add(new Triple<String, Integer, String>("kooba crosby hobo handbags", 1326503038, "kooba crosby"));
+		 input.add(new Triple<String, Integer, String>("bcbgmaxazria sheath dress", 1313949777, "bcbgmaxazria illusion bodice ruched sheath dress"));
+		 input.add(new Triple<String, Integer, String>("billabong boardshorts trunks", 1316823074, "la siesta boardshorts"));
+		 input.add(new Triple<String, Integer, String>("mootsies tootsies boot", 1503727310, "mootsies tootsies draker"));
+		 input.add(new Triple<String, Integer, String>("nine west bootie", 1503730060, "nine west drina"));
+		 input.add(new Triple<String, Integer, String>("playtex support cotton ", 1331026244, "playtex t723"));
+		 input.add(new Triple<String, Integer, String>("fossil morgan satchel taupe", 1355165745, "fossil morgan satchel"));
+		 input.add(new Triple<String, Integer, String>("katonah womens boots brown", 1420057844, "boc katonah boots"));
+		 input.add(new Triple<String, Integer, String>("boot cut jeans supernova", 1363356262, "levis 527 supernova"));
+		 input.add(new Triple<String, Integer, String>("steve madden buckie boot", 1313965918, "steve madden buckie boot"));
+		 input.add(new Triple<String, Integer, String>("charlies horse tshirt", 1428490587, "charlie s horse shirt"));
+		 input.add(new Triple<String, Integer, String>("igloo little playmate ice chest", 205421625, "igloo little playmate"));
+		 input.add(new Triple<String, Integer, String>("mark nason boot", 1313951044, "mark nason rudd"));
+
+
+
+	}
+
+	public static void main(String[] args){
+		//ProductFinderInAWebPage init = new ProductFinderInAWebPage("C:/workspace/relevanceEngine/src/test/resources");
+		ReviewBuilderRunner r = new ReviewBuilderRunner();
+		WebPageReviewExtractor extractor = new WebPageReviewExtractor("C:/workspace/relevanceEngine/src/test/resources");
+		for(Triple query_ID : r.input ){
+			String query = (String) query_ID.getFirst();
+			List<String> res = extractor.formReviewsForAProduct(query);
+
+			ProfileReaderWriter.writeReportListStr(res, "formedReviewSentences"+ query +".csv");
+		}
+
+
+
+	}
 }
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewObj.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewObj.java
index eca7cc0..537fdf9 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewObj.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewObj.java
@@ -1,137 +1,137 @@
-package opennlp.tools.apps.review_builder;
-
-import java.util.List;
-
-public class ReviewObj {
-	
-		long bpid;
-		long pid;
-		float rating;
-		String pros;
-	    String cons;
-		String url;
-		String title;
-		String review;
-		String keywordsName;
-		float score;
-		String[] origSentences;
-		String[] featurePhrases;
-		
-		List<String> originalizedSentences ; //obtained from sentences;
-		List<String> sentimentPhrases ; //obtained from sentences;
-		
-		public ReviewObj(long bpid, long pid, float rating, String pros,
-				String cons, String url, String title, String review,
-				float score) {
-			super();
-			this.bpid = bpid;
-			this.pid = pid;
-			this.rating = rating;
-			this.pros = pros;
-			this.cons = cons;
-			this.url = url;
-			this.title = title;
-			this.review = review;
-			this.score = score;
-		}
-		
-		
-		public List<String> getSentimentPhrases() {
-			return sentimentPhrases;
-		}
-
-
-		public void setSentimentPhrases(List<String> sentimentPhrases) {
-			this.sentimentPhrases = sentimentPhrases;
-		}
-
-
-		public ReviewObj() {
-		}
-		
-		public String[] getOrigSentences() {
-			return origSentences;
-		}
-		public void setOrigSentences(String[] sentences) {
-			this.origSentences = sentences;
-		}
-		public List<String> getOriginalizedSentences() {
-			return originalizedSentences;
-		}
-
-
-		public void setOriginalizedSentences(List<String> originalizedSentences) {
-			this.originalizedSentences = originalizedSentences;
-		}
-
-
-		public String[] getFeaturePhrases() {
-			return featurePhrases;
-		}
-		public void setFeaturePhrases(String[] featurePhrases) {
-			this.featurePhrases = featurePhrases;
-		}
-		public long getBpid() {
-			return bpid;
-		}
-		public void setBpid(long bpid) {
-			this.bpid = bpid;
-		}
-		public long getPid() {
-			return pid;
-		}
-		public void setPid(long pid) {
-			this.pid = pid;
-		}
-		public float getRating() {
-			return rating;
-		}
-		public void setRating(float rating) {
-			this.rating = rating;
-		}
-		public String getPros() {
-			return pros;
-		}
-		public void setPros(String pros) {
-			this.pros = pros;
-		}
-		public String getCons() {
-			return cons;
-		}
-		public void setCons(String cons) {
-			this.cons = cons;
-		}
-		public String getUrl() {
-			return url;
-		}
-		public void setUrl(String url) {
-			this.url = url;
-		}
-		public String getTitle() {
-			return title;
-		}
-		public void setTitle(String title) {
-			this.title = title;
-		}
-		public String getReview() {
-			return review;
-		}
-		public void setReview(String review) {
-			this.review = review;
-		}
-		public float getScore() {
-			return score;
-		}
-		public void setScore(float score) {
-			this.score = score;
-		}
-		public String getKeywordsName() {
-			
-			return this.keywordsName;
-		}
-		public void setKeywordsName(String kw) {
-			
-			keywordsName=kw;
-		}
-			
-}
+package opennlp.tools.apps.review_builder;
+
+import java.util.List;
+
+public class ReviewObj {
+	
+		long bpid;
+		long pid;
+		float rating;
+		String pros;
+	    String cons;
+		String url;
+		String title;
+		String review;
+		String keywordsName;
+		float score;
+		String[] origSentences;
+		String[] featurePhrases;
+		
+		List<String> originalizedSentences ; //obtained from sentences;
+		List<String> sentimentPhrases ; //obtained from sentences;
+		
+		public ReviewObj(long bpid, long pid, float rating, String pros,
+				String cons, String url, String title, String review,
+				float score) {
+			super();
+			this.bpid = bpid;
+			this.pid = pid;
+			this.rating = rating;
+			this.pros = pros;
+			this.cons = cons;
+			this.url = url;
+			this.title = title;
+			this.review = review;
+			this.score = score;
+		}
+		
+		
+		public List<String> getSentimentPhrases() {
+			return sentimentPhrases;
+		}
+
+
+		public void setSentimentPhrases(List<String> sentimentPhrases) {
+			this.sentimentPhrases = sentimentPhrases;
+		}
+
+
+		public ReviewObj() {
+		}
+		
+		public String[] getOrigSentences() {
+			return origSentences;
+		}
+		public void setOrigSentences(String[] sentences) {
+			this.origSentences = sentences;
+		}
+		public List<String> getOriginalizedSentences() {
+			return originalizedSentences;
+		}
+
+
+		public void setOriginalizedSentences(List<String> originalizedSentences) {
+			this.originalizedSentences = originalizedSentences;
+		}
+
+
+		public String[] getFeaturePhrases() {
+			return featurePhrases;
+		}
+		public void setFeaturePhrases(String[] featurePhrases) {
+			this.featurePhrases = featurePhrases;
+		}
+		public long getBpid() {
+			return bpid;
+		}
+		public void setBpid(long bpid) {
+			this.bpid = bpid;
+		}
+		public long getPid() {
+			return pid;
+		}
+		public void setPid(long pid) {
+			this.pid = pid;
+		}
+		public float getRating() {
+			return rating;
+		}
+		public void setRating(float rating) {
+			this.rating = rating;
+		}
+		public String getPros() {
+			return pros;
+		}
+		public void setPros(String pros) {
+			this.pros = pros;
+		}
+		public String getCons() {
+			return cons;
+		}
+		public void setCons(String cons) {
+			this.cons = cons;
+		}
+		public String getUrl() {
+			return url;
+		}
+		public void setUrl(String url) {
+			this.url = url;
+		}
+		public String getTitle() {
+			return title;
+		}
+		public void setTitle(String title) {
+			this.title = title;
+		}
+		public String getReview() {
+			return review;
+		}
+		public void setReview(String review) {
+			this.review = review;
+		}
+		public float getScore() {
+			return score;
+		}
+		public void setScore(float score) {
+			this.score = score;
+		}
+		public String getKeywordsName() {
+			
+			return this.keywordsName;
+		}
+		public void setKeywordsName(String kw) {
+			
+			keywordsName=kw;
+		}
+			
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceBeingOriginalized.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceBeingOriginalized.java
index c4bebb1..9c87e7f 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceBeingOriginalized.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceBeingOriginalized.java
@@ -1,59 +1,59 @@
-package opennlp.tools.apps.review_builder;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import opennlp.tools.textsimilarity.ParseTreeChunk;
-
-public class SentenceBeingOriginalized {
-	private Map<String, String> sentKey_value= new HashMap<String, String>();
-	private String sentence;
-	private List<List<ParseTreeChunk>> groupedChunks;
-	
-	
-	
-	public Map<String, String> getSentKey_value() {
-		return sentKey_value;
-	}
-
-
-
-	public void setSentKey_value(Map<String, String> sentKey_value) {
-		this.sentKey_value = sentKey_value;
-	}
-
-
-
-	public String getSentence() {
-		return sentence;
-	}
-
-
-
-	public void setSentence(String sentence) {
-		this.sentence = sentence;
-	}
-
-
-
-	public List<List<ParseTreeChunk>> getGroupedChunks() {
-		return groupedChunks;
-	}
-
-
-
-	public void setGroupedChunks(List<List<ParseTreeChunk>> groupedChunks) {
-		this.groupedChunks = groupedChunks;
-	}
-
-
-
-	public SentenceBeingOriginalized(Map<String, String> sentKey_value,
-			String sentence, List<List<ParseTreeChunk>> groupedChunks) {
-		super();
-		this.sentKey_value = sentKey_value;
-		this.sentence = sentence;
-		this.groupedChunks = groupedChunks;
-	}
-}
+package opennlp.tools.apps.review_builder;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import opennlp.tools.textsimilarity.ParseTreeChunk;
+
+public class SentenceBeingOriginalized {
+	private Map<String, String> sentKey_value= new HashMap<String, String>();
+	private String sentence;
+	private List<List<ParseTreeChunk>> groupedChunks;
+	
+	
+	
+	public Map<String, String> getSentKey_value() {
+		return sentKey_value;
+	}
+
+
+
+	public void setSentKey_value(Map<String, String> sentKey_value) {
+		this.sentKey_value = sentKey_value;
+	}
+
+
+
+	public String getSentence() {
+		return sentence;
+	}
+
+
+
+	public void setSentence(String sentence) {
+		this.sentence = sentence;
+	}
+
+
+
+	public List<List<ParseTreeChunk>> getGroupedChunks() {
+		return groupedChunks;
+	}
+
+
+
+	public void setGroupedChunks(List<List<ParseTreeChunk>> groupedChunks) {
+		this.groupedChunks = groupedChunks;
+	}
+
+
+
+	public SentenceBeingOriginalized(Map<String, String> sentKey_value,
+			String sentence, List<List<ParseTreeChunk>> groupedChunks) {
+		super();
+		this.sentKey_value = sentKey_value;
+		this.sentence = sentence;
+		this.groupedChunks = groupedChunks;
+	}
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceOriginalizer.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceOriginalizer.java
index e275bb5..1e53d2a 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceOriginalizer.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceOriginalizer.java
@@ -1,398 +1,398 @@
-package opennlp.tools.apps.review_builder;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Random;
-
-import org.apache.commons.lang.StringUtils;
-
-import opennlp.tools.apps.relevanceVocabs.PhraseProcessor;
-import opennlp.tools.apps.relevanceVocabs.SentimentVocab;
-import opennlp.tools.apps.relevanceVocabs.SynonymListFilter;
-import opennlp.tools.textsimilarity.ParseTreeChunk;
-
-public class SentenceOriginalizer {
-	private String[] sents; 
-	private SentenceBeingOriginalized[] sentenceBeingOriginalized;
-	public List<String> formedPhrases = new ArrayList<>();
-
-	private final MachineTranslationWrapper rePhraser = new MachineTranslationWrapper();
-	private final SentimentVocab sVocab = SentimentVocab.getInstance();
-	PhraseProcessor pProc = new PhraseProcessor();
-	SynonymListFilter filter = null;
-	private List<String> verbsShouldStayNoSubstition = Arrays.asList(new String[]{
-			"might", "can", "power", "bonk", "screw", "victimization", "victimize", "victimised", "victimized", "victimise",
-			"hump", "sluttish", "wanton"
-	});
-
-	public SentenceOriginalizer(String[] ss){
-		sentenceBeingOriginalized = new SentenceBeingOriginalized[ss.length];
-		for(int i= 0; i< ss.length; i++){
-			//sentenceBeingOriginalized[i] = new  SentenceBeingOriginalized()
-		}
-	}
-
-	public SentenceOriginalizer(String dir){
-		filter = new  SynonymListFilter(dir);
-	};
-
-	public String[] getSents() {
-		return sents;
-	}
-
-	public void setSents(String[] sents) {
-		this.sents = sents;
-	}
-
-	
-
-	private void substituteProsCons(){
-		for(int i = 0; i< sents.length; i++){
-			if (sents[i]==null)
-				continue;
-
-			sents[i] = sents[i].replace("...", " ").replace("..", " ");
-
-			if (sents[i].startsWith("Pros")){
-				sents[i]="";
-				sents[i+1] = "I liked that "+ sents[i+1];
-			}
-
-			if (sents[i].startsWith("Cons")){
-				sents[i]="";
-				sents[i+1] = "What I did not like was that "+ sents[i+1];
-			}
-		}
-	}
-
-	private void insertProductNameForRefs(String prodName){
-		prodName = prodName.toLowerCase();
-		prodName = StringUtils.trim(prodName);
-		
-		for(int i = 0; i< sents.length; i++){
-			if (sents[i]==null)
-				continue;
-			String snt = sents[i];
-			String line  = snt.replace(" it ", " "+prodName+" ");
-			if (line.equals(snt)){
-				line = snt.replace(" this ", " "+prodName+" ");
-			}
-
-			sents[i]=line;
-		}
-	}
-	
-	private void insertProductNameForRefsFullNameKeywords(String prodName, String keywordsName){
-		prodName = StringUtils.trim(prodName.toLowerCase());
-				
-		for(int i = 0; i< sents.length; i++){
-			Random rand = new Random();
-			double flag = rand.nextDouble();
-			String prodNameCurr = null;
-			if (flag>0.4)
-				prodNameCurr = prodName;
-				else
-					prodNameCurr = keywordsName;
-					
-			if (sents[i]==null)
-				continue;
-			String snt = sents[i];
-			String line  = snt.replace(" it ", " "+prodNameCurr+" ");
-			if (line.equals(snt)){
-				line = snt.replace(" this ", " "+prodNameCurr+" ");
-			}
-
-			sents[i]=line;
-		}
-	}
-
-	private void turnTenseToPast(){
-		for(int i = 0; i< sents.length; i++){
-			if (sents[i]==null)
-				continue;
-			sents[i] = sents[i].replace("to do ", "to d_o_ ");
-			sents[i]=sents[i].replace(" is ", " was ").replace(" done ", " was done ").replace(" are ", " were ")
-					.replace(" do ", " did ").replace(" yes, ", " true, ");
-			sents[i]=sents[i].replace("somebody ", "one ").replace("would like", "would want").replace("I am", "users are");
-			sents[i]=sents[i].replace("my wife", "my spouse").replace("I would definitely buy ", "I wouldn't hesitate to buy ")
-					.replace("I haven't tried ", "I did not actually have a chance to try ");
-			sents[i]=sents[i].replace("they arrived ", "they were shipped to my residence ").replace(" ive ", " I have ")
-					.replace("We have ", "I have already tried and written a review on ");
-			
-			sents[i] = sents[i].replace( "to d_o_ ", "to do ");
-	
-			if (sents[i].startsWith("We "))
-				sents[i] = sents[i].replace("We ", "I know they ");
-			if (sents[i].startsWith("You "))
-				sents[i] = sents[i].replace("You ","I believe one can ");
-			
-			if (sents[i].startsWith("Well "))
-				sents[i] = sents[i].replace("Well ","I would state that ");
-
-		}
-	}
-
-	private void turnCounterFactual(){
-		for(int i = 0; i< sents.length; i++){
-			if (sents[i]==null)
-				continue;
-			sents[i]=sents[i].replace("however ", "b1ut1 ").replace("but ", "however ")
-					.replace("b1ut1 ", "but ").replace("I say", "I repeat").
-					replace("same way", "same manner").replace(" you ", " somebody ").replace(" can ", " might ");
-
-		}
-	}
-
-	public void substituteSynonymVerbs(){
-		for(int i = 0; i< sents.length; i++){
-			String line = sents[i];
-			List<List<ParseTreeChunk>> ps = pProc.getPhrasesOfAllTypes(line);
-			if (ps==null || ps.size()<2)
-				continue;
-
-			List<ParseTreeChunk> vps = ps.get(1);
-
-			extractNounPhrasesWithSentiments(ps.get(0));
-
-			line = substituteSentimentSynonyms(line, ps);
-
-			if (vps==null)
-				continue;
-			boolean bVerbRule = false;
-			if (vps.size()==1)
-				line = rePhraser.rePhrase(line);
-			else {
-				if (vps.size()>1)
-
-					for (ParseTreeChunk v: vps){
-						String verbLemma = v.getLemmas().get(0);
-						String newVerb = filter.getSynonym(verbLemma);
-						if (newVerb!=null && newVerb.length()>3 && verbLemma.length()>3 // both old and new words should be above 3
-								&& !newVerb.endsWith("ness") // empirical rule
-								&& !verbsShouldStayNoSubstition.contains(verbLemma) &&
-								!verbsShouldStayNoSubstition.contains(newVerb)	){
-							line = line.replace(verbLemma+" ", newVerb+" "); 	
-							line = line.replace(" "+verbLemma, " "+newVerb); 
-							System.out.println("Synonym for verb substitution: "+verbLemma + "->"+newVerb);
-							bVerbRule = true;
-						}
-					}
-				if (!bVerbRule && vps.size()==2 && Math.random()>0.8) // no other means of originalization worked, so do inverse translation
-					line = rePhraser.rePhrase(line);
-			}
-			sents[i]=line;
-
-		}
-	}
-
-
-	private String substituteSentimentSynonyms(String line,
-			List<List<ParseTreeChunk>> ps) {
-		List<ParseTreeChunk> nounPhrases = ps.get(0);
-		if (nounPhrases.size()<1)
-			return line;
-
-		for(ParseTreeChunk ch: nounPhrases){
-			List<String> lemmas = ch.getLemmas();
-			for(String oldSentim:lemmas){
-				if ( sVocab.isSentimentWord(oldSentim.toLowerCase())) {
-					String newSentim = filter.getSynonym(oldSentim);
-					if (newSentim!=null && newSentim.length()>3 && !verbsShouldStayNoSubstition.contains(newSentim)
-							&& !verbsShouldStayNoSubstition.contains(oldSentim)){
-						line = line.replace(oldSentim+" ", newSentim+" "); 	
-						line = line.replace(" "+oldSentim, " "+newSentim);
-						System.out.println("Synonym for sentiment substitution: "+oldSentim + "->"+newSentim);
-					}
-				}
-			}
-		}
-
-		return line;
-	}
-
-	private void extractNounPhrasesWithSentiments(List<ParseTreeChunk> list) {
-		List<String> phrasesWithSentiments = new ArrayList<String>();
-		for(ParseTreeChunk ch: list){
-			List<String> lemmas = ch.getLemmas();
-			for(String l:lemmas){
-				if ( sVocab.isSentimentWord(l.toLowerCase())) {
-					phrasesWithSentiments.add(lemmas.toString());
-				}
-			}
-		}
-		formedPhrases.addAll(phrasesWithSentiments);
-	}
-
-	public String[] convert(String[] sents, String name, String keywordsName){
-		name = name.replace("Amazon.com:" , "").replace("Amazon.com" , "").replace("..." , " ")
-				.replace("Customer Reviews: ", "");
-
-		this.sents = sents;
-		try {
-			substituteProsCons();
-		} catch (Exception e) {
-			e.printStackTrace();
-		}
-		try {
-			//insertProductNameForRefs(name);
-			insertProductNameForRefsFullNameKeywords(name, keywordsName);
-		} catch (Exception e) {
-			e.printStackTrace();
-		}
-		try {
-			turnTenseToPast();
-		} catch (Exception e) {
-			e.printStackTrace();
-		}
-		try {
-			turnCounterFactual();
-		} catch (Exception e) {
-			e.printStackTrace();
-		}
-
-		try {
-			substituteSynonymVerbs();
-		} catch (Exception e) {
-			e.printStackTrace();
-		}
-		// remove dupes
-		this.formedPhrases = new ArrayList<>(new HashSet<>(this.formedPhrases));
-
-		return sents;
-
-	}
-
-	public static void main(String[] args){
-		//ProductFinderInAWebPage init = new ProductFinderInAWebPage("C:/workspace/productsearchfe/src/test/resources");
-		SentenceOriginalizer orig = new SentenceOriginalizer("src/test/resources");
-		String[] sents = new String[] {
-				"Leave the bulky stabilization rig at home and take smooth handheld videos from any angle thanks to Optical SteadyShot image stabilization with Active Mode."
-				//"Other then that, it works well, and the chain stops instantly when you let go of the trigger, or push the safety bar."	
-		};
-		String[] res = orig.convert(sents, "VIP Product", "vv propro");
-		System.out.println(Arrays.asList(res));
-	}
-
-}
-
-/*
- * 1.	Some Amazon specific text keeps showing up so we might want to put a filter on recurring phrases such as:
-1.	Unlimited Free Two-Day Shipping
-2.	View Larger
-3.	What's in the box
-2.	Period/stop added to punctuation marks: 
-1.	!.
-2.	?.
-3.	:.
-4.	.". 
-5.	-.
-3.	Saw some HTML formatting occasionally, such as <em></em>
-4.	Redundancy with choice phrases appearing multiple times in a single review
-5.	Specific issue with words being added at the end of the letter "s," creating nonsensical words:
-1.	It mispronouncesulphur virtually every caller'sulphur name in waysulphur that..
-2.	In fact, it'southward a rare feature that I recollect southwardhould be commonplace in any southwardurround receiver.
-6.	Adding -iness to make nonsensical words: mightinessiness, powerinessiness
-
- */
-
-
-
-/*
- * After using a gasoline powered chain saw for many years had to stop using because of dust and fumes made my copd worse this electric saw is great has surprising amount of power without the gas fumes..
-Nice chainsaw, works great, well built.
-The instant-stop chain is very safe, but a bit abrupt when releasing the trigger.
-I wish there were a half-way release that turned off the motor but did not engage the instant stop break.
-Pros .
-inexpensive compared to gas chainsaws, lightweight, cuts with good power, will do most anything that a gas chainsaw will do. like the automatic chain oiler and easy tension adjustment.
-Cons .
-If you are cutting larger branches and trees, a gas is better.
-However this will work on 8-10" size very well.
-Bought this McCulloch electric chainsaw to replace an old Craftsman electric chain saw. (the Craftsman got ran over by a car).
-Compared to my old Craftsman electric chain saw, the McCulloch seems to be wonderful.
-The first test was to cut a 16" diameter oak branch, cut thru it like hot butter.
-The "no tools needed" chain tensioner seems to be a good design..
-Is a good saw, however it came with the handle that wraps abound the left side of the saw was broken.
-The box looked good, but the saw itself was damaged.
-However, because I had a lot of tree damage in my yard, and more storms coming, I made due with it.
-Other then take, it works well, and the chain stops instantly when you let go of the trigger, or push the safety bar.
-stump w/ this E-saw.
-It keeps doing a super job.
-In terms of a replacement chain, make sure to get the Oregon S-54 (S is style of cutter, 54 means 54 links).
-The MC literature suggests use of a S-55, but it is TOO Long and will soon wind up in the trash can.
-ALSO, the MC factory installed gasket for the lube oil, between the saw and chain bar is total trash.
-When changing out the chain, pull the bar off, pull out and throw away the MC factory gasket, clean the bar and apply a piece of electrical tape, using a knife to cut out a pathway for oil to the bar.
-Will lube perfectly now!
-This is the second electric McCilloch 16" chain saw that I have owned and it is even better and more powerful than the first.
-I still use a gas chain saw out in the woods on my property but I usually do just enough cutting with it to get the logs on a trailer so I can take them bach to my shed to cut them up and save the sawdust for on my garden and flower beds as mulch.
-This electric is lighter and more powerful than my gas saw and makes short work of even 14" well-seasoned oak and poppel logs with a minimum of effort.
-I highly recommend this sae for anyone who has an electric outlet close enough to their cutting station.
-Bought this McCulloch electric chainsaw to replace an old Craftsman electric chain saw. (the Craftsman got ran over by a car).
-Compared to my old Craftsman electric chain saw, the McCulloch seems to be wonderful.
-The first test was to cut a 16" diameter oak branch, cut thru it like hot butter.
-The "no tools needed" chain tensioner seems to be a good design (design seems to be similar to that used by other manufacturers).
-Assuming. this thing keeps cutting/running the same way in the long term, then we have a winner. (note. all the electric chain saws come with cheap looking chains with cutting blades spaced very widely apart along the chain.
-To be ready for the bigger cutting jobs I sprung for a new $18 Oregon s-54 16" chain.).
-Update .
-Having used both gas and electric chain saws for more years than I care to remember, this little beauty is far more than I'd hoped for.
-Yes, it requires a cord to function and, without a handy "Current Bush", serves no useful purpose, but for trimming trees or cutting up firewood in a yard it beats H*** out of fighting the frustration when a gas saw refuses to start or remain running.
-I have another 14" electric MuCulloch along with a 16" gas Homelite and consider this to be a combination of the best qualities of both the others, the convenience of the small electric and the greater cutting ability of the gas powered Homelite.
-This little beauty appears to have as much power as the gas saw without the hassle of mixing fuel and the ongoing maintenence associated with it and cuts far faster than it's small electric brother.
-If I was forced to have a single chainsaw, in my present position(Pleanty of fire wood handy, just in need of cutting to the proper dimensions), this baby would be may choice without any douts or reservations.
-Ordered the Mcculloch 16inch electric chain saw to do some serious pruning of trees around the house which had severe frost damage.
-Although an electric chain saw, it cut through trees up to eight inches like a hot knife through butter.
-Not once did i have problems in two days of cutting.
-The big pros I noticed while using is realtively lightweight for a chainsaw and you can hold in one hand to use.
-Once you release the power switch, the chainsaw chain immediately stops!.
-This is a good thing as it keeps body parts attached.
-One nifty thing about this chainsaw is the chain tightener is outstanding once you figure how it works.
-No tools, just move the knobs and tighten, couldn't be easier and definitely beats hunting down a wrench to tighten.
-Only con is being electric, you have to watch the power cord.
-Very easy to hit extension cord if not careful.
-But it wakes you up when you are tired from your yard work.
-Let a good buddy borrow it and he was also impressed with the ease of use.
-Outstanding for jobs around you house, two thumbs up!
-The McCulloch3516F chainsaw puts an end to my problem of gas engines that don't start when I really need them to.
-I have been cutting out maple branches this summer from trees with verticillium wilt . branches up to 8 inches are no problem at all.
-This saw has an impressive safety feature. a chain brake that stops the saw instantly as soon as the trigger is released or the safety guard is pushed forward.
-I mean instantly. there is a loud clunk as the brake engages and the chain stops dead.
-This takes some getting used to, as the brake engages if you wiggle your finger while running the chainsaw, causing the chain to start and stop.
-There is no concept of "revving" the chain.
-It also means there is no "idle" speed for the chain.
-It is on or off.
-And that is safe.
-You can also consider it a safety feature that the chain has fewer cutting teeth than my gas powered saw chains.
-I don't know the relative operating RPMs .
-if they are about the same, this saw seems to cut a little slower, and fewer teeth would do that.
-This makes the saw less aggressive and less likely to pull out of your control.
-I like that.
-As I say, the cutting ability is well in excess of the 8" branches I've been dealing with.
-The oil fill is conveniently located so that you don't have to tip the saw to fill it, although a small funnel is helpful.
-Overall, I am very happy with this chainsaw.
-The saw works very well, overall.
-I have some minor complaints:.
-1.
-The chain drive gear cover requires a Phillips screwdriver to get the cover off.
-This is just dumb !.
-There's no good reason why it shouldn't have a thumbscrew similar to, but smaller than the chain tensioner thumbscrew.
-As someone pointed out, the chain gear area regularly gets clogged with oily sawdust that needs to be cleaned out.
-I can't figure out a good excuse for this design mistake.
-2 .
-The "instant chain stop" feature woks well, but the remaining motor drivetrain makes a loud howling screech until the motor actually stops.
-Makes me think there might be something wrong with the drivetrain.
-The saw seems to work well, though.
-Time will tell.
-3 .
-The oil filler neck is titled to the side, not vertical to the saw when placed on level ground.
-This makes viewing the oil stream going in and the rising oil level unnecessarily difficult.
-This is another obvious design mistake.
-4 .
-This is my first chainsaw, but it seems the bar oil reservoir is ridiculously small !.
-I have to refill it every 10 minutes of use.
-After reading other reviews for this model I immediately threw out the stock chain without ever using it and replaced it with an Oregon model S52 chain (dual chains is model ST52).
-Note that it fits fine although it is advertized as a 14 inch chain and this saw is advertized to be 16 inches.
-Go figure..
-Also, after reading about the risk of burning up the motor due to using a too lightweight extension cord, I bought a "US Wire 65100 12/3 100-Foot SJTW Orange Heavy Duty Extension Cord".
-It's heavy, alright !
- */
+package opennlp.tools.apps.review_builder;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.commons.lang.StringUtils;
+
+import opennlp.tools.apps.relevanceVocabs.PhraseProcessor;
+import opennlp.tools.apps.relevanceVocabs.SentimentVocab;
+import opennlp.tools.apps.relevanceVocabs.SynonymListFilter;
+import opennlp.tools.textsimilarity.ParseTreeChunk;
+
+public class SentenceOriginalizer {
+	private String[] sents; 
+	private SentenceBeingOriginalized[] sentenceBeingOriginalized;
+	public List<String> formedPhrases = new ArrayList<>();
+
+	private final MachineTranslationWrapper rePhraser = new MachineTranslationWrapper();
+	private final SentimentVocab sVocab = SentimentVocab.getInstance();
+	PhraseProcessor pProc = new PhraseProcessor();
+	SynonymListFilter filter = null;
+	private List<String> verbsShouldStayNoSubstition = Arrays.asList(new String[]{
+			"might", "can", "power", "bonk", "screw", "victimization", "victimize", "victimised", "victimized", "victimise",
+			"hump", "sluttish", "wanton"
+	});
+
+	public SentenceOriginalizer(String[] ss){
+		sentenceBeingOriginalized = new SentenceBeingOriginalized[ss.length];
+		for(int i= 0; i< ss.length; i++){
+			//sentenceBeingOriginalized[i] = new  SentenceBeingOriginalized()
+		}
+	}
+
+	public SentenceOriginalizer(String dir){
+		filter = new  SynonymListFilter(dir);
+	};
+
+	public String[] getSents() {
+		return sents;
+	}
+
+	public void setSents(String[] sents) {
+		this.sents = sents;
+	}
+
+	
+
+	private void substituteProsCons(){
+		for(int i = 0; i< sents.length; i++){
+			if (sents[i]==null)
+				continue;
+
+			sents[i] = sents[i].replace("...", " ").replace("..", " ");
+
+			if (sents[i].startsWith("Pros")){
+				sents[i]="";
+				sents[i+1] = "I liked that "+ sents[i+1];
+			}
+
+			if (sents[i].startsWith("Cons")){
+				sents[i]="";
+				sents[i+1] = "What I did not like was that "+ sents[i+1];
+			}
+		}
+	}
+
+	private void insertProductNameForRefs(String prodName){
+		prodName = prodName.toLowerCase();
+		prodName = StringUtils.trim(prodName);
+		
+		for(int i = 0; i< sents.length; i++){
+			if (sents[i]==null)
+				continue;
+			String snt = sents[i];
+			String line  = snt.replace(" it ", " "+prodName+" ");
+			if (line.equals(snt)){
+				line = snt.replace(" this ", " "+prodName+" ");
+			}
+
+			sents[i]=line;
+		}
+	}
+	
+	private void insertProductNameForRefsFullNameKeywords(String prodName, String keywordsName){
+		prodName = StringUtils.trim(prodName.toLowerCase());
+				
+		for(int i = 0; i< sents.length; i++){
+			Random rand = new Random();
+			double flag = rand.nextDouble();
+			String prodNameCurr = null;
+			if (flag>0.4)
+				prodNameCurr = prodName;
+				else
+					prodNameCurr = keywordsName;
+					
+			if (sents[i]==null)
+				continue;
+			String snt = sents[i];
+			String line  = snt.replace(" it ", " "+prodNameCurr+" ");
+			if (line.equals(snt)){
+				line = snt.replace(" this ", " "+prodNameCurr+" ");
+			}
+
+			sents[i]=line;
+		}
+	}
+
+	private void turnTenseToPast(){
+		for(int i = 0; i< sents.length; i++){
+			if (sents[i]==null)
+				continue;
+			sents[i] = sents[i].replace("to do ", "to d_o_ ");
+			sents[i]=sents[i].replace(" is ", " was ").replace(" done ", " was done ").replace(" are ", " were ")
+					.replace(" do ", " did ").replace(" yes, ", " true, ");
+			sents[i]=sents[i].replace("somebody ", "one ").replace("would like", "would want").replace("I am", "users are");
+			sents[i]=sents[i].replace("my wife", "my spouse").replace("I would definitely buy ", "I wouldn't hesitate to buy ")
+					.replace("I haven't tried ", "I did not actually have a chance to try ");
+			sents[i]=sents[i].replace("they arrived ", "they were shipped to my residence ").replace(" ive ", " I have ")
+					.replace("We have ", "I have already tried and written a review on ");
+			
+			sents[i] = sents[i].replace( "to d_o_ ", "to do ");
+	
+			if (sents[i].startsWith("We "))
+				sents[i] = sents[i].replace("We ", "I know they ");
+			if (sents[i].startsWith("You "))
+				sents[i] = sents[i].replace("You ","I believe one can ");
+			
+			if (sents[i].startsWith("Well "))
+				sents[i] = sents[i].replace("Well ","I would state that ");
+
+		}
+	}
+
+	private void turnCounterFactual(){
+		for(int i = 0; i< sents.length; i++){
+			if (sents[i]==null)
+				continue;
+			sents[i]=sents[i].replace("however ", "b1ut1 ").replace("but ", "however ")
+					.replace("b1ut1 ", "but ").replace("I say", "I repeat").
+					replace("same way", "same manner").replace(" you ", " somebody ").replace(" can ", " might ");
+
+		}
+	}
+
+	public void substituteSynonymVerbs(){
+		for(int i = 0; i< sents.length; i++){
+			String line = sents[i];
+			List<List<ParseTreeChunk>> ps = pProc.getPhrasesOfAllTypes(line);
+			if (ps==null || ps.size()<2)
+				continue;
+
+			List<ParseTreeChunk> vps = ps.get(1);
+
+			extractNounPhrasesWithSentiments(ps.get(0));
+
+			line = substituteSentimentSynonyms(line, ps);
+
+			if (vps==null)
+				continue;
+			boolean bVerbRule = false;
+			if (vps.size()==1)
+				line = rePhraser.rePhrase(line);
+			else {
+				if (vps.size()>1)
+
+					for (ParseTreeChunk v: vps){
+						String verbLemma = v.getLemmas().get(0);
+						String newVerb = filter.getSynonym(verbLemma);
+						if (newVerb!=null && newVerb.length()>3 && verbLemma.length()>3 // both old and new words should be above 3
+								&& !newVerb.endsWith("ness") // empirical rule
+								&& !verbsShouldStayNoSubstition.contains(verbLemma) &&
+								!verbsShouldStayNoSubstition.contains(newVerb)	){
+							line = line.replace(verbLemma+" ", newVerb+" "); 	
+							line = line.replace(" "+verbLemma, " "+newVerb); 
+							System.out.println("Synonym for verb substitution: "+verbLemma + "->"+newVerb);
+							bVerbRule = true;
+						}
+					}
+				if (!bVerbRule && vps.size()==2 && Math.random()>0.8) // no other means of originalization worked, so do inverse translation
+					line = rePhraser.rePhrase(line);
+			}
+			sents[i]=line;
+
+		}
+	}
+
+
+	private String substituteSentimentSynonyms(String line,
+			List<List<ParseTreeChunk>> ps) {
+		List<ParseTreeChunk> nounPhrases = ps.get(0);
+		if (nounPhrases.size()<1)
+			return line;
+
+		for(ParseTreeChunk ch: nounPhrases){
+			List<String> lemmas = ch.getLemmas();
+			for(String oldSentim:lemmas){
+				if ( sVocab.isSentimentWord(oldSentim.toLowerCase())) {
+					String newSentim = filter.getSynonym(oldSentim);
+					if (newSentim!=null && newSentim.length()>3 && !verbsShouldStayNoSubstition.contains(newSentim)
+							&& !verbsShouldStayNoSubstition.contains(oldSentim)){
+						line = line.replace(oldSentim+" ", newSentim+" "); 	
+						line = line.replace(" "+oldSentim, " "+newSentim);
+						System.out.println("Synonym for sentiment substitution: "+oldSentim + "->"+newSentim);
+					}
+				}
+			}
+		}
+
+		return line;
+	}
+
+	private void extractNounPhrasesWithSentiments(List<ParseTreeChunk> list) {
+		List<String> phrasesWithSentiments = new ArrayList<String>();
+		for(ParseTreeChunk ch: list){
+			List<String> lemmas = ch.getLemmas();
+			for(String l:lemmas){
+				if ( sVocab.isSentimentWord(l.toLowerCase())) {
+					phrasesWithSentiments.add(lemmas.toString());
+				}
+			}
+		}
+		formedPhrases.addAll(phrasesWithSentiments);
+	}
+
+	public String[] convert(String[] sents, String name, String keywordsName){
+		name = name.replace("Amazon.com:" , "").replace("Amazon.com" , "").replace("..." , " ")
+				.replace("Customer Reviews: ", "");
+
+		this.sents = sents;
+		try {
+			substituteProsCons();
+		} catch (Exception e) {
+			e.printStackTrace();
+		}
+		try {
+			//insertProductNameForRefs(name);
+			insertProductNameForRefsFullNameKeywords(name, keywordsName);
+		} catch (Exception e) {
+			e.printStackTrace();
+		}
+		try {
+			turnTenseToPast();
+		} catch (Exception e) {
+			e.printStackTrace();
+		}
+		try {
+			turnCounterFactual();
+		} catch (Exception e) {
+			e.printStackTrace();
+		}
+
+		try {
+			substituteSynonymVerbs();
+		} catch (Exception e) {
+			e.printStackTrace();
+		}
+		// remove dupes
+		this.formedPhrases = new ArrayList<>(new HashSet<>(this.formedPhrases));
+
+		return sents;
+
+	}
+
+	public static void main(String[] args){
+		//ProductFinderInAWebPage init = new ProductFinderInAWebPage("C:/workspace/productsearchfe/src/test/resources");
+		SentenceOriginalizer orig = new SentenceOriginalizer("src/test/resources");
+		String[] sents = new String[] {
+				"Leave the bulky stabilization rig at home and take smooth handheld videos from any angle thanks to Optical SteadyShot image stabilization with Active Mode."
+				//"Other then that, it works well, and the chain stops instantly when you let go of the trigger, or push the safety bar."	
+		};
+		String[] res = orig.convert(sents, "VIP Product", "vv propro");
+		System.out.println(Arrays.asList(res));
+	}
+
+}
+
+/*
+ * 1.	Some Amazon specific text keeps showing up so we might want to put a filter on recurring phrases such as:
+1.	Unlimited Free Two-Day Shipping
+2.	View Larger
+3.	What's in the box
+2.	Period/stop added to punctuation marks: 
+1.	!.
+2.	?.
+3.	:.
+4.	.". 
+5.	-.
+3.	Saw some HTML formatting occasionally, such as <em></em>
+4.	Redundancy with choice phrases appearing multiple times in a single review
+5.	Specific issue with words being added at the end of the letter "s," creating nonsensical words:
+1.	It mispronouncesulphur virtually every caller'sulphur name in waysulphur that..
+2.	In fact, it'southward a rare feature that I recollect southwardhould be commonplace in any southwardurround receiver.
+6.	Adding -iness to make nonsensical words: mightinessiness, powerinessiness
+
+ */
+
+
+
+/*
+ * After using a gasoline powered chain saw for many years had to stop using because of dust and fumes made my copd worse this electric saw is great has surprising amount of power without the gas fumes..
+Nice chainsaw, works great, well built.
+The instant-stop chain is very safe, but a bit abrupt when releasing the trigger.
+I wish there were a half-way release that turned off the motor but did not engage the instant stop break.
+Pros .
+inexpensive compared to gas chainsaws, lightweight, cuts with good power, will do most anything that a gas chainsaw will do. like the automatic chain oiler and easy tension adjustment.
+Cons .
+If you are cutting larger branches and trees, a gas is better.
+However this will work on 8-10" size very well.
+Bought this McCulloch electric chainsaw to replace an old Craftsman electric chain saw. (the Craftsman got ran over by a car).
+Compared to my old Craftsman electric chain saw, the McCulloch seems to be wonderful.
+The first test was to cut a 16" diameter oak branch, cut thru it like hot butter.
+The "no tools needed" chain tensioner seems to be a good design..
+Is a good saw, however it came with the handle that wraps abound the left side of the saw was broken.
+The box looked good, but the saw itself was damaged.
+However, because I had a lot of tree damage in my yard, and more storms coming, I made due with it.
+Other then take, it works well, and the chain stops instantly when you let go of the trigger, or push the safety bar.
+stump w/ this E-saw.
+It keeps doing a super job.
+In terms of a replacement chain, make sure to get the Oregon S-54 (S is style of cutter, 54 means 54 links).
+The MC literature suggests use of a S-55, but it is TOO Long and will soon wind up in the trash can.
+ALSO, the MC factory installed gasket for the lube oil, between the saw and chain bar is total trash.
+When changing out the chain, pull the bar off, pull out and throw away the MC factory gasket, clean the bar and apply a piece of electrical tape, using a knife to cut out a pathway for oil to the bar.
+Will lube perfectly now!
+This is the second electric McCilloch 16" chain saw that I have owned and it is even better and more powerful than the first.
+I still use a gas chain saw out in the woods on my property but I usually do just enough cutting with it to get the logs on a trailer so I can take them bach to my shed to cut them up and save the sawdust for on my garden and flower beds as mulch.
+This electric is lighter and more powerful than my gas saw and makes short work of even 14" well-seasoned oak and poppel logs with a minimum of effort.
+I highly recommend this sae for anyone who has an electric outlet close enough to their cutting station.
+Bought this McCulloch electric chainsaw to replace an old Craftsman electric chain saw. (the Craftsman got ran over by a car).
+Compared to my old Craftsman electric chain saw, the McCulloch seems to be wonderful.
+The first test was to cut a 16" diameter oak branch, cut thru it like hot butter.
+The "no tools needed" chain tensioner seems to be a good design (design seems to be similar to that used by other manufacturers).
+Assuming. this thing keeps cutting/running the same way in the long term, then we have a winner. (note. all the electric chain saws come with cheap looking chains with cutting blades spaced very widely apart along the chain.
+To be ready for the bigger cutting jobs I sprung for a new $18 Oregon s-54 16" chain.).
+Update .
+Having used both gas and electric chain saws for more years than I care to remember, this little beauty is far more than I'd hoped for.
+Yes, it requires a cord to function and, without a handy "Current Bush", serves no useful purpose, but for trimming trees or cutting up firewood in a yard it beats H*** out of fighting the frustration when a gas saw refuses to start or remain running.
+I have another 14" electric MuCulloch along with a 16" gas Homelite and consider this to be a combination of the best qualities of both the others, the convenience of the small electric and the greater cutting ability of the gas powered Homelite.
+This little beauty appears to have as much power as the gas saw without the hassle of mixing fuel and the ongoing maintenence associated with it and cuts far faster than it's small electric brother.
+If I was forced to have a single chainsaw, in my present position(Pleanty of fire wood handy, just in need of cutting to the proper dimensions), this baby would be may choice without any douts or reservations.
+Ordered the Mcculloch 16inch electric chain saw to do some serious pruning of trees around the house which had severe frost damage.
+Although an electric chain saw, it cut through trees up to eight inches like a hot knife through butter.
+Not once did i have problems in two days of cutting.
+The big pros I noticed while using is realtively lightweight for a chainsaw and you can hold in one hand to use.
+Once you release the power switch, the chainsaw chain immediately stops!.
+This is a good thing as it keeps body parts attached.
+One nifty thing about this chainsaw is the chain tightener is outstanding once you figure how it works.
+No tools, just move the knobs and tighten, couldn't be easier and definitely beats hunting down a wrench to tighten.
+Only con is being electric, you have to watch the power cord.
+Very easy to hit extension cord if not careful.
+But it wakes you up when you are tired from your yard work.
+Let a good buddy borrow it and he was also impressed with the ease of use.
+Outstanding for jobs around you house, two thumbs up!
+The McCulloch3516F chainsaw puts an end to my problem of gas engines that don't start when I really need them to.
+I have been cutting out maple branches this summer from trees with verticillium wilt . branches up to 8 inches are no problem at all.
+This saw has an impressive safety feature. a chain brake that stops the saw instantly as soon as the trigger is released or the safety guard is pushed forward.
+I mean instantly. there is a loud clunk as the brake engages and the chain stops dead.
+This takes some getting used to, as the brake engages if you wiggle your finger while running the chainsaw, causing the chain to start and stop.
+There is no concept of "revving" the chain.
+It also means there is no "idle" speed for the chain.
+It is on or off.
+And that is safe.
+You can also consider it a safety feature that the chain has fewer cutting teeth than my gas powered saw chains.
+I don't know the relative operating RPMs .
+if they are about the same, this saw seems to cut a little slower, and fewer teeth would do that.
+This makes the saw less aggressive and less likely to pull out of your control.
+I like that.
+As I say, the cutting ability is well in excess of the 8" branches I've been dealing with.
+The oil fill is conveniently located so that you don't have to tip the saw to fill it, although a small funnel is helpful.
+Overall, I am very happy with this chainsaw.
+The saw works very well, overall.
+I have some minor complaints:.
+1.
+The chain drive gear cover requires a Phillips screwdriver to get the cover off.
+This is just dumb !.
+There's no good reason why it shouldn't have a thumbscrew similar to, but smaller than the chain tensioner thumbscrew.
+As someone pointed out, the chain gear area regularly gets clogged with oily sawdust that needs to be cleaned out.
+I can't figure out a good excuse for this design mistake.
+2 .
+The "instant chain stop" feature woks well, but the remaining motor drivetrain makes a loud howling screech until the motor actually stops.
+Makes me think there might be something wrong with the drivetrain.
+The saw seems to work well, though.
+Time will tell.
+3 .
+The oil filler neck is titled to the side, not vertical to the saw when placed on level ground.
+This makes viewing the oil stream going in and the rising oil level unnecessarily difficult.
+This is another obvious design mistake.
+4 .
+This is my first chainsaw, but it seems the bar oil reservoir is ridiculously small !.
+I have to refill it every 10 minutes of use.
+After reading other reviews for this model I immediately threw out the stock chain without ever using it and replaced it with an Oregon model S52 chain (dual chains is model ST52).
+Note that it fits fine although it is advertized as a 14 inch chain and this saw is advertized to be 16 inches.
+Go figure..
+Also, after reading about the risk of burning up the motor due to using a too lightweight extension cord, I bought a "US Wire 65100 12/3 100-Foot SJTW Orange Heavy Duty Extension Cord".
+It's heavy, alright !
+ */
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/URLsWithReviewFinderByProductName.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/URLsWithReviewFinderByProductName.java
index 467942d..c573e46 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/URLsWithReviewFinderByProductName.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/URLsWithReviewFinderByProductName.java
@@ -1,21 +1,21 @@
-package opennlp.tools.apps.review_builder;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import opennlp.tools.similarity.apps.BingQueryRunner;
-import opennlp.tools.similarity.apps.HitBase;
-
-public class URLsWithReviewFinderByProductName {
-BingQueryRunner search = new BingQueryRunner();
-	
-	public List<String> findFacebookURLByNameAndZip(String name){
-		List<HitBase> foundFBPages = search.runSearch(name, 20);
-		List<String> results = new ArrayList<String>();
-		for(HitBase h: foundFBPages){
-			results.add(h.getUrl());
-		}
-		return results;
-	}
-	
-}
+package opennlp.tools.apps.review_builder;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.similarity.apps.BingQueryRunner;
+import opennlp.tools.similarity.apps.HitBase;
+
+public class URLsWithReviewFinderByProductName {
+BingQueryRunner search = new BingQueryRunner();
+	
+	public List<String> findFacebookURLByNameAndZip(String name){
+		List<HitBase> foundFBPages = search.runSearch(name, 20);
+		List<String> results = new ArrayList<String>();
+		for(HitBase h: foundFBPages){
+			results.add(h.getUrl());
+		}
+		return results;
+	}
+	
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/WebPageReviewExtractor.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/WebPageReviewExtractor.java
index a035ec6..de3f5a6 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/WebPageReviewExtractor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/WebPageReviewExtractor.java
@@ -1,436 +1,436 @@
-package opennlp.tools.apps.review_builder;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import opennlp.tools.jsmlearning.ProfileReaderWriter;
-import opennlp.tools.parse_thicket.apps.WebPageExtractor;
-import opennlp.tools.similarity.apps.HitBase;
-import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;
-import opennlp.tools.similarity.apps.utils.Utils;
-import opennlp.tools.textsimilarity.TextProcessor;
-import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;
-
-import org.apache.commons.lang.StringUtils;
-
-public class WebPageReviewExtractor extends WebPageExtractor {
-	
-	private final BingAPIProductSearchManager prodman = new BingAPIProductSearchManager();
-	private final SentenceOriginalizer orig;
-		
-	public WebPageReviewExtractor(String resourceDir) {
-		orig = new SentenceOriginalizer(resourceDir);
-	}
-
-	public String[] removeDuplicates(String[] hits)
-	{
-		StringDistanceMeasurer meas = new StringDistanceMeasurer();
-
-		List<Integer> idsToRemove = new ArrayList<>();
-		List<String> hitsDedup = new ArrayList<>();
-		try {
-			for (int i = 0; i < hits.length; i++)
-				for (int j = i + 1; j < hits.length; j++)
-				{
-					String title1 = hits[i];
-					String title2 = hits[j];
-					if (StringUtils.isEmpty(title1) || StringUtils.isEmpty(title2))
-						continue;
-					if (meas.measureStringDistance(title1, title2) > 0.7)
-					{
-						idsToRemove.add(j); // dupes found, later list member to
-											// be deleted
-					}
-				}
-			for (int i = 0; i < hits.length; i++)
-				if (!idsToRemove.contains(i))
-					hitsDedup.add(hits[i]);
-			if (hitsDedup.size() < hits.length) {
-				System.out.println("Removed duplicates from relevant search results, including "
-					+ hits[idsToRemove.get(0)]);
-			}
-		}
-		catch (Exception e) {
-			System.out.println("Problem removing duplicates from relevant images");
-		}
-
-		return hitsDedup.toArray(new String[0]);
-
-	}
-
-	public ReviewObj extractSentencesWithPotentialReviewPhrases(String url) {
-		ReviewObj reviewObj = new ReviewObj();
-		int maxSentsFromPage= 20;
-		List<String[]> results = new ArrayList<String[]>();
-
-		String downloadedPage = pageFetcher.fetchPage(url, 20000);
-		if (downloadedPage == null || downloadedPage.length() < 100)
-		{
-			return null;
-		}
-
-		String pageOrigHTML = pageFetcher.fetchOrigHTML(url);
-
-		List<String> productFeaturesList = new ArrayList<String> ();
-		String[] productFeatures = StringUtils.substringsBetween(pageOrigHTML, "<li>", "</li>" );
-		if (productFeatures!=null){
-			for(String item: productFeatures ){
-				if (item.contains("class") || item.contains("www.") || item.contains("href"))
-					continue;
-				item = item.replace("<span>","").replace("</span>","").replace("<b>","").replace("</b>","");
-				if (item.length()>80 && MinedSentenceProcessor.acceptableMinedSentence(item)==null){
-					// TODO OPENNLP-1454 Candidate for logger.debug(...) if required/helpful
-					// System.out.println("Rejected sentence by GeneratedSentenceProcessor.acceptableMinedSentence = "+item);
-					continue;
-				}
-				productFeaturesList .add(item);
-			}
-		}
-		
-		productFeaturesList = cleanProductFeatures(productFeaturesList);
-		
-		String startArea = StringUtils.substringBetween(pageOrigHTML, "reviewHistoPop", "t of 5 stars");
-		String item =  StringUtils.substringBetween(startArea, "title=\"","ou" );
-		if (item==null){//title="4.0 out of 5 stars" ><span>4.0 out of 5 stars</span>
-			int index = pageOrigHTML.indexOf("of 5 stars\"");
-			startArea = StringUtils.substringBetween(pageOrigHTML, "of 5 stars\"", "of 5 stars");
-			item =  StringUtils.substringBetween(startArea, "<span>","ou" );
-		}
-
-		// if found, process
-		if (item!=null){
-			try {
-				float rating = Float.parseFloat(item);
-				reviewObj.setRating(rating);
-			} catch (NumberFormatException e) {
-				e.printStackTrace();
-			}
-		}
-		//productFeaturesList .add(item);
-
-		downloadedPage= downloadedPage.replace("     ", "&");
-		downloadedPage = downloadedPage.replaceAll("(?:&)+", "#");
-		String[] sents = downloadedPage.split("#");
-		List<TextChunk> sentsList = new ArrayList<TextChunk>();
-		for(String s: sents){
-			s = s.trim().replace("  ", ". ").replace("..", ".").replace(". . .", " ")
-					.replace(": ", ". ").replace("- ", ". ").
-					replace (". .",".").trim();
-			sentsList.add(new TextChunk(s, s.length()));
-		}
-
-		sentsList.sort(new TextChunkComparable());
-		String[] longestSents = new String[maxSentsFromPage];
-		int j=0;														// -1 removed
-		for(int i=sentsList.size()-1 -maxSentsFromPage; i< sentsList.size()&& j<longestSents.length; i++){
-			longestSents[j] = sentsList.get(i).text;
-			j++;
-		}
-
-		sents = cleanListOfSents(longestSents);
-		sents = removeDuplicates(sents);
-		sents = verifyEnforceStartsUpperCase(sents);
-
-		reviewObj.setFeaturePhrases(productFeaturesList.toArray(new String[0]));
-		reviewObj.setOrigSentences(sents);
-
-		return reviewObj;
-	}
-
-	private String[] verifyEnforceStartsUpperCase(String[] sents) {
-		for(int i=0; i<sents.length; i++){
-			String s = sents[i];
-			s = StringUtils.trim(s);
-			String sFirstChar = s.substring(0, 1);
-			if (!sFirstChar.toUpperCase().equals(sFirstChar)){
-				s = sFirstChar.toUpperCase()+s.substring(1);
-			}
-			sents[i] = s;
-		}
-			return sents;
-	}
-
-	private List<String> cleanProductFeatures(List<String> productFeaturesList) {
-		List<String> results = new ArrayList<>();
-		for(String feature: productFeaturesList){
-			if (feature.startsWith("Unlimited Free") || feature.startsWith("View Larger") || feature.startsWith("View Larger") || feature.indexOf("shipping")>0)
-				continue;
-			results.add(feature);
-		}
-		return results;
-	}
-
-	protected String[] cleanListOfSents(String[] longestSents)
-	{
-		float minFragmentLength = 40, minFragmentLengthSpace=4;
-
-		List<String> sentsClean = new ArrayList<>();
-		for (String sentenceOrMultSent : longestSents) {
-			if (MinedSentenceProcessor.acceptableMinedSentence(sentenceOrMultSent)==null){
-				// System.out.println("Rejected sentence by GeneratedSentenceProcessor.acceptableMinedSentence = "+sentenceOrMultSent);
-				continue;
-			}
-			// aaa. hhh hhh.  kkk . kkk ll hhh. lll kkk n.
-			int numOfDots = sentenceOrMultSent.replace('.','&').split("&").length;
-			float avgSentenceLengthInTextPortion = (float)sentenceOrMultSent.length() /(float) numOfDots;
-			if ( avgSentenceLengthInTextPortion<minFragmentLength)
-				continue;
-			// o oo o ooo o o o ooo oo ooo o o oo
-			numOfDots = sentenceOrMultSent.replace(' ','&').split("&").length;
-			avgSentenceLengthInTextPortion = (float)sentenceOrMultSent.length() /(float) numOfDots;
-			if ( avgSentenceLengthInTextPortion<minFragmentLengthSpace)
-				continue;
-
-			List<String> furtherSplit = TextProcessor.splitToSentences(sentenceOrMultSent);
-			
-			// forced split by ',' somewhere in the middle of sentence
-			// disused - Feb 26 13
-			//furtherSplit = furtherMakeSentencesShorter(furtherSplit);
-			furtherSplit.remove(furtherSplit.size()-1);
-			for(String s : furtherSplit){
-				if (s.indexOf('|')>-1)
-					continue;
-				s = s.replace("<em>"," ").replace("</em>"," ");
-				s = Utils.convertToASCII(s);
-				sentsClean.add(s);
-			}
-		}
-
-		return sentsClean.toArray(new String[0]);
-	}
-
-	private List<String> furtherMakeSentencesShorter(List<String> furtherSplit) {
-		int MIN_LENGTH_TO_SPLIT = 80;
-		List<String> results = new ArrayList<>();
-		for(String sent: furtherSplit) {
-			sent = startWithCapitalSent(sent);
-			int len = sent.length(); 
-			if (len <MIN_LENGTH_TO_SPLIT)
-				results.add(sent);
-			else {
-				try {
-					int commaIndex = StringUtils.indexOf(sent, ',');
-					int lastCommaIndex = StringUtils.lastIndexOf(sent, ',');
-					int splitIndex = -1;
-					if (Math.abs(commaIndex- len/2) > Math.abs(lastCommaIndex- len/2))
-						splitIndex = commaIndex;
-					else
-						splitIndex = lastCommaIndex;
-					if (splitIndex<0)
-						results.add(sent);
-					else {
-						String sent1 = sent.substring(0, splitIndex)+". ";
-						String sent2 = startWithCapitalSent(sent.substring(splitIndex+1));
-						results.add(sent1); results.add(sent2);
-					}
-				} catch (Exception e) {
-					results.add(sent);
-					e.printStackTrace();
-				}
-
-			}
-		}
-		return results;
-	}
-
-	private String startWithCapitalSent(String sent) {
-		String firstChar = sent.substring(0,1);
-		String remainder = sent.substring(1);
-		
-		return firstChar.toUpperCase()+remainder;
-	}
-
-	public List<String> formReviewsForAProduct(String name /*long bpid, String keywordsName*/){
-		ReviewObj reviewObjTotal = null;
-		try {
-			List<HitBase> pagesForAProduct = prodman.findProductByName(name, 1);
-			reviewObjTotal = null; 
-
-			for(HitBase p: pagesForAProduct){
-				ReviewObj reviewObj = 
-						extractSentencesWithPotentialReviewPhrases(p.getUrl());
-				// init with first element
-				if (reviewObjTotal  == null)
-					reviewObjTotal = reviewObj;
-				if (reviewObj==null)
-					continue;
-				String[] afterOriginalization = orig.convert(reviewObj.getOrigSentences(), p.getTitle(), reviewObj.getKeywordsName());
-				reviewObj.setOriginalizedSentences(Arrays.asList(afterOriginalization));
-				reviewObj.setSentimentPhrases(orig.formedPhrases);
-
-				List<String> buf = reviewObjTotal.getSentimentPhrases();
-				if (orig.formedPhrases!=null && orig.formedPhrases.size()>0){
-					buf.addAll(orig.formedPhrases);
-					reviewObjTotal.setSentimentPhrases(buf);
-				}
-
-		/*		buf = reviewObjTotal.getOriginalizedSentences();
-				if (buf!=null && afterOriginalization!=null && afterOriginalization.length>0){
-					List<String> b1 = Arrays.asList(afterOriginalization);
-					List<String> b2 = new ArrayList<String>();
-					b2.addAll(buf);
-					b2.addAll(new ArrayList<String>(b1));
-					reviewObjTotal.setOriginalizedSentences(b2);
-				}
-*/
-			}
-			if (reviewObjTotal==null) return new ArrayList<>();
-			
-			List<String> textReviews = buildManyReviewTexts(reviewObjTotal);
-
-			
-		/*	String textReview = buildText(reviewObjTotal);
-			try {
-				if (textReview!=null && textReview.length()>60)
-					ser.saveReviewsToDB(textReview, bpid, pagesForAProduct.get(0).getUrl(), pagesForAProduct.get(0).getTitle(),
-							reviewObjTotal.getSentimentPhrases().toString(), reviewObjTotal.getRating());
-			} catch (Exception e) {
-				System.out.println("Database write failed");
-			}
-			*/
-			
-		} catch (Exception e) {
-			e.printStackTrace();
-		} 
-		return reviewObjTotal.getOriginalizedSentences();
-	}
-
-	private String buildText(ReviewObj reviewObj) {
-
-		String[] features = reviewObj.getFeaturePhrases();
-		List<String> sentences =reviewObj.getOriginalizedSentences();
-		StringBuffer buf = new StringBuffer();
-		int count = 0;
-		for(String sent:sentences){
-			if (sent!=null)
-				buf.append(sent+" ");
-			if (count%2==0 && count<features.length)
-				if (features[count]!=null){
-					buf.append(features[count]);
-					if (!(features[count].endsWith("!") ||features[count].endsWith("?")||features[count].endsWith("?") 
-							||features[count].endsWith(".\"") ))
-						buf.append(". ");
-				}
-
-			if (count%5==0)
-				buf.append("\n");
-			count++;
-		}
-		return buf.toString();
-	}
-	
-	private List<String> buildManyReviewTexts(ReviewObj reviewObj) {
-
-		String[] features = reviewObj.getFeaturePhrases();
-		List<String> sentences =reviewObj.getOriginalizedSentences();
-		
-		// first count how many sentences
-				int NUM_SENTS_IN_REVIEW = 7;
-				int count=0;
-				for(String sent:sentences){
-					if (sent!=null)
-						count++;
-				}
-		int nReviews = count/NUM_SENTS_IN_REVIEW;
-		if (nReviews<1)
-			nReviews=1;
-		StringBuffer[] bufs = new StringBuffer[nReviews];
-		for(int i=0; i<bufs.length; i++){
-			bufs[i] = new StringBuffer();
-		}
-				
-		count = 0;
-		int currentRevIndex = 0;
-		for(String sent:sentences){
-			if (sent!=null)
-				bufs[currentRevIndex].append(sent+" ");
-			if (count%2==0 && count<features.length)
-				if (features[count]!=null){
-					bufs[currentRevIndex].append(features[count]);
-					if (!(features[count].endsWith("!") ||features[count].endsWith("?")||features[count].endsWith("?") 
-							||features[count].endsWith(".\"") ))
-						bufs[currentRevIndex].append(". ");
-				}
-
-			try {
-				if (bufs[currentRevIndex].toString().split(".").length>4)
-					bufs[currentRevIndex].append("\n");
-			} catch (Exception e) {
-				e.printStackTrace();
-			}
-			
-			count++;
-			currentRevIndex++;
-			if (currentRevIndex>=nReviews)
-				currentRevIndex=0;	
-		}
-		
-		List<String> results = new ArrayList<String>();
-		for(StringBuffer b:bufs){
-			String sent = b.toString().replace("!.","!").replace("?.","?");
-			results.add(sent);
-		}
-		return results;
-	}
-
-	public static void main(String[] args){
-		String resourceDir = "C:/stanford-corenlp/src/test/resources/";
-		ParserChunker2MatcherProcessor proc = ParserChunker2MatcherProcessor.getInstance(resourceDir); 
-			
-		//ProductFinderInAWebPage init = new ProductFinderInAWebPage("C:/workspace/relevanceEngine/src/test/resources");
-
-		WebPageReviewExtractor extractor = new WebPageReviewExtractor(resourceDir);
-		String res1[] = extractor.verifyEnforceStartsUpperCase(new String[]{ "hhhh !", "Klyn mng hghj ."});
-				
-		List<String> res = extractor.formReviewsForAProduct(//"McCulloch 16-Inch 3.5 HP Electric Chain Saw");
-				//	"WORX Electric JawSaw with Extension Handle");
-				//	"Panasonic 2-Line Digital Cordless System", 215200345l);
-				//	"Sport Silver Dial Women", 215475290);
-				//"Rectangle Area Rug", 213885290);
-				//		"40VA Replacement Transformer", 213085391);
-				//		"PSYLLIUM POWDER Food", 213185391);
-				//		"Leighton Toilet Tank", 213285391);
-				//"Samsung Knack U310 Flip Phone", 214495493);
-				//"Panasonic Cordless Phone 2 handsets", 214870820);
-				//"Winegard TV Antenna Pre-Amplifier", 211924499);
-				//"Atlona AT-HD-V18 HDMI Distribution Amplifier", 215162612);
-				//"airport express base station", 211462827);
-				//"denon  Network Streaming A/V Home Theater receiver", 209565926);
-				//"sherwood receiver 400 watts stereo", 211286714);
-				//"multizone music distribution system", 205333526);
-				//"niles zr4", 215104912);
-				//"alpine waterproof marine cd receiver", 215167695);
-				//"sherwood channel receiver dolby", 215116818);
-				//"sherwood lcd tv widescreen hdtv", 215481917);
-				//"multiroom music distribution system", 205333526);
-				//		"fusion ms compact stereo", 215649463); 
-				//"pyle pro speaker", 213265125);
-				// "apple iphone 4g",  213265325);
-				//"sherwood high performance receiver", 215394729);
-				//"sony camera housing", 211960592);
-				//"sony xl2100", 1135329203);
-				//"sony 18 megapixel-digital-camera", 215743208);
-				//"sony m470 microcassette tape recorder", 205828052);
-				//"sony monitor terminal expansion board", 213244217);
-				//"sony cybershot digital-camera", 215743207);
-				//"sony interchangeable lens handycam camcorder", 215153503);
-				//"canon powershot digital camera", 214754207);
-				//"brother ink pageyield yellow", 204743189);
-				// ?? "garmin 2200 gps navigator", 215167480);
-				"halo portable backup battery");
-
-		ProfileReaderWriter.writeReportListStr(res, "formedReviewSentences4.csv");
-
-
-		/*		
-			res=	extractor. extractSentencesWithPotentialReviewPhrases(//"http://www.sitbetter.com/view/chair/ofm-500-l/ofm--high-back-leather-office-chair/");
-		//"http://www.amazon.com/OFM-High-Back-Leather-Integral-Headrest/dp/B002SIW1E0/ref=sr_1_1?ie=UTF8&qid=1353370254&sr=8-1&keywords=OFM-High-Back-Leather-Integral-Headrest");
-		//"http://www.amazon.com/Oregon-511AX-Chain-Grinder-Sharpener/dp/B0000AX0CY/ref=sr_1_4?s=industrial&ie=UTF8&qid=1353373435&sr=1-4&keywords=chain+saws");
-			//			"http://www.amazon.com/Bearing-UCP204-12-Housing-Mounted-Bearings/dp/B002BBIYWM/ref=sr_1_1?s=industrial&ie=UTF8&qid=1353373786&sr=1-1&keywords=pillow+block+bearing");
-			"http://www.amazon.com/ShelterLogic-20--Feet-Auto-Shelter/dp/B001OFNK8O/ref=sr_1_1?s=lawn-garden&ie=UTF8&qid=1353376677&sr=1-1&keywords=shelterlogic+62680+autoshelter+portable+garage+carport");			
-						System.out.println(res);
-		 */			
-
-	}
-}
+package opennlp.tools.apps.review_builder;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import opennlp.tools.jsmlearning.ProfileReaderWriter;
+import opennlp.tools.parse_thicket.apps.WebPageExtractor;
+import opennlp.tools.similarity.apps.HitBase;
+import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;
+import opennlp.tools.similarity.apps.utils.Utils;
+import opennlp.tools.textsimilarity.TextProcessor;
+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;
+
+import org.apache.commons.lang.StringUtils;
+
+public class WebPageReviewExtractor extends WebPageExtractor {
+	
+	private final BingAPIProductSearchManager prodman = new BingAPIProductSearchManager();
+	private final SentenceOriginalizer orig;
+		
+	public WebPageReviewExtractor(String resourceDir) {
+		orig = new SentenceOriginalizer(resourceDir);
+	}
+
+	public String[] removeDuplicates(String[] hits)
+	{
+		StringDistanceMeasurer meas = new StringDistanceMeasurer();
+
+		List<Integer> idsToRemove = new ArrayList<>();
+		List<String> hitsDedup = new ArrayList<>();
+		try {
+			for (int i = 0; i < hits.length; i++)
+				for (int j = i + 1; j < hits.length; j++)
+				{
+					String title1 = hits[i];
+					String title2 = hits[j];
+					if (StringUtils.isEmpty(title1) || StringUtils.isEmpty(title2))
+						continue;
+					if (meas.measureStringDistance(title1, title2) > 0.7)
+					{
+						idsToRemove.add(j); // dupes found, later list member to
+											// be deleted
+					}
+				}
+			for (int i = 0; i < hits.length; i++)
+				if (!idsToRemove.contains(i))
+					hitsDedup.add(hits[i]);
+			if (hitsDedup.size() < hits.length) {
+				System.out.println("Removed duplicates from relevant search results, including "
+					+ hits[idsToRemove.get(0)]);
+			}
+		}
+		catch (Exception e) {
+			System.out.println("Problem removing duplicates from relevant images");
+		}
+
+		return hitsDedup.toArray(new String[0]);
+
+	}
+
+	public ReviewObj extractSentencesWithPotentialReviewPhrases(String url) {
+		ReviewObj reviewObj = new ReviewObj();
+		int maxSentsFromPage= 20;
+		List<String[]> results = new ArrayList<String[]>();
+
+		String downloadedPage = pageFetcher.fetchPage(url, 20000);
+		if (downloadedPage == null || downloadedPage.length() < 100)
+		{
+			return null;
+		}
+
+		String pageOrigHTML = pageFetcher.fetchOrigHTML(url);
+
+		List<String> productFeaturesList = new ArrayList<String> ();
+		String[] productFeatures = StringUtils.substringsBetween(pageOrigHTML, "<li>", "</li>" );
+		if (productFeatures!=null){
+			for(String item: productFeatures ){
+				if (item.contains("class") || item.contains("www.") || item.contains("href"))
+					continue;
+				item = item.replace("<span>","").replace("</span>","").replace("<b>","").replace("</b>","");
+				if (item.length()>80 && MinedSentenceProcessor.acceptableMinedSentence(item)==null){
+					// TODO OPENNLP-1454 Candidate for logger.debug(...) if required/helpful
+					// System.out.println("Rejected sentence by GeneratedSentenceProcessor.acceptableMinedSentence = "+item);
+					continue;
+				}
+				productFeaturesList .add(item);
+			}
+		}
+		
+		productFeaturesList = cleanProductFeatures(productFeaturesList);
+		
+		String startArea = StringUtils.substringBetween(pageOrigHTML, "reviewHistoPop", "t of 5 stars");
+		String item =  StringUtils.substringBetween(startArea, "title=\"","ou" );
+		if (item==null){//title="4.0 out of 5 stars" ><span>4.0 out of 5 stars</span>
+			int index = pageOrigHTML.indexOf("of 5 stars\"");
+			startArea = StringUtils.substringBetween(pageOrigHTML, "of 5 stars\"", "of 5 stars");
+			item =  StringUtils.substringBetween(startArea, "<span>","ou" );
+		}
+
+		// if found, process
+		if (item!=null){
+			try {
+				float rating = Float.parseFloat(item);
+				reviewObj.setRating(rating);
+			} catch (NumberFormatException e) {
+				e.printStackTrace();
+			}
+		}
+		//productFeaturesList .add(item);
+
+		downloadedPage= downloadedPage.replace("     ", "&");
+		downloadedPage = downloadedPage.replaceAll("(?:&)+", "#");
+		String[] sents = downloadedPage.split("#");
+		List<TextChunk> sentsList = new ArrayList<TextChunk>();
+		for(String s: sents){
+			s = s.trim().replace("  ", ". ").replace("..", ".").replace(". . .", " ")
+					.replace(": ", ". ").replace("- ", ". ").
+					replace (". .",".").trim();
+			sentsList.add(new TextChunk(s, s.length()));
+		}
+
+		sentsList.sort(new TextChunkComparable());
+		String[] longestSents = new String[maxSentsFromPage];
+		int j=0;														// -1 removed
+		for(int i=sentsList.size()-1 -maxSentsFromPage; i< sentsList.size()&& j<longestSents.length; i++){
+			longestSents[j] = sentsList.get(i).text;
+			j++;
+		}
+
+		sents = cleanListOfSents(longestSents);
+		sents = removeDuplicates(sents);
+		sents = verifyEnforceStartsUpperCase(sents);
+
+		reviewObj.setFeaturePhrases(productFeaturesList.toArray(new String[0]));
+		reviewObj.setOrigSentences(sents);
+
+		return reviewObj;
+	}
+
+	private String[] verifyEnforceStartsUpperCase(String[] sents) {
+		for(int i=0; i<sents.length; i++){
+			String s = sents[i];
+			s = StringUtils.trim(s);
+			String sFirstChar = s.substring(0, 1);
+			if (!sFirstChar.toUpperCase().equals(sFirstChar)){
+				s = sFirstChar.toUpperCase()+s.substring(1);
+			}
+			sents[i] = s;
+		}
+			return sents;
+	}
+
+	private List<String> cleanProductFeatures(List<String> productFeaturesList) {
+		List<String> results = new ArrayList<>();
+		for(String feature: productFeaturesList){
+			if (feature.startsWith("Unlimited Free") || feature.startsWith("View Larger") || feature.startsWith("View Larger") || feature.indexOf("shipping")>0)
+				continue;
+			results.add(feature);
+		}
+		return results;
+	}
+
+	protected String[] cleanListOfSents(String[] longestSents)
+	{
+		float minFragmentLength = 40, minFragmentLengthSpace=4;
+
+		List<String> sentsClean = new ArrayList<>();
+		for (String sentenceOrMultSent : longestSents) {
+			if (MinedSentenceProcessor.acceptableMinedSentence(sentenceOrMultSent)==null){
+				// System.out.println("Rejected sentence by GeneratedSentenceProcessor.acceptableMinedSentence = "+sentenceOrMultSent);
+				continue;
+			}
+			// aaa. hhh hhh.  kkk . kkk ll hhh. lll kkk n.
+			int numOfDots = sentenceOrMultSent.replace('.','&').split("&").length;
+			float avgSentenceLengthInTextPortion = (float)sentenceOrMultSent.length() /(float) numOfDots;
+			if ( avgSentenceLengthInTextPortion<minFragmentLength)
+				continue;
+			// o oo o ooo o o o ooo oo ooo o o oo
+			numOfDots = sentenceOrMultSent.replace(' ','&').split("&").length;
+			avgSentenceLengthInTextPortion = (float)sentenceOrMultSent.length() /(float) numOfDots;
+			if ( avgSentenceLengthInTextPortion<minFragmentLengthSpace)
+				continue;
+
+			List<String> furtherSplit = TextProcessor.splitToSentences(sentenceOrMultSent);
+			
+			// forced split by ',' somewhere in the middle of sentence
+			// disused - Feb 26 13
+			//furtherSplit = furtherMakeSentencesShorter(furtherSplit);
+			furtherSplit.remove(furtherSplit.size()-1);
+			for(String s : furtherSplit){
+				if (s.indexOf('|')>-1)
+					continue;
+				s = s.replace("<em>"," ").replace("</em>"," ");
+				s = Utils.convertToASCII(s);
+				sentsClean.add(s);
+			}
+		}
+
+		return sentsClean.toArray(new String[0]);
+	}
+
+	private List<String> furtherMakeSentencesShorter(List<String> furtherSplit) {
+		int MIN_LENGTH_TO_SPLIT = 80;
+		List<String> results = new ArrayList<>();
+		for(String sent: furtherSplit) {
+			sent = startWithCapitalSent(sent);
+			int len = sent.length(); 
+			if (len <MIN_LENGTH_TO_SPLIT)
+				results.add(sent);
+			else {
+				try {
+					int commaIndex = StringUtils.indexOf(sent, ',');
+					int lastCommaIndex = StringUtils.lastIndexOf(sent, ',');
+					int splitIndex = -1;
+					if (Math.abs(commaIndex- len/2) > Math.abs(lastCommaIndex- len/2))
+						splitIndex = commaIndex;
+					else
+						splitIndex = lastCommaIndex;
+					if (splitIndex<0)
+						results.add(sent);
+					else {
+						String sent1 = sent.substring(0, splitIndex)+". ";
+						String sent2 = startWithCapitalSent(sent.substring(splitIndex+1));
+						results.add(sent1); results.add(sent2);
+					}
+				} catch (Exception e) {
+					results.add(sent);
+					e.printStackTrace();
+				}
+
+			}
+		}
+		return results;
+	}
+
+	private String startWithCapitalSent(String sent) {
+		String firstChar = sent.substring(0,1);
+		String remainder = sent.substring(1);
+		
+		return firstChar.toUpperCase()+remainder;
+	}
+
+	public List<String> formReviewsForAProduct(String name /*long bpid, String keywordsName*/){
+		ReviewObj reviewObjTotal = null;
+		try {
+			List<HitBase> pagesForAProduct = prodman.findProductByName(name, 1);
+			reviewObjTotal = null; 
+
+			for(HitBase p: pagesForAProduct){
+				ReviewObj reviewObj = 
+						extractSentencesWithPotentialReviewPhrases(p.getUrl());
+				// init with first element
+				if (reviewObjTotal  == null)
+					reviewObjTotal = reviewObj;
+				if (reviewObj==null)
+					continue;
+				String[] afterOriginalization = orig.convert(reviewObj.getOrigSentences(), p.getTitle(), reviewObj.getKeywordsName());
+				reviewObj.setOriginalizedSentences(Arrays.asList(afterOriginalization));
+				reviewObj.setSentimentPhrases(orig.formedPhrases);
+
+				List<String> buf = reviewObjTotal.getSentimentPhrases();
+				if (orig.formedPhrases!=null && orig.formedPhrases.size()>0){
+					buf.addAll(orig.formedPhrases);
+					reviewObjTotal.setSentimentPhrases(buf);
+				}
+
+		/*		buf = reviewObjTotal.getOriginalizedSentences();
+				if (buf!=null && afterOriginalization!=null && afterOriginalization.length>0){
+					List<String> b1 = Arrays.asList(afterOriginalization);
+					List<String> b2 = new ArrayList<String>();
+					b2.addAll(buf);
+					b2.addAll(new ArrayList<String>(b1));
+					reviewObjTotal.setOriginalizedSentences(b2);
+				}
+*/
+			}
+			if (reviewObjTotal==null) return new ArrayList<>();
+			
+			List<String> textReviews = buildManyReviewTexts(reviewObjTotal);
+
+			
+		/*	String textReview = buildText(reviewObjTotal);
+			try {
+				if (textReview!=null && textReview.length()>60)
+					ser.saveReviewsToDB(textReview, bpid, pagesForAProduct.get(0).getUrl(), pagesForAProduct.get(0).getTitle(),
+							reviewObjTotal.getSentimentPhrases().toString(), reviewObjTotal.getRating());
+			} catch (Exception e) {
+				System.out.println("Database write failed");
+			}
+			*/
+			
+		} catch (Exception e) {
+			e.printStackTrace();
+		} 
+		return reviewObjTotal.getOriginalizedSentences();
+	}
+
+	private String buildText(ReviewObj reviewObj) {
+
+		String[] features = reviewObj.getFeaturePhrases();
+		List<String> sentences =reviewObj.getOriginalizedSentences();
+		StringBuffer buf = new StringBuffer();
+		int count = 0;
+		for(String sent:sentences){
+			if (sent!=null)
+				buf.append(sent+" ");
+			if (count%2==0 && count<features.length)
+				if (features[count]!=null){
+					buf.append(features[count]);
+					if (!(features[count].endsWith("!") ||features[count].endsWith("?")||features[count].endsWith("?") 
+							||features[count].endsWith(".\"") ))
+						buf.append(". ");
+				}
+
+			if (count%5==0)
+				buf.append("\n");
+			count++;
+		}
+		return buf.toString();
+	}
+	
+	private List<String> buildManyReviewTexts(ReviewObj reviewObj) {
+
+		String[] features = reviewObj.getFeaturePhrases();
+		List<String> sentences =reviewObj.getOriginalizedSentences();
+		
+		// first count how many sentences
+				int NUM_SENTS_IN_REVIEW = 7;
+				int count=0;
+				for(String sent:sentences){
+					if (sent!=null)
+						count++;
+				}
+		int nReviews = count/NUM_SENTS_IN_REVIEW;
+		if (nReviews<1)
+			nReviews=1;
+		StringBuffer[] bufs = new StringBuffer[nReviews];
+		for(int i=0; i<bufs.length; i++){
+			bufs[i] = new StringBuffer();
+		}
+				
+		count = 0;
+		int currentRevIndex = 0;
+		for(String sent:sentences){
+			if (sent!=null)
+				bufs[currentRevIndex].append(sent+" ");
+			if (count%2==0 && count<features.length)
+				if (features[count]!=null){
+					bufs[currentRevIndex].append(features[count]);
+					if (!(features[count].endsWith("!") ||features[count].endsWith("?")||features[count].endsWith("?") 
+							||features[count].endsWith(".\"") ))
+						bufs[currentRevIndex].append(". ");
+				}
+
+			try {
+				if (bufs[currentRevIndex].toString().split(".").length>4)
+					bufs[currentRevIndex].append("\n");
+			} catch (Exception e) {
+				e.printStackTrace();
+			}
+			
+			count++;
+			currentRevIndex++;
+			if (currentRevIndex>=nReviews)
+				currentRevIndex=0;	
+		}
+		
+		List<String> results = new ArrayList<String>();
+		for(StringBuffer b:bufs){
+			String sent = b.toString().replace("!.","!").replace("?.","?");
+			results.add(sent);
+		}
+		return results;
+	}
+
+	public static void main(String[] args){
+		String resourceDir = "C:/stanford-corenlp/src/test/resources/";
+		ParserChunker2MatcherProcessor proc = ParserChunker2MatcherProcessor.getInstance(resourceDir); 
+			
+		//ProductFinderInAWebPage init = new ProductFinderInAWebPage("C:/workspace/relevanceEngine/src/test/resources");
+
+		WebPageReviewExtractor extractor = new WebPageReviewExtractor(resourceDir);
+		String res1[] = extractor.verifyEnforceStartsUpperCase(new String[]{ "hhhh !", "Klyn mng hghj ."});
+				
+		List<String> res = extractor.formReviewsForAProduct(//"McCulloch 16-Inch 3.5 HP Electric Chain Saw");
+				//	"WORX Electric JawSaw with Extension Handle");
+				//	"Panasonic 2-Line Digital Cordless System", 215200345l);
+				//	"Sport Silver Dial Women", 215475290);
+				//"Rectangle Area Rug", 213885290);
+				//		"40VA Replacement Transformer", 213085391);
+				//		"PSYLLIUM POWDER Food", 213185391);
+				//		"Leighton Toilet Tank", 213285391);
+				//"Samsung Knack U310 Flip Phone", 214495493);
+				//"Panasonic Cordless Phone 2 handsets", 214870820);
+				//"Winegard TV Antenna Pre-Amplifier", 211924499);
+				//"Atlona AT-HD-V18 HDMI Distribution Amplifier", 215162612);
+				//"airport express base station", 211462827);
+				//"denon  Network Streaming A/V Home Theater receiver", 209565926);
+				//"sherwood receiver 400 watts stereo", 211286714);
+				//"multizone music distribution system", 205333526);
+				//"niles zr4", 215104912);
+				//"alpine waterproof marine cd receiver", 215167695);
+				//"sherwood channel receiver dolby", 215116818);
+				//"sherwood lcd tv widescreen hdtv", 215481917);
+				//"multiroom music distribution system", 205333526);
+				//		"fusion ms compact stereo", 215649463); 
+				//"pyle pro speaker", 213265125);
+				// "apple iphone 4g",  213265325);
+				//"sherwood high performance receiver", 215394729);
+				//"sony camera housing", 211960592);
+				//"sony xl2100", 1135329203);
+				//"sony 18 megapixel-digital-camera", 215743208);
+				//"sony m470 microcassette tape recorder", 205828052);
+				//"sony monitor terminal expansion board", 213244217);
+				//"sony cybershot digital-camera", 215743207);
+				//"sony interchangeable lens handycam camcorder", 215153503);
+				//"canon powershot digital camera", 214754207);
+				//"brother ink pageyield yellow", 204743189);
+				// ?? "garmin 2200 gps navigator", 215167480);
+				"halo portable backup battery");
+
+		ProfileReaderWriter.writeReportListStr(res, "formedReviewSentences4.csv");
+
+
+		/*		
+			res=	extractor. extractSentencesWithPotentialReviewPhrases(//"http://www.sitbetter.com/view/chair/ofm-500-l/ofm--high-back-leather-office-chair/");
+		//"http://www.amazon.com/OFM-High-Back-Leather-Integral-Headrest/dp/B002SIW1E0/ref=sr_1_1?ie=UTF8&qid=1353370254&sr=8-1&keywords=OFM-High-Back-Leather-Integral-Headrest");
+		//"http://www.amazon.com/Oregon-511AX-Chain-Grinder-Sharpener/dp/B0000AX0CY/ref=sr_1_4?s=industrial&ie=UTF8&qid=1353373435&sr=1-4&keywords=chain+saws");
+			//			"http://www.amazon.com/Bearing-UCP204-12-Housing-Mounted-Bearings/dp/B002BBIYWM/ref=sr_1_1?s=industrial&ie=UTF8&qid=1353373786&sr=1-1&keywords=pillow+block+bearing");
+			"http://www.amazon.com/ShelterLogic-20--Feet-Auto-Shelter/dp/B001OFNK8O/ref=sr_1_1?s=lawn-garden&ie=UTF8&qid=1353376677&sr=1-1&keywords=shelterlogic+62680+autoshelter+portable+garage+carport");			
+						System.out.println(res);
+		 */			
+
+	}
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java
index 280e4ec..92d35d1 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java
@@ -1,171 +1,171 @@
-package opennlp.tools.apps.utils.email;
-
-import java.util.Properties;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import javax.mail.*;
-import javax.mail.internet.*;
-import javax.activation.*;
-
-/**
- * Responsible to sending e-mails trough a gmail smtp server.
- * It will be extended to handle arbitrary smtp servers.
- * @author GaDo
- *
- */
-public class EmailSender {
-		private static final long serialVersionUID = 1L;
-		private static final String mailboxAddress="boris_galitsky@rambler.ru";
-
-		public  boolean sendMail(String smtp, String user, String pass, InternetAddress from, InternetAddress[] to, InternetAddress[] cc, InternetAddress[] bcc, String subject, String body, String file) throws Exception
-		{
-			boolean correct=true;
-			try
-			{							
-				//Eliminate spaces from addresses
-				if(from!=null){		
-					from.setAddress(from.getAddress().replace(" ","").trim());		}
-					to = eliminateSpaces(to);
-					cc = eliminateSpaces(cc);
-					bcc = eliminateSpaces(bcc);
-					correct = validateAddress(from,to,cc,bcc);
-				
-				if(correct){
-					//Configuracio of the properties -> smtp
-					Properties props = new Properties();
-					props.put("mail.smtp.host", smtp);
-					props.put("mail.smtp.auth", "true");
-					props.put("mail.smtp.port", "465");
-					props.put("mail.smtp.starttls.enable", "true");
-					Authenticator auth = new SMTP_Authenticator	(user, pass);
-					Session session = Session.getInstance(props, auth);
-					//Session session = Session.getDefaultInstance(props);
-					//props.put("mail.smtp.user",user);
-					//props.put("mail.smtp.password",pass);
-												    
-				    //Composing the message
-				    MimeMessage message = new MimeMessage(session);
-				      message.setFrom(from);
-				    message.setRecipients(Message.RecipientType.TO,to);
-				    message.setRecipients(Message.RecipientType.CC,cc);
-				    message.setRecipients(Message.RecipientType.BCC,bcc);
-				    message.setSubject(subject);
-				    if(file==null)
-				    {
-				    	
-					    //message.setText(body);
-				    	message.setContent(body, "text/html");
-				    }
-				    else
-				    {
-					    Multipart multipart = new MimeMultipart();
-					    BodyPart messageBodyPart;
-					    messageBodyPart = new MimeBodyPart();
-					    messageBodyPart.setContent(body, "text/html");
-					    //messageBodyPart.setText(body);
-					    multipart.addBodyPart(messageBodyPart);
-					    messageBodyPart = new MimeBodyPart();
-					    DataSource source = new FileDataSource(file);
-					    messageBodyPart.setDataHandler(new DataHandler(source));
-					    messageBodyPart.setFileName(file);
-					    multipart.addBodyPart(messageBodyPart);
-		
-					    message.setContent(multipart);
-				    }
-		
-					Transport tr = session.getTransport("smtp");			
-					tr.connect(smtp, mailboxAddress, pass);
-					message.saveChanges();
-					tr.sendMessage(message, message.getAllRecipients());
-					tr.close();
-				}
-		    }
-			catch(Exception e)
-			{
-				e.printStackTrace();
-				correct=false;
-			}
-			return correct;
-		}
-
-		private  boolean validateAddress(InternetAddress from,
-				InternetAddress[] to, InternetAddress[] cc,
-				InternetAddress[] bcc) {
-			boolean correct = true;
-			try{
-				correct = from!=null && !from.getAddress().equals("") && to!=null && to.length>=1;
-				String regEx="[^\\s]+@[^\\s]+.[^\\s]+";
-				Pattern pc = Pattern.compile(regEx);
-				Matcher m = null ;
-
-				if(correct){
-					m = pc.matcher(from.getAddress());
-					correct = m.matches();
-				}
-				
-				if(correct){
-					int vault = to.length;
-					while(correct && vault<to.length){
-						correct = !to[vault].getAddress().equals("");
-						if(correct){
-					    	m = pc.matcher(to[vault].getAddress());
-					    	correct = m.matches();
-						}
-						vault++;
-					}
-				}
-				
-				if(correct && cc!=null){
-					int vault = cc.length;
-					while(correct && vault<cc.length){
-						correct = !cc[vault].getAddress().equals("");
-						if(correct){
-					    	m = pc.matcher(cc[vault].getAddress());
-					    	correct = m.matches();
-						}
-						vault++;
-					}
-				}
-				
-				if(correct && bcc!=null){
-					int vault = bcc.length;
-					while(correct && vault<bcc.length){
-						correct = !bcc[vault].getAddress().equals("");
-						if(correct){
-					    	m = pc.matcher(bcc[vault].getAddress());
-					    	correct = m.matches();
-						}
-						vault++;
-					}
-				}
-				
-			}catch(Exception e){
-				e.printStackTrace();
-				correct=false;
-			}
-			return correct;
-		}
-
-		private  InternetAddress[] eliminateSpaces(InternetAddress[] address) {
-			if(address!=null){
-				for(int i=0;i<address.length;i++){
-					address[i].setAddress(address[i].getAddress().replace(" ","").trim());
-				}
-			}
-			return address;
-		}		
-
-		
-		public static void main(String[] args){
-			EmailSender s = new EmailSender();
-			try {
-				s.sendMail("smtp.rambler.ru", "boris_galitsky@rambler.ru", "b06g93", 
-						new InternetAddress("bgalitsky@hotmail.com"), new InternetAddress[]{new InternetAddress("bgalitsky@hotmail.com")}, new InternetAddress[]{}, new InternetAddress[]{}, 
-						"Generated content for you", "body", null);
-			} catch (AddressException e) {
-				e.printStackTrace();
-			} catch (Exception e) {
-				e.printStackTrace();
-			}
-		}
-}
+package opennlp.tools.apps.utils.email;
+
+import java.util.Properties;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import javax.mail.*;
+import javax.mail.internet.*;
+import javax.activation.*;
+
+/**
+ * Responsible to sending e-mails trough a gmail smtp server.
+ * It will be extended to handle arbitrary smtp servers.
+ * @author GaDo
+ *
+ */
+public class EmailSender {
+		private static final long serialVersionUID = 1L;
+		private static final String mailboxAddress="boris_galitsky@rambler.ru";
+
+		public  boolean sendMail(String smtp, String user, String pass, InternetAddress from, InternetAddress[] to, InternetAddress[] cc, InternetAddress[] bcc, String subject, String body, String file) throws Exception
+		{
+			boolean correct=true;
+			try
+			{							
+				//Eliminate spaces from addresses
+				if(from!=null){		
+					from.setAddress(from.getAddress().replace(" ","").trim());		}
+					to = eliminateSpaces(to);
+					cc = eliminateSpaces(cc);
+					bcc = eliminateSpaces(bcc);
+					correct = validateAddress(from,to,cc,bcc);
+				
+				if(correct){
+					//Configuracio of the properties -> smtp
+					Properties props = new Properties();
+					props.put("mail.smtp.host", smtp);
+					props.put("mail.smtp.auth", "true");
+					props.put("mail.smtp.port", "465");
+					props.put("mail.smtp.starttls.enable", "true");
+					Authenticator auth = new SMTP_Authenticator	(user, pass);
+					Session session = Session.getInstance(props, auth);
+					//Session session = Session.getDefaultInstance(props);
+					//props.put("mail.smtp.user",user);
+					//props.put("mail.smtp.password",pass);
+												    
+				    //Composing the message
+				    MimeMessage message = new MimeMessage(session);
+				      message.setFrom(from);
+				    message.setRecipients(Message.RecipientType.TO,to);
+				    message.setRecipients(Message.RecipientType.CC,cc);
+				    message.setRecipients(Message.RecipientType.BCC,bcc);
+				    message.setSubject(subject);
+				    if(file==null)
+				    {
+				    	
+					    //message.setText(body);
+				    	message.setContent(body, "text/html");
+				    }
+				    else
+				    {
+					    Multipart multipart = new MimeMultipart();
+					    BodyPart messageBodyPart;
+					    messageBodyPart = new MimeBodyPart();
+					    messageBodyPart.setContent(body, "text/html");
+					    //messageBodyPart.setText(body);
+					    multipart.addBodyPart(messageBodyPart);
+					    messageBodyPart = new MimeBodyPart();
+					    DataSource source = new FileDataSource(file);
+					    messageBodyPart.setDataHandler(new DataHandler(source));
+					    messageBodyPart.setFileName(file);
+					    multipart.addBodyPart(messageBodyPart);
+		
+					    message.setContent(multipart);
+				    }
+		
+					Transport tr = session.getTransport("smtp");			
+					tr.connect(smtp, mailboxAddress, pass);
+					message.saveChanges();
+					tr.sendMessage(message, message.getAllRecipients());
+					tr.close();
+				}
+		    }
+			catch(Exception e)
+			{
+				e.printStackTrace();
+				correct=false;
+			}
+			return correct;
+		}
+
+		private  boolean validateAddress(InternetAddress from,
+				InternetAddress[] to, InternetAddress[] cc,
+				InternetAddress[] bcc) {
+			boolean correct = true;
+			try{
+				correct = from!=null && !from.getAddress().equals("") && to!=null && to.length>=1;
+				String regEx="[^\\s]+@[^\\s]+.[^\\s]+";
+				Pattern pc = Pattern.compile(regEx);
+				Matcher m = null ;
+
+				if(correct){
+					m = pc.matcher(from.getAddress());
+					correct = m.matches();
+				}
+				
+				if(correct){
+					int vault = to.length;
+					while(correct && vault<to.length){
+						correct = !to[vault].getAddress().equals("");
+						if(correct){
+					    	m = pc.matcher(to[vault].getAddress());
+					    	correct = m.matches();
+						}
+						vault++;
+					}
+				}
+				
+				if(correct && cc!=null){
+					int vault = cc.length;
+					while(correct && vault<cc.length){
+						correct = !cc[vault].getAddress().equals("");
+						if(correct){
+					    	m = pc.matcher(cc[vault].getAddress());
+					    	correct = m.matches();
+						}
+						vault++;
+					}
+				}
+				
+				if(correct && bcc!=null){
+					int vault = bcc.length;
+					while(correct && vault<bcc.length){
+						correct = !bcc[vault].getAddress().equals("");
+						if(correct){
+					    	m = pc.matcher(bcc[vault].getAddress());
+					    	correct = m.matches();
+						}
+						vault++;
+					}
+				}
+				
+			}catch(Exception e){
+				e.printStackTrace();
+				correct=false;
+			}
+			return correct;
+		}
+
+		private  InternetAddress[] eliminateSpaces(InternetAddress[] address) {
+			if(address!=null){
+				for(int i=0;i<address.length;i++){
+					address[i].setAddress(address[i].getAddress().replace(" ","").trim());
+				}
+			}
+			return address;
+		}		
+
+		
+		public static void main(String[] args){
+			EmailSender s = new EmailSender();
+			try {
+				s.sendMail("smtp.rambler.ru", "boris_galitsky@rambler.ru", "b06g93", 
+						new InternetAddress("bgalitsky@hotmail.com"), new InternetAddress[]{new InternetAddress("bgalitsky@hotmail.com")}, new InternetAddress[]{}, new InternetAddress[]{}, 
+						"Generated content for you", "body", null);
+			} catch (AddressException e) {
+				e.printStackTrace();
+			} catch (Exception e) {
+				e.printStackTrace();
+			}
+		}
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/SMTP_Authenticator.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/SMTP_Authenticator.java
index a57601b..cf0a433 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/SMTP_Authenticator.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/SMTP_Authenticator.java
@@ -1,24 +1,24 @@
-package opennlp.tools.apps.utils.email;
-import javax.mail.*;
-
-
-/**
- * This contains the required informations for the smtp authorization!
- *
- */
-
-public class SMTP_Authenticator extends javax.mail.Authenticator {
-	
-	private String username="bg7550@gmail.com";
-	private String password="pill0693";	
-	
-	public SMTP_Authenticator(String user, String pwd) {
-		username=user;
-		password=pwd;
-	}
-
-		
-	public PasswordAuthentication getPasswordAuthentication() {
-		return new PasswordAuthentication(username, password);
-		}
-}
+package opennlp.tools.apps.utils.email;
+import javax.mail.*;
+
+
+/**
+ * This contains the required informations for the smtp authorization!
+ *
+ */
+
+public class SMTP_Authenticator extends javax.mail.Authenticator {
+	
+	private String username="bg7550@gmail.com";
+	private String password="pill0693";	
+	
+	public SMTP_Authenticator(String user, String pwd) {
+		username=user;
+		password=pwd;
+	}
+
+		
+	public PasswordAuthentication getPasswordAuthentication() {
+		return new PasswordAuthentication(username, password);
+		}
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/ClassifierTrainingSetIndexer.java b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/ClassifierTrainingSetIndexer.java
index 52cd3c7..d6de493 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/ClassifierTrainingSetIndexer.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/ClassifierTrainingSetIndexer.java
@@ -1,258 +1,258 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.doc_classifier;
-
-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.util.Version;
-import org.apache.tika.Tika;
-
-public class ClassifierTrainingSetIndexer {
-  
-  public static String resourceDir = new File(".").getAbsolutePath().replace("/.", "") + "/src/main/resources";
-  public static String INDEX_PATH = "/classif", CLASSIF_TRAINING_CORPUS_PATH = "/training_corpus";
-  protected ArrayList<File> queue = new ArrayList<>();
-  Tika tika = new Tika();
-
-  IndexWriter indexWriter = null;
-  protected static String[] domains =  new String[] { "legal", "health", "computing", "engineering", "business" };
-  private String absolutePathTrainingSet=null;
-
-  public ClassifierTrainingSetIndexer() {
-
-    try {
-      initIndexWriter(resourceDir);
-    } catch (Exception e) {
-      e.printStackTrace();
-    }
-  }
-
-  public ClassifierTrainingSetIndexer(String absolutePathTrainingSet) {
-    this.absolutePathTrainingSet = absolutePathTrainingSet;
-    try {
-      initIndexWriter(resourceDir);
-    } catch (Exception e) {
-      e.printStackTrace();
-    }
-  }
-
-  public void indexTrainingSet() {
-
-    try {
-      if (absolutePathTrainingSet==null)
-        indexFileOrDirectory(resourceDir
-                + CLASSIF_TRAINING_CORPUS_PATH);
-      else
-        indexFileOrDirectory(
-                this.absolutePathTrainingSet);
-
-    } catch (IOException e1) {
-      e1.printStackTrace();
-    }
-    try {
-      indexWriter.commit();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-  }
-  /*
-  private void indexTrainingSample(String text, String flag, int id)
-          throws IOException {
-
-      Document doc = new Document();
-      doc.add(new StringField("id", new Integer(id).toString(),
-              Field.Store.YES));
-      doc.add(new TextField("text", text.toLowerCase(), Field.Store.YES));
-      doc.add(new StringField("class", flag.toLowerCase(), Field.Store.YES));
-      indexWriter.addDocument(doc);
-
-  }
-  */
-
-  private void addFiles(File file) {
-
-    if (!file.exists()) {
-      System.out.println(file + " does not exist.");
-    }
-    if (file.isDirectory()) {
-      for (File f : file.listFiles()) {
-        if (f.getName().startsWith("."))
-          continue;
-        addFiles(f);
-        System.out.println(f.getName());
-      }
-    } else {
-      queue.add(file);
-
-    }
-  }
-
-  // index last folder name, before filename itself
-
-  public void indexFileOrDirectory(String fileName) throws IOException {
-    addFiles(new File(fileName));
-
-    List<File> files = new ArrayList<File>(queue);
-    for (File f : files) {
-      if (!f.getName().endsWith(".xml")) {
-
-        try {
-          Document doc = new Document();
-
-          String name = f.getPath();
-          String className = null;
-          for (String d : domains) {
-            if (name.indexOf(d) > -1) {
-              className = d;
-              break;
-            }
-          }
-
-          try {
-            doc.add(new TextField("text", tika.parse(f)));
-          } catch (Exception e1) {
-            e1.printStackTrace();
-          }
-
-          doc.add(new StringField("path", f.getPath(),
-                  Field.Store.YES));
-          doc.add(new StringField("class", className, Field.Store.YES));
-          try {
-
-            indexWriter.addDocument(doc);
-
-          } catch (Exception e) {
... 283086 lines suppressed ...