You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by bg...@apache.org on 2014/01/06 18:48:32 UTC

svn commit: r1555944 [2/11] - in /opennlp/sandbox/opennlp-similarity/src: main/java/opennlp/tools/apps/ main/java/opennlp/tools/apps/contentgen/ main/java/opennlp/tools/apps/contentgen/multithreaded/ main/java/opennlp/tools/apps/relevanceVocabs/ main/j...

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MinedSentenceProcessor.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MinedSentenceProcessor.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MinedSentenceProcessor.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MinedSentenceProcessor.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.apps.review_builder;
+
+import java.util.Arrays;
+import java.util.List;
+
+import opennlp.tools.similarity.apps.utils.Utils;
+
+import org.apache.commons.lang.StringUtils;
+
+/** Heuristic filters and clean-up helpers for sentences mined from web pages. */
+public class MinedSentenceProcessor {
+  /** Largest accepted ratio of delimiter-separated to space-separated tokens. */
+  private static final double MAX_DELIMITER_RATIO = 0.7;
+  /** Delimiters, other than the comma, checked against the ratio above. */
+  private static final char[] OTHER_DELIMITERS = { '/', '.', '!', '=' };
+
+  /** Boilerplate markers; matched against lower-cased text, so keep them lower case. */
+  private static final String[] REJECTED_FRAGMENTS = {
+      "click here", " wikip", "copyright", "operating hours", "days per week",
+      "click for", "photos", "find the latest", "terms of service",
+      "clicking here", "skip to", "sidebar", "tags:", "available online",
+      "get online", "buy online", "not valid", "discount", "official site",
+      "this video", "this book", "this product", "paperback", "hardcover",
+      "audio cd", "related searches", "permission is granted", "[edit",
+      "edit categories", "free license", "under the terms", "rights reserved",
+      "wikipedia", "recipient of", "this message", "mailing list",
+      "purchase order", "mon-fri", "email us", "privacy pol", "back to top",
+      "for details", "assistance?", "chat live", "free shipping",
+      "company info", "satisfaction g", "contact us", "conditions",
+      "the recipient", "day return", "days return", "refund it", "your money",
+      "purchase orders", "return it", "credit card", "storeshop",
+      "for a limited time", "prime members", "amazon members",
+      "unlimited free", "shipping",
+      // not a script text
+      "document.body", " var ", "search suggestions" };
+
+  /** Lower-case openings of boilerplate lines. */
+  private static final String[] REJECTED_PREFIXES = { "subscribe",
+      "posted by", "below", "fax", "write", "email", "we ", "free",
+      "exchange it ", "find", "shop", "unlimited", "amazon", "search" };
+
+  /** Lower-case endings that get a sentence rejected (it stops at an article). */
+  private static final String[] REJECTED_SUFFIXES = { "the", "the." };
+
+  /** Trailing dot patterns that processSentence chomps, in this order. */
+  private static final String[] TRAILING_DOTS = { "..", ". .", " .", ".", "...", " ...." };
+
+  /**
+   * Decides whether a mined sentence looks like genuine prose and, if it does,
+   * returns a cleaned-up copy. Ratios use the token counts of StringUtils.split,
+   * so a sentence consisting of a single word is always rejected.
+   *
+   * @param sent candidate sentence, may be {@code null}
+   * @return the cleaned sentence, or {@code null} when it is rejected
+   */
+  public static String acceptableMinedSentence(String sent) {
+    if (sent == null) {
+      return null;
+    }
+    // if too many commas => seo text
+    int spaceTokens = StringUtils.split(sent, ' ').length;
+    if (exceedsRatio(sent, ',', spaceTokens)) {
+      System.out.println("Rejection: too many commas");
+      return null;
+    }
+    for (char delimiter : OTHER_DELIMITERS) {
+      if (exceedsRatio(sent, delimiter, spaceTokens)) {
+        System.out.println("Rejection: too many delimiters");
+        return null;
+      }
+    }
+    if (StringUtils.split(sent, '|').length > 2
+        || StringUtils.split(sent, '>').length > 2) {
+      System.out.println("Rejection: too many |s or >s ");
+      return null;
+    }
+
+    String sentTry = sent.toLowerCase();
+    // Removing a three-space run loses 3 characters: more than 10 lost means
+    // at least four such runs, which is suspicious.
+    String withoutGaps = sentTry.replace("   ", "");
+    if (sentTry.length() - withoutGaps.length() > 10) {
+      return null;
+    }
+    if (isBoilerplate(sentTry)) {
+      return null;
+    }
+    // Symbols of the wrong page area: each occurrence adds 2 characters, so growth
+    // of 4 means two or more. NOTE(review): 2nd literal looks mis-encoded; kept as found.
+    String sentWrongSym = sentTry.replace(">", "&&&").replace("�", "&&&")
+        .replace("|", "&&&").replace(":", "&&&").replace("/", "&&&")
+        .replace("-", "&&&").replace("%", "&&&");
+    if (sentWrongSym.length() - sentTry.length() >= 4 && sentTry.length() < 200) {
+      return null;
+    }
+    // NOTE(review): '[' is blanked first, so the later "[more]" cannot match.
+    sent = sent.replace('[', ' ').replace(']', ' ')
+        .replace("_should_find_orig_", "").replace(".   .", ". ")
+        .replace("amp;", " ").replace("1.", " ").replace("2.", " ")
+        .replace("3.", " ").replace("4.", " ").replace("2009", "2011")
+        .replace("2008", "2011").replace("2006", "2011")
+        .replace("2007", "2011").replace("VIDEO:", " ").replace("Video:", " ")
+        .replace("no comments", " ").replace("  ", " ").replace("  ", " ")
+        .replace("(more.)", "").replace("more.", "").replace("<more>", "")
+        .replace("[more]", "").replace(".,", ".").replace("&lt;", "")
+        .replace("p&gt;", "").replace("product description", "");
+    int endIndex = sent.indexOf(" posted");
+    if (endIndex > 0) {
+      sent = sent.substring(0, endIndex);
+    }
+    return sent;
+  }
+
+  /** Tells whether splitting on the delimiter yields too many tokens per word. */
+  private static boolean exceedsRatio(String sent, char delimiter, int spaceTokens) {
+    int delimiterTokens = StringUtils.split(sent, delimiter).length;
+    return (float) delimiterTokens / (float) spaceTokens > MAX_DELIMITER_RATIO;
+  }
+
+  /** Tells whether the lower-cased sentence matches any boilerplate marker. */
+  private static boolean isBoilerplate(String lowerCased) {
+    for (String fragment : REJECTED_FRAGMENTS) {
+      if (lowerCased.indexOf(fragment) > -1) {
+        return true;
+      }
+    }
+    for (String prefix : REJECTED_PREFIXES) {
+      if (lowerCased.startsWith(prefix)) {
+        return true;
+      }
+    }
+    for (String suffix : REJECTED_SUFFIXES) {
+      if (lowerCased.endsWith(suffix)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Runs the sentence through Utils.fullStripHTML, removes trailing dot runs,
+   * cuts off a short part that follows a pipe and appends a period when the
+   * result does not end with one.
+   *
+   * @param pageSentence raw sentence, may be {@code null}
+   * @return the normalized sentence; an empty string for {@code null} input
+   */
+  public static String processSentence(String pageSentence) {
+    if (pageSentence == null) {
+      return "";
+    }
+    pageSentence = Utils.fullStripHTML(pageSentence);
+    for (String tail : TRAILING_DOTS) {
+      pageSentence = StringUtils.chomp(pageSentence, tail);
+    }
+    pageSentence = pageSentence.replace("::", ":").replace(".,", ". ")
+        .replace("(.)", "");
+    // make single spaces everywhere
+    pageSentence = pageSentence.trim().replaceAll("\\s+", " ");
+
+    // Keep only the text before the pipe when it is over three times longer than the
+    // rest; the first token is safe with a leading pipe and loses no character.
+    String[] pipes = StringUtils.split(pageSentence, '|');
+    if (pipes.length == 2
+        && ((float) pipes[0].length() / (float) pipes[1].length() > 3.0)) {
+      pageSentence = pipes[0].trim();
+    }
+    if (!StringUtils.contains(pageSentence, '.')
+        && !StringUtils.contains(pageSentence, '?')
+        && !StringUtils.contains(pageSentence, '!')) {
+      pageSentence = pageSentence + ". ";
+    }
+    pageSentence = pageSentence.replace(" .", ".").replace("..", ".").trim();
+    if (!pageSentence.endsWith(".")) {
+      pageSentence += ". ";
+    }
+    return pageSentence;
+  }
+
+  /**
+   * Expands abbreviated month names ("Jan." becomes "January" and so on).
+   *
+   * @param pageContent text to normalize, must not be {@code null}
+   * @return a copy of the text with the abbreviations expanded
+   */
+  public static String normalizeForSentenceSplitting(String pageContent) {
+    // Strings are immutable, so the result of the chain has to be returned.
+    return pageContent.replace("Jan.", "January").replace("Feb.", "February")
+        .replace("Mar.", "March").replace("Apr.", "April").replace("Jun.", "June")
+        .replace("Jul.", "July").replace("Aug.", "August").replace("Sep.", "September")
+        .replace("Oct.", "October").replace("Nov.", "November").replace("Dec.", "December");
+  }
+}
\ No newline at end of file

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ParserConstants.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ParserConstants.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ParserConstants.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ParserConstants.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,74 @@
+package opennlp.tools.apps.review_builder;
+
+/** Tag names: phrase types added by this project plus the standard POS/phrase tags. */
+public interface ParserConstants {
+	// Project additions: infinitive phrase, present and past participle phrase.
+	String TYPE_STP = "STP";
+	String TYPE_SGP = "SGP";
+	String TYPE_SNP = "SNP";
+
+	// Standard tags, see http://bulba.sdsu.edu/jeanette/thesis/PennTags.html
+	String TYPE_ADJP = "ADJP";
+	String TYPE_ADVP = "ADVP";
+	String TYPE_CC = "CC";
+	String TYPE_CD = "CD";
+	String TYPE_CONJP = "CONJP";
+	String TYPE_DT = "DT";
+	String TYPE_EX = "EX";
+	String TYPE_FRAG = "FRAG";
+	String TYPE_FW = "FW";
+	String TYPE_IN = "IN";
+	String TYPE_INTJ = "INTJ";
+	String TYPE_JJ = "JJ";
+	String TYPE_JJR = "JJR";
+	String TYPE_JJS = "JJS";
+	String TYPE_LS = "LS";
+	String TYPE_LST = "LST";
+	String TYPE_MD = "MD";
+	String TYPE_NAC = "NAC";
+	String TYPE_NN = "NN";
+	String TYPE_NNS = "NNS";
+	String TYPE_NNP = "NNP";
+	String TYPE_NNPS = "NNPS";
+	String TYPE_NP = "NP";
+	String TYPE_NX = "NX";
+	String TYPE_PDT = "PDT";
+	String TYPE_POS = "POS";
+	String TYPE_PP = "PP";
+	String TYPE_PRN = "PRN";
+	String TYPE_PRP = "PRP";
+	String TYPE_PRP$ = "PRP$";
+	String TYPE_PRT = "PRT";
+	String TYPE_QP = "QP";
+	String TYPE_RB = "RB";
+	String TYPE_RBR = "RBR";
+	String TYPE_RBS = "RBS";
+	String TYPE_RP = "RP";
+	String TYPE_RRC = "RRC";
+	String TYPE_S = "S";
+	String TYPE_SBAR = "SBAR";
+	String TYPE_SBARQ = "SBARQ";
+	String TYPE_SINV = "SINV";
+	String TYPE_SQ = "SQ";
+	String TYPE_SYM = "SYM";
+	String TYPE_TO = "TO";
+	String TYPE_TOP = "TOP";
+	String TYPE_UCP = "UCP";
+	String TYPE_UH = "UH";
+	String TYPE_VB = "VB";
+	String TYPE_VBD = "VBD";
+	String TYPE_VBG = "VBG";
+	String TYPE_VBN = "VBN";
+	String TYPE_VBP = "VBP";
+	String TYPE_VBZ = "VBZ";
+	String TYPE_VP = "VP";
+	String TYPE_WDT = "WDT";
+	String TYPE_WHADJP = "WHADJP";
+	String TYPE_WHADVP = "WHADVP";
+	String TYPE_WHNP = "WHNP";
+	String TYPE_WHPP = "WHPP";
+	String TYPE_WP = "WP";
+	String TYPE_WP$ = "WP$";
+	String TYPE_WRB = "WRB";
+	String TYPE_X = "X";
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,166 @@
+package opennlp.tools.apps.review_builder;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.jsmlearning.ProfileReaderWriter;
+import opennlp.tools.parse_thicket.Triple;
+
+/**
+ * Driver that feeds a fixed list of product queries to WebPageReviewExtractor and
+ * passes each result to ProfileReaderWriter under a per-query .csv name.
+ */
+public class ReviewBuilderRunner {
+	// Resource directory used when none is given on the command line.
+	private static final String DEFAULT_RESOURCE_DIR = "C:/workspace/relevanceEngine/src/test/resources";
+	// (product query, Integer id, second query-like string); main() reads only the first element.
+	private final List<Triple<String, Integer, String>> input = new ArrayList<Triple<String, Integer, String>>();
+	/** Fills the input list with the currently enabled product triples. */
+	public ReviewBuilderRunner(){
+		// Earlier input sets, kept disabled for reference.
+		/*	input.add( new Pair<String, Integer>("chief architect portable mobile tv", 204973051));
+
+	input.add( new Pair<String, Integer>("lg plasma tv", 215734562));
+	input.add( new Pair<String, Integer>("magnavox lcd hdtv", 215415652));
+	input.add( new Pair<String, Integer>("yamaha aventage home theater receiver", 215742271));
+	input.add( new Pair<String, Integer>("panasonic 24inch lcd tv", 215742233));
+	input.add( new Pair<String, Integer>("otterbox barnes and noble nook commuter case", 215572161));
+	input.add( new Pair<String, Integer>("sony kdl32ex340 led tv", 215743925));
+	input.add( new Pair<String, Integer>("alpine waterfall tabletop fountain lighting", 215135546));
+    input.add( new Pair<String, Integer>("ihome rechargeable speaker system", 215363231 ));
+	input.add( new Pair<String, Integer>("ion slide film scanner", 212088884));
+
+		 input.add( new Pair<String, Integer>("mens dr martens shoes black nappa", 210813142));
+		 input.add( new Pair<String, Integer>("calvin klein seamless thong panty", 201984853));
+		 input.add( new Pair<String, Integer>("mens clarks shoes wallabee beeswax leather", 210808477));
+		//? input.add( new Pair<String, Integer>("mens sperry topsider shoes", 210809238));
+		 input.add( new Pair<String, Integer>("mens giorgio brutini shoes italian calf", 210809508));
+
+		input.add( new Pair<String, Integer>("halo portable backup battery", 1640825398));
+input.add( new Pair<String, Integer>("kenwood pkgmp18 cd receiver  coaxial speakers",1642712915));
+input.add( new Pair<String, Integer>("element ultraslim hdtv",1643167865));
+input.add( new Pair<String, Integer>("westinghouse  dled hdtv black",1641930013));
+input.add( new Pair<String, Integer>("boss audio receiver speaker package system",1643532459));
+input.add( new Pair<String, Integer>("kenwood  cd receiver coaxial speakers bundle",1646566070));
+input.add( new Pair<String, Integer>("element electronics lcd tv black ",1637163018));
+input.add( new Pair<String, Integer>("stunt copter rechargeable battery pack",1636937811));
+input.add( new Pair<String, Integer>("element led ultraslim hdtv  soundbar",1637572596));
+input.add( new Pair<String, Integer>("boss  receiver speaker package system bundle",1646566067));
+input.add( new Pair<String, Integer>("coby  hd tv",1638746307));
+input.add( new Pair<String, Integer>("vizio  diag led smart hdtv",1660162001));
+input.add( new Pair<String, Integer>("sony dock for ipad ipod and iphone",1646826284));
+input.add( new Pair<String, Integer>("vizio  led  ultraslim hdtv",1642018249));
+input.add( new Pair<String, Integer>("lcd kula tv multimedia player",1640265845));
+
+input.add(new Pair<String, Integer>("liz and co alex tall leather boots",1630836375));
+input.add( new Pair<String, Integer>("total girl silvia sequin moccasin", 1630828314));
+input.add( new Pair<String, Integer>("new england patriots new era nfl sport sideline knit", 1588531904));
+input.add( new Pair<String, Integer>("betseyville sequin backpack", 1630825375));
+input.add( new Pair<String, Integer>("the north face womens osito jacket mojito", 1639791775));
+input.add( new Pair<String, Integer>("misty harbor raincoat trench removable liner", 903542613));
+input.add(new Pair<String, Integer>("ae womens camo jacket ", 1229070780));
+input.add(new Pair<String, Integer>("indianapolis colts sideline knit", 1588531896));
+input.add(new Pair<String, Integer>("b o c korah boot", 1622401738));
+input.add(new Pair<String, Integer>("adidas mens speed cut track suit", 920744865));
+input.add(new Pair<String, Integer>("liz and co lulu zipper boots", 1630836380));
+input.add(new Pair<String, Integer>("black navy  lightweight oxford shoes", 906123996));
+input.add(new Pair<String, Integer>("liz and co farley tall boots", 1639960280));
+input.add(new Pair<String, Integer>("call it spring karpin  pullon boots", 1629938981));
+input.add(new Pair<String, Integer>("ugg australia bailey bow boots", 1594029054));
+input.add(new Pair<String, Integer>("dream chasers  jacket", 1631247949));
+input.add(new Pair<String, Integer>("guess military  tiewaist coat", 1629993909));
+input.add(new Pair<String, Integer>("madden girl allstaar womens zip boots", 1581506993));
+input.add(new Pair<String, Integer>("michael womens shoes", 1590598743));
+input.add(new Pair<String, Integer>("sonoma life style suede midcalf boots women", 1617302927));
+
+		input.add(new Pair<String, Integer>("absolute pnf300 power noise filterground loop isolator with adjustable controls", 1521965454));
+		input.add(new Pair<String, Integer>("sennheiser ie8 stereo earbuds", 211969101));
+		input.add(new Pair<String, Integer>("sanus vlmf109 motorized full motion mount for tvs 37 60 up to 110 lbs", 214893385));
+		input.add(new Pair<String, Integer>("s2fmcy003 earset stereo earbud binaural open miniphone black", 214972916));
+		input.add(new Pair<String, Integer>("boconi bags and leather bryant safari bag carry on luggage brown", 1646568995));
+		input.add(new Pair<String, Integer>("diesel derik pant jyt mens pajama gray", 1645725530));
+		input.add(new Pair<String, Integer>("sole society gina sandal", 1633021283));
+		input.add(new Pair<String, Integer>("toms bimini stitchout slipon women", 1633012540));
+		input.add(new Pair<String, Integer>("the north face womens p r tka 100 microvelour glacier 14 zip tnf blackjk3 medium", 1618022193));
+		input.add(new Pair<String, Integer>("robert graham manuel dress shirt mens long sleeve button up blue", 1631119485));
+
+		input.add(new Pair<String, Integer>("b o c leesa", 1584193288));
+			input.add(new Pair<String, Integer>("blair stirrup pants", 1525621516));
+			input.add(new Pair<String, Integer>("donna karan shirtdress", 1463793963));
+			input.add(new Pair<String, Integer>("columbia sportswear terminal tackle shirt", 1661238030));
+			input.add(new Pair<String, Integer>("carters jersey pajamas", 1573999243));
+			input.add(new Pair<String, Integer>("vince camuto dena", 1626272001));
+			input.add(new Pair<String, Integer>("pistil hudson knit hats", 1660874149));
+			input.add(new Pair<String, Integer>("naturalizer trinity wide shaft womens zip", 1569191459));
+			input.add(new Pair<String, Integer>("bare traps chelby womens sandals", 1513387756));
+			input.add(new Pair<String, Integer>("overland storage hard drive 1 tb hotswap", 212107374));
+			input.add(new Pair<String, Integer>("humminbird indash depth finder", 1616650484));
+			input.add(new Pair<String, Integer>("grepsr800 gre dig scanner", 215723895));
+			input.add(new Pair<String, Integer>("humminbird kayak transducer", 215392426));
+			input.add(new Pair<String, Integer>("garmin nuvi suction cup mount ", 215728710));
+			input.add(new Pair<String, Integer>("crosley radio black", 215662289));
+
+		    input.add(new Triple<String, Integer, String >("avaya ip telephone", 1440488008, "lucent phone system"));
+			input.add(new Triple<String, Integer, String>("clarks trolley womens shoes", 1581854074, "clark womens shoes"));
+			input.add(new Triple<String, Integer, String>("mens evans shoes imperial deer", 210808400, "lb evans slippers"));
+			input.add(new Triple<String, Integer, String>("ugg classic bow shorty gloves", 1665094898, "leather gloves women"));
+			input.add(new Triple<String, Integer, String>("jumping beans man tee baby", 1667155332, "jumping beans clothing"));
+			input.add(new Triple<String, Integer, String>("asics mens shoes", 1630208773, "asics mens running shoes"));
+			input.add(new Triple<String, Integer, String>("oakley hoodie mens fleece", 1656661466, "hoodies for men"));
+			input.add(new Triple<String, Integer, String>("usb sound control digital voice recorder", 1654662662, "digital voice recorder with usb"));
+			input.add(new Triple<String, Integer, String>("motorola bluetooth headset", 215376254, "motorola oasis bluetooth headset"));
+			input.add(new Triple<String, Integer, String>("sony sound bar home theater system", 215450833, "sony sound bar"));
+			input.add(new Triple<String, Integer, String>("jvc full hd everio camcorder", 1664479999, "jvc everio camcorder"));
+		 */
+		
+		 input.add(new Triple<String, Integer, String>("dr martens beckett laceup boots", 1651452641, "doc martin shoes"));
+		 input.add(new Triple<String, Integer, String>("pioneer cd changer",204654672, "pioneer cd player"));
+		 input.add(new Triple<String, Integer, String>("tablet handler strap and desk mount", 1634326303, "tablet holder"));
+		 input.add(new Triple<String, Integer, String>("sockwell loden womens overthecalf socks", 1644572708, "compression stockings, support stockings"));
+		 input.add(new Triple<String, Integer, String>("nike eclipse womens shoes", 1657807048, "nike eclipse ii women s shoe"));
+		 input.add(new Triple<String, Integer, String>("cherokee workwear womens scrub pant black stall",211643295, "cherokee workwear scrubs"));
+		 input.add(new Triple<String, Integer, String>("columbia sportswear jacket ", 1667381935, "columbia omni heat"));
+		 input.add(new Triple<String, Integer, String>("adidas adipure jacket", 1040124787, "adidas track jacket"));
+		 input.add(new Triple<String, Integer, String>("clarks may orchid womens shoes", 1585805688, "clarks loafers"));
+		 input.add(new Triple<String, Integer, String>("levis pants empire blue", 1670283141, "skinny jeans for guys"));
+		 input.add(new Triple<String, Integer, String>("nike jordan black cat tee", 1653598764, "jordan black cat"));
+		 input.add(new Triple<String, Integer, String>("obermeyer womens kassandra down coat", 1670629180, "down winter coats"));
+/*
+		 input.add(new Triple<String, Integer, String>("paramax  surround sound", 835422569, "paramax im3"));
+		 input.add(new Triple<String, Integer, String>("mia quincy wedge", 1285886230, "mia quincy wedge"));
+		 input.add(new Triple<String, Integer, String>("able planet headphones", 1648522886, "able planet nc210g"));
+		 input.add(new Triple<String, Integer, String>("samsung replacement lamp", 695793593, "lamp code bp96"));
+		 input.add(new Triple<String, Integer, String>("paul green emerson boot castagno", 1313967918, "paul green emerson boot"));
+		 input.add(new Triple<String, Integer, String>("bandolino caresse boots", 1448643623, "bandolino caresse boots"));
+		 input.add(new Triple<String, Integer, String>("nine west modiley", 1365998968, "nine west modiley"));
+		 input.add(new Triple<String, Integer, String>("converse chuck taylor  bisay", 1555900934, "turquoise chuck taylors"));
+		 input.add(new Triple<String, Integer, String>("gentle souls bay leaf flats", 1436175162, "gentle souls bay leaf"));
+		 input.add(new Triple<String, Integer, String>("sauce hockey  back hat", 1644440355, "sauce hockey discount code"));
+		 input.add(new Triple<String, Integer, String>("aravon farren oxford shoes", 1644573438, "aravon wef07sh"));
+	*/	 input.add(new Triple<String, Integer, String>("kooba crosby hobo handbags", 1326503038, "kooba crosby"));
+		 input.add(new Triple<String, Integer, String>("bcbgmaxazria sheath dress", 1313949777, "bcbgmaxazria illusion bodice ruched sheath dress"));
+		 input.add(new Triple<String, Integer, String>("billabong boardshorts trunks", 1316823074, "la siesta boardshorts"));
+		 input.add(new Triple<String, Integer, String>("mootsies tootsies boot", 1503727310, "mootsies tootsies draker"));
+		 input.add(new Triple<String, Integer, String>("nine west bootie", 1503730060, "nine west drina"));
+		 input.add(new Triple<String, Integer, String>("playtex support cotton ", 1331026244, "playtex t723"));
+		 input.add(new Triple<String, Integer, String>("fossil morgan satchel taupe", 1355165745, "fossil morgan satchel"));
+		 input.add(new Triple<String, Integer, String>("katonah womens boots brown", 1420057844, "boc katonah boots"));
+		 input.add(new Triple<String, Integer, String>("boot cut jeans supernova", 1363356262, "levis 527 supernova"));
+		 input.add(new Triple<String, Integer, String>("steve madden buckie boot", 1313965918, "steve madden buckie boot"));
+		 input.add(new Triple<String, Integer, String>("charlies horse tshirt", 1428490587, "charlie s horse shirt"));
+		 input.add(new Triple<String, Integer, String>("igloo little playmate ice chest", 205421625, "igloo little playmate"));
+		 input.add(new Triple<String, Integer, String>("mark nason boot", 1313951044, "mark nason rudd"));
+	}
+
+	/** Processes every query of the input list; args[0], when present, replaces the default resource directory. */
+	public static void main(String[] args){
+		String resourceDir = (args != null && args.length > 0) ? args[0] : DEFAULT_RESOURCE_DIR;
+		ReviewBuilderRunner r = new ReviewBuilderRunner();
+		WebPageReviewExtractor extractor = new WebPageReviewExtractor(resourceDir);
+		for (Triple<String, Integer, String> queryAndId : r.input) {
+			String query = (String) queryAndId.getFirst(); // cast kept: getFirst()'s declared type is not visible here
+			List<String> res = extractor.formReviewsForAProduct(query);
+			ProfileReaderWriter.writeReportListStr(res, "formedReviewSentences"+ query +".csv");
+		}
+	}
+}
\ No newline at end of file

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewObj.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewObj.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewObj.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewObj.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,137 @@
+package opennlp.tools.apps.review_builder;
+
+import java.util.List;
+
+/** Mutable holder for one product review and the text derived from it. */
+public class ReviewObj {
+
+	long bpid;
+	long pid;
+	float rating;
+	String pros;
+	String cons;
+	String url;
+	String title;
+	String review;
+	String keywordsName;
+	float score;
+	String[] origSentences;
+	String[] featurePhrases;
+	/** Obtained from sentences. */
+	List<String> originalizedSentences;
+	/** Obtained from sentences. */
+	List<String> sentimentPhrases;
+
+	/** Creates an empty review; no field is assigned. */
+	public ReviewObj() {
+	}
+
+	/**
+	 * Creates a review from its core attributes; keywordsName and the sentence
+	 * and phrase fields are not assigned here.
+	 */
+	public ReviewObj(long bpid, long pid, float rating, String pros,
+			String cons, String url, String title, String review,
+			float score) {
+		this.bpid = bpid;
+		this.pid = pid;
+		this.rating = rating;
+		this.pros = pros;
+		this.cons = cons;
+		this.url = url;
+		this.title = title;
+		this.review = review;
+		this.score = score;
+	}
+
+	// Plain accessors, grouped by topic.
+	public long getBpid() {
+		return this.bpid;
+	}
+	public void setBpid(long bpid) {
+		this.bpid = bpid;
+	}
+	public long getPid() {
+		return this.pid;
+	}
+	public void setPid(long pid) {
+		this.pid = pid;
+	}
+
+	public float getRating() {
+		return this.rating;
+	}
+	public void setRating(float rating) {
+		this.rating = rating;
+	}
+	public float getScore() {
+		return this.score;
+	}
+	public void setScore(float score) {
+		this.score = score;
+	}
+
+	public String getPros() {
+		return this.pros;
+	}
+	public void setPros(String pros) {
+		this.pros = pros;
+	}
+	public String getCons() {
+		return this.cons;
+	}
+	public void setCons(String cons) {
+		this.cons = cons;
+	}
+
+	public String getUrl() {
+		return this.url;
+	}
+	public void setUrl(String url) {
+		this.url = url;
+	}
+	public String getTitle() {
+		return this.title;
+	}
+	public void setTitle(String title) {
+		this.title = title;
+	}
+	public String getReview() {
+		return this.review;
+	}
+	public void setReview(String review) {
+		this.review = review;
+	}
+	public String getKeywordsName() {
+		return this.keywordsName;
+	}
+	public void setKeywordsName(String kw) {
+		this.keywordsName = kw;
+	}
+
+	public String[] getOrigSentences() {
+		return this.origSentences;
+	}
+	public void setOrigSentences(String[] sentences) {
+		this.origSentences = sentences;
+	}
+	public String[] getFeaturePhrases() {
+		return this.featurePhrases;
+	}
+	public void setFeaturePhrases(String[] featurePhrases) {
+		this.featurePhrases = featurePhrases;
+	}
+
+	public List<String> getOriginalizedSentences() {
+		return this.originalizedSentences;
+	}
+	public void setOriginalizedSentences(List<String> originalizedSentences) {
+		this.originalizedSentences = originalizedSentences;
+	}
+	public List<String> getSentimentPhrases() {
+		return this.sentimentPhrases;
+	}
+	public void setSentimentPhrases(List<String> sentimentPhrases) {
+		this.sentimentPhrases = sentimentPhrases;
+	}
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceBeingOriginalized.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceBeingOriginalized.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceBeingOriginalized.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceBeingOriginalized.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,59 @@
+package opennlp.tools.apps.review_builder;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import opennlp.tools.textsimilarity.ParseTreeChunk;
+
+/**
+ * Plain mutable holder for one sentence: its text, a string-to-string map
+ * associated with it, and its parse tree chunks arranged as a list of lists.
+ * No validation or copying is performed; references are stored as given.
+ */
+public class SentenceBeingOriginalized {
+	private Map<String, String> sentKey_value = new HashMap<String, String>();
+	private String sentence;
+	private List<List<ParseTreeChunk>> groupedChunks;
+
+	/**
+	 * Creates a holder from the three supplied references.
+	 *
+	 * @param sentKey_value map to associate with the sentence
+	 * @param sentence the sentence text
+	 * @param groupedChunks parse tree chunks of the sentence, as a list of lists
+	 */
+	public SentenceBeingOriginalized(Map<String, String> sentKey_value,
+			String sentence, List<List<ParseTreeChunk>> groupedChunks) {
+		this.groupedChunks = groupedChunks;
+		this.sentence = sentence;
+		this.sentKey_value = sentKey_value;
+	}
+
+	/** Returns the map held by this object. */
+	public Map<String, String> getSentKey_value() {
+		return this.sentKey_value;
+	}
+
+	/** Replaces the map held by this object. */
+	public void setSentKey_value(Map<String, String> sentKey_value) {
+		this.sentKey_value = sentKey_value;
+	}
+
+	/** Returns the sentence text. */
+	public String getSentence() {
+		return this.sentence;
+	}
+
+	/** Replaces the sentence text. */
+	public void setSentence(String sentence) {
+		this.sentence = sentence;
+	}
+
+	/** Returns the grouped parse tree chunks. */
+	public List<List<ParseTreeChunk>> getGroupedChunks() {
+		return this.groupedChunks;
+	}
+
+	/** Replaces the grouped parse tree chunks. */
+	public void setGroupedChunks(List<List<ParseTreeChunk>> groupedChunks) {
+		this.groupedChunks = groupedChunks;
+	}
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceOriginalizer.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceOriginalizer.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceOriginalizer.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceOriginalizer.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,401 @@
+package opennlp.tools.apps.review_builder;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+
+import org.apache.commons.lang.StringUtils;
+
+import opennlp.tools.apps.relevanceVocabs.PhraseProcessor;
+import opennlp.tools.apps.relevanceVocabs.SentimentVocab;
+import opennlp.tools.apps.relevanceVocabs.SynonymListFilter;
+import opennlp.tools.textsimilarity.ParseTreeChunk;
+
+public class SentenceOriginalizer {
+	private String[] sents; 
+	private SentenceBeingOriginalized[] sentenceBeingOriginalized;
+	public List<String> formedPhrases = new ArrayList<String>();
+
+	private MachineTranslationWrapper rePhraser = new MachineTranslationWrapper();
+	private SentimentVocab sVocab = SentimentVocab.getInstance();
+	PhraseProcessor pProc = new PhraseProcessor();
+	SynonymListFilter filter = null;
+	private List<String> verbsShouldStayNoSubstition = Arrays.asList(new String[]{
+			"might", "can", "power", "bonk", "screw", "victimization", "victimize", "victimised", "victimized", "victimise",
+			"hump", "sluttish", "wanton"
+	});
+
+	public SentenceOriginalizer(String[] ss){
+		sentenceBeingOriginalized = new SentenceBeingOriginalized[ss.length];
+		for(int i= 0; i< ss.length; i++){
+			//sentenceBeingOriginalized[i] = new  SentenceBeingOriginalized()
+		}
+	}
+
+	public SentenceOriginalizer(String dir){
+		filter = new  SynonymListFilter(dir);
+	};
+
+	public String[] getSents() {
+		return sents;
+	}
+
+	public void setSents(String[] sents) {
+		this.sents = sents;
+	}
+
+	
+
+	private void substituteProsCons(){
+		for(int i = 0; i< sents.length; i++){
+			if (sents[i]==null)
+				continue;
+
+			sents[i] = sents[i].replace("...", " ").replace("..", " ");
+
+			if (sents[i].startsWith("Pros")){
+				sents[i]="";
+				sents[i+1] = "I liked that "+ sents[i+1];
+			}
+
+			if (sents[i].startsWith("Cons")){
+				sents[i]="";
+				sents[i+1] = "What I did not like was that "+ sents[i+1];
+			}
+		}
+	}
+
+	private void insertProductNameForRefs(String prodName){
+		prodName = prodName.toLowerCase();
+		prodName = StringUtils.trim(prodName);
+		
+		for(int i = 0; i< sents.length; i++){
+			if (sents[i]==null)
+				continue;
+			String snt = sents[i];
+			String line  = snt.replace(" it ", " "+prodName+" ");
+			if (line.equals(snt)){
+				line = snt.replace(" this ", " "+prodName+" ");
+			}
+
+			sents[i]=line;
+		}
+	}
+	
+	private void insertProductNameForRefsFullNameKeywords(String prodName, String keywordsName){
+		prodName = StringUtils.trim(prodName.toLowerCase());
+				
+		for(int i = 0; i< sents.length; i++){
+			double flag = Math.random();
+			String prodNameCurr = null;
+			if (flag>0.4)
+				prodNameCurr = prodName;
+				else
+					prodNameCurr = keywordsName;
+					
+			if (sents[i]==null)
+				continue;
+			String snt = sents[i];
+			String line  = snt.replace(" it ", " "+prodNameCurr+" ");
+			if (line.equals(snt)){
+				line = snt.replace(" this ", " "+prodNameCurr+" ");
+			}
+
+			sents[i]=line;
+		}
+	}
+
+	private void turnTenseToPast(){
+		for(int i = 0; i< sents.length; i++){
+			if (sents[i]==null)
+				continue;
+			sents[i] = sents[i].replace("to do ", "to d_o_ ");
+			sents[i]=sents[i].replace(" is ", " was ").replace(" done ", " was done ").replace(" are ", " were ")
+					.replace(" do ", " did ").replace(" yes, ", " true, ");
+			sents[i]=sents[i].replace("somebody ", "one ").replace("would like", "would want").replace("I am", "users are");
+			sents[i]=sents[i].replace("my wife", "my spouse").replace("I would definitely buy ", "I wouldn't hesitate to buy ")
+					.replace("I haven't tried ", "I did not actually have a chance to try ");
+			sents[i]=sents[i].replace("they arrived ", "they were shipped to my residence ").replace(" ive ", " I have ")
+					.replace("We have ", "I have already tried and written a review on ");
+			
+			sents[i] = sents[i].replace( "to d_o_ ", "to do ");
+	
+			if (sents[i].startsWith("We "))
+				sents[i] = sents[i].replace("We ", "I know they ");
+			if (sents[i].startsWith("You "))
+				sents[i] = sents[i].replace("You ","I believe one can ");
+			
+			if (sents[i].startsWith("Well "))
+				sents[i] = sents[i].replace("Well ","I would state that ");
+
+		}
+	}
+
+	private void turnCounterFactual(){
+		for(int i = 0; i< sents.length; i++){
+			if (sents[i]==null)
+				continue;
+			sents[i]=sents[i].replace("however ", "b1ut1 ").replace("but ", "however ")
+					.replace("b1ut1 ", "but ").replace("I say", "I repeat").
+					replace("same way", "same manner").replace(" you ", " somebody ").replace(" can ", " might ");
+
+		}
+	}
+
+	public void substituteSynonymVerbs(){
+		for(int i = 0; i< sents.length; i++){
+			String line = sents[i];
+			List<List<ParseTreeChunk>> ps = pProc.getPhrasesOfAllTypes(line);
+			if (ps==null || ps.size()<2)
+				continue;
+
+			List<ParseTreeChunk> vps = ps.get(1);
+
+			extractNounPhrasesWithSentiments(ps.get(0));
+
+			line = substituteSentimentSynonyms(line, ps);
+
+			if (vps==null)
+				continue;
+			boolean bVerbRule = false;
+			if (vps.size()==1)
+				line = rePhraser.rePhrase(line);
+			else {
+				if (vps.size()>1)
+
+					for (ParseTreeChunk v: vps){
+						String verbLemma = v.getLemmas().get(0);
+						String newVerb = filter.getSynonym(verbLemma);
+						if (newVerb!=null && newVerb.length()>3 && verbLemma.length()>3 // both old and new words should be above 3
+								&& !newVerb.endsWith("ness") // empirical rule
+								&& !verbsShouldStayNoSubstition.contains(verbLemma) &&
+								!verbsShouldStayNoSubstition.contains(newVerb)	){
+							line = line.replace(verbLemma+" ", newVerb+" "); 	
+							line = line.replace(" "+verbLemma, " "+newVerb); 
+							System.out.println("Synonym for verb substitution: "+verbLemma + "->"+newVerb);
+							bVerbRule = true;
+						}
+					}
+				if (!bVerbRule && vps.size()==2 && Math.random()>0.8) // no other means of originalization worked, so do inverse translation
+					line = rePhraser.rePhrase(line);
+			}
+			sents[i]=line;
+
+		}
+	}
+
+
+	private String substituteSentimentSynonyms(String line,
+			List<List<ParseTreeChunk>> ps) {
+		List<ParseTreeChunk> nounPhrases = ps.get(0);
+		if (nounPhrases.size()<1)
+			return line;
+
+		for(ParseTreeChunk ch: nounPhrases){
+			List<String> lemmas = ch.getLemmas();
+			for(String oldSentim:lemmas){
+				if ( sVocab.isSentimentWord(oldSentim.toLowerCase())) {
+					String newSentim = filter.getSynonym(oldSentim);
+					if (newSentim!=null && newSentim.length()>3 && !verbsShouldStayNoSubstition.contains(newSentim)
+							&& !verbsShouldStayNoSubstition.contains(oldSentim)){
+						line = line.replace(oldSentim+" ", newSentim+" "); 	
+						line = line.replace(" "+oldSentim, " "+newSentim);
+						System.out.println("Synonym for sentiment substitution: "+oldSentim + "->"+newSentim);
+					}
+				}
+			}
+		}
+
+		return line;
+	}
+
+	private void extractNounPhrasesWithSentiments(List<ParseTreeChunk> list) {
+		List<String> phrasesWithSentiments = new ArrayList<String>();
+		for(ParseTreeChunk ch: list){
+			List<String> lemmas = ch.getLemmas();
+			for(String l:lemmas){
+				if ( sVocab.isSentimentWord(l.toLowerCase())) {
+					phrasesWithSentiments.add(lemmas.toString());
+				}
+			}
+		}
+		formedPhrases.addAll(phrasesWithSentiments);
+	}
+
+	public String[] convert(String[] sents, String name, String keywordsName){
+		name = name.replace("Amazon.com:" , "").replace("Amazon.com" , "").replace("..." , " ")
+				.replace("Customer Reviews: ", "");
+
+		this.sents = sents;
+		try {
+			substituteProsCons();
+		} catch (Exception e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
+		try {
+			//insertProductNameForRefs(name);
+			insertProductNameForRefsFullNameKeywords(name, keywordsName);
+		} catch (Exception e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
+		try {
+			turnTenseToPast();
+		} catch (Exception e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
+		try {
+			turnCounterFactual();
+		} catch (Exception e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
+
+		try {
+			substituteSynonymVerbs();
+		} catch (Exception e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
+		// remove dupes
+		this.formedPhrases = new ArrayList<String>(new HashSet<String>(this.formedPhrases));
+
+		return sents;
+
+	}
+
+	public static void main(String[] args){
+		//ProductFinderInAWebPage init = new ProductFinderInAWebPage("C:/workspace/productsearchfe/src/test/resources");
+		SentenceOriginalizer orig = new SentenceOriginalizer("src/test/resources");
+		String[] sents = new String[] {
+				"Leave the bulky stabilization rig at home and take smooth handheld videos from any angle thanks to Optical SteadyShot image stabilization with Active Mode."
+				//"Other then that, it works well, and the chain stops instantly when you let go of the trigger, or push the safety bar."	
+		};
+		String[] res = orig.convert(sents, "VIP Product", "vv propro");
+		System.out.println(Arrays.asList(res));
+	}
+
+}
+
+/*
+ * 1.	Some Amazon specific text keeps showing up so we might want to put a filter on recurring phrases such as:
+1.	Unlimited Free Two-Day Shipping
+2.	View Larger
+3.	What's in the box
+2.	Period/stop added to punctuation marks: 
+1.	!.
+2.	?.
+3.	:.
+4.	.". 
+5.	-.
+3.	Saw some HTML formatting occasionally, such as <em></em>
+4.	Redundancy with choice phrases appearing multiple times in a single review
+5.	Specific issue with words being added at the end of the letter "s," creating nonsensical words:
+1.	It mispronouncesulphur virtually every caller'sulphur name in waysulphur that..
+2.	In fact, it'southward a rare feature that I recollect southwardhould be commonplace in any southwardurround receiver.
+6.	Adding -iness to make nonsensical words: mightinessiness, powerinessiness
+
+ */
+
+
+
+/*
+ * After using a gasoline powered chain saw for many years had to stop using because of dust and fumes made my copd worse this electric saw is great has surprising amount of power without the gas fumes..
+Nice chainsaw, works great, well built.
+The instant-stop chain is very safe, but a bit abrupt when releasing the trigger.
+I wish there were a half-way release that turned off the motor but did not engage the instant stop break.
+Pros .
+inexpensive compared to gas chainsaws, lightweight, cuts with good power, will do most anything that a gas chainsaw will do. like the automatic chain oiler and easy tension adjustment.
+Cons .
+If you are cutting larger branches and trees, a gas is better.
+However this will work on 8-10" size very well.
+Bought this McCulloch electric chainsaw to replace an old Craftsman electric chain saw. (the Craftsman got ran over by a car).
+Compared to my old Craftsman electric chain saw, the McCulloch seems to be wonderful.
+The first test was to cut a 16" diameter oak branch, cut thru it like hot butter.
+The "no tools needed" chain tensioner seems to be a good design..
+Is a good saw, however it came with the handle that wraps abound the left side of the saw was broken.
+The box looked good, but the saw itself was damaged.
+However, because I had a lot of tree damage in my yard, and more storms coming, I made due with it.
+Other then take, it works well, and the chain stops instantly when you let go of the trigger, or push the safety bar.
+stump w/ this E-saw.
+It keeps doing a super job.
+In terms of a replacement chain, make sure to get the Oregon S-54 (S is style of cutter, 54 means 54 links).
+The MC literature suggests use of a S-55, but it is TOO Long and will soon wind up in the trash can.
+ALSO, the MC factory installed gasket for the lube oil, between the saw and chain bar is total trash.
+When changing out the chain, pull the bar off, pull out and throw away the MC factory gasket, clean the bar and apply a piece of electrical tape, using a knife to cut out a pathway for oil to the bar.
+Will lube perfectly now!
+This is the second electric McCilloch 16" chain saw that I have owned and it is even better and more powerful than the first.
+I still use a gas chain saw out in the woods on my property but I usually do just enough cutting with it to get the logs on a trailer so I can take them bach to my shed to cut them up and save the sawdust for on my garden and flower beds as mulch.
+This electric is lighter and more powerful than my gas saw and makes short work of even 14" well-seasoned oak and poppel logs with a minimum of effort.
+I highly recommend this sae for anyone who has an electric outlet close enough to their cutting station.
+Bought this McCulloch electric chainsaw to replace an old Craftsman electric chain saw. (the Craftsman got ran over by a car).
+Compared to my old Craftsman electric chain saw, the McCulloch seems to be wonderful.
+The first test was to cut a 16" diameter oak branch, cut thru it like hot butter.
+The "no tools needed" chain tensioner seems to be a good design (design seems to be similar to that used by other manufacturers).
+Assuming. this thing keeps cutting/running the same way in the long term, then we have a winner. (note. all the electric chain saws come with cheap looking chains with cutting blades spaced very widely apart along the chain.
+To be ready for the bigger cutting jobs I sprung for a new $18 Oregon s-54 16" chain.).
+Update .
+Having used both gas and electric chain saws for more years than I care to remember, this little beauty is far more than I'd hoped for.
+Yes, it requires a cord to function and, without a handy "Current Bush", serves no useful purpose, but for trimming trees or cutting up firewood in a yard it beats H*** out of fighting the frustration when a gas saw refuses to start or remain running.
+I have another 14" electric MuCulloch along with a 16" gas Homelite and consider this to be a combination of the best qualities of both the others, the convenience of the small electric and the greater cutting ability of the gas powered Homelite.
+This little beauty appears to have as much power as the gas saw without the hassle of mixing fuel and the ongoing maintenence associated with it and cuts far faster than it's small electric brother.
+If I was forced to have a single chainsaw, in my present position(Pleanty of fire wood handy, just in need of cutting to the proper dimensions), this baby would be may choice without any douts or reservations.
+Ordered the Mcculloch 16inch electric chain saw to do some serious pruning of trees around the house which had severe frost damage.
+Although an electric chain saw, it cut through trees up to eight inches like a hot knife through butter.
+Not once did i have problems in two days of cutting.
+The big pros I noticed while using is realtively lightweight for a chainsaw and you can hold in one hand to use.
+Once you release the power switch, the chainsaw chain immediately stops!.
+This is a good thing as it keeps body parts attached.
+One nifty thing about this chainsaw is the chain tightener is outstanding once you figure how it works.
+No tools, just move the knobs and tighten, couldn't be easier and definitely beats hunting down a wrench to tighten.
+Only con is being electric, you have to watch the power cord.
+Very easy to hit extension cord if not careful.
+But it wakes you up when you are tired from your yard work.
+Let a good buddy borrow it and he was also impressed with the ease of use.
+Outstanding for jobs around you house, two thumbs up!
+The McCulloch3516F chainsaw puts an end to my problem of gas engines that don't start when I really need them to.
+I have been cutting out maple branches this summer from trees with verticillium wilt . branches up to 8 inches are no problem at all.
+This saw has an impressive safety feature. a chain brake that stops the saw instantly as soon as the trigger is released or the safety guard is pushed forward.
+I mean instantly. there is a loud clunk as the brake engages and the chain stops dead.
+This takes some getting used to, as the brake engages if you wiggle your finger while running the chainsaw, causing the chain to start and stop.
+There is no concept of "revving" the chain.
+It also means there is no "idle" speed for the chain.
+It is on or off.
+And that is safe.
+You can also consider it a safety feature that the chain has fewer cutting teeth than my gas powered saw chains.
+I don't know the relative operating RPMs .
+if they are about the same, this saw seems to cut a little slower, and fewer teeth would do that.
+This makes the saw less aggressive and less likely to pull out of your control.
+I like that.
+As I say, the cutting ability is well in excess of the 8" branches I've been dealing with.
+The oil fill is conveniently located so that you don't have to tip the saw to fill it, although a small funnel is helpful.
+Overall, I am very happy with this chainsaw.
+The saw works very well, overall.
+I have some minor complaints:.
+1.
+The chain drive gear cover requires a Phillips screwdriver to get the cover off.
+This is just dumb !.
+There's no good reason why it shouldn't have a thumbscrew similar to, but smaller than the chain tensioner thumbscrew.
+As someone pointed out, the chain gear area regularly gets clogged with oily sawdust that needs to be cleaned out.
+I can't figure out a good excuse for this design mistake.
+2 .
+The "instant chain stop" feature woks well, but the remaining motor drivetrain makes a loud howling screech until the motor actually stops.
+Makes me think there might be something wrong with the drivetrain.
+The saw seems to work well, though.
+Time will tell.
+3 .
+The oil filler neck is titled to the side, not vertical to the saw when placed on level ground.
+This makes viewing the oil stream going in and the rising oil level unnecessarily difficult.
+This is another obvious design mistake.
+4 .
+This is my first chainsaw, but it seems the bar oil reservoir is ridiculously small !.
+I have to refill it every 10 minutes of use.
+After reading other reviews for this model I immediately threw out the stock chain without ever using it and replaced it with an Oregon model S52 chain (dual chains is model ST52).
+Note that it fits fine although it is advertized as a 14 inch chain and this saw is advertized to be 16 inches.
+Go figure..
+Also, after reading about the risk of burning up the motor due to using a too lightweight extension cord, I bought a "US Wire 65100 12/3 100-Foot SJTW Orange Heavy Duty Extension Cord".
+It's heavy, alright !
+ */

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/URLsWithReviewFinderByProductName.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/URLsWithReviewFinderByProductName.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/URLsWithReviewFinderByProductName.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/URLsWithReviewFinderByProductName.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,21 @@
+package opennlp.tools.apps.review_builder;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.similarity.apps.BingQueryRunner;
+import opennlp.tools.similarity.apps.HitBase;
+
+public class URLsWithReviewFinderByProductName {
+BingQueryRunner search = new BingQueryRunner();
+	
+	public List<String> findFacebookURLByNameAndZip(String name){
+		List<HitBase> foundFBPages = search.runSearch(name, 20);
+		List<String> results = new ArrayList<String>();
+		for(HitBase h: foundFBPages){
+			results.add(h.getUrl());
+		}
+		return results;
+	}
+	
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/WebPageReviewExtractor.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/WebPageReviewExtractor.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/WebPageReviewExtractor.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/WebPageReviewExtractor.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,444 @@
+package opennlp.tools.apps.review_builder;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import opennlp.tools.jsmlearning.ProfileReaderWriter;
+import opennlp.tools.parse_thicket.apps.WebPageExtractor;
+import opennlp.tools.similarity.apps.HitBase;
+import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;
+import opennlp.tools.similarity.apps.utils.Utils;
+import opennlp.tools.textsimilarity.TextProcessor;
+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;
+
+import org.apache.commons.lang.StringUtils;
+
+public class WebPageReviewExtractor extends WebPageExtractor {
+	
+	/** Product search helper; used by formReviewsForAProduct to find pages by product name. */
+	BingAPIProductSearchManager prodman = new BingAPIProductSearchManager();
+	/** Sentence rewriter; assigned in the constructor. */
+	SentenceOriginalizer orig = null;
+		
+	/**
+	 * Creates the extractor together with its SentenceOriginalizer.
+	 *
+	 * @param resourceDir passed unchanged to the SentenceOriginalizer(String) constructor
+	 */
+	public WebPageReviewExtractor(String resourceDir) {
+		orig = new SentenceOriginalizer(resourceDir);
+	}
+
+	/**
+	 * Drops entries that are near-duplicates of an earlier entry. Every pair of
+	 * non-empty entries is compared; when the measured value exceeds 0.7 the later
+	 * entry of the pair is marked for removal. Any exception during processing is
+	 * reported and whatever was collected so far is returned.
+	 *
+	 * @param hits candidate strings
+	 * @return the entries that were not marked, in their original order
+	 */
+	public String[] removeDuplicates(String[] hits)
+	{
+		final StringDistanceMeasurer distance = new StringDistanceMeasurer();
+		final List<Integer> markedIndexes = new ArrayList<Integer>();
+		final List<String> kept = new ArrayList<String>();
+
+		try
+		{
+			for (int earlier = 0; earlier < hits.length; earlier++)
+			{
+				for (int later = earlier + 1; later < hits.length; later++)
+				{
+					String a = hits[earlier];
+					String b = hits[later];
+					boolean bothPresent = !(StringUtils.isEmpty(a) || StringUtils.isEmpty(b));
+					if (bothPresent && distance.measureStringDistance(a, b) > 0.7)
+					{
+						// the later member of a duplicate pair is the one dropped
+						markedIndexes.add(later);
+					}
+				}
+			}
+
+			for (int k = 0; k < hits.length; k++)
+			{
+				if (markedIndexes.contains(k))
+					continue;
+				kept.add(hits[k]);
+			}
+
+			if (kept.size() < hits.length)
+			{
+				String firstDropped = hits[markedIndexes.get(0)];
+				System.out.println("Removed duplicates from relevant search results, including "
+					+ firstDropped);
+			}
+		}
+		catch (Exception e)
+		{
+			System.out.println("Problem removing duplicates from relevant images");
+		}
+
+		return kept.toArray(new String[0]);
+
+	}
+
+	public ReviewObj extractSentencesWithPotentialReviewPhrases(String url)
+	{
+		ReviewObj reviewObj = new ReviewObj();
+		int maxSentsFromPage= 20;
+		List<String[]> results = new ArrayList<String[]>();
+
+		String downloadedPage = pageFetcher.fetchPage(url, 20000);
+		if (downloadedPage == null || downloadedPage.length() < 100)
+		{
+			return null;
+		}
+
+		String pageOrigHTML = pageFetcher.fetchOrigHTML(url);
+
+		List<String> productFeaturesList = new ArrayList<String> ();
+		String[] productFeatures = StringUtils.substringsBetween(pageOrigHTML, "<li>", "</li>" );
+		if (productFeatures!=null){
+			for(String item: productFeatures ){
+				if (item.indexOf("class")>-1 || item.indexOf("www.")>-1 || item.indexOf("href")>-1)
+					continue;
+				item = item.replace("<span>","").replace("</span>","").replace("<b>","").replace("</b>","");
+				if (item.length()>80 && MinedSentenceProcessor.acceptableMinedSentence(item)==null){
+					System.out.println("Rejected sentence by GeneratedSentenceProcessor.acceptableMinedSentence = "+item);
+					continue;
+				}
+				productFeaturesList .add(item);
+			}
+		}
+		
+		productFeaturesList = cleanProductFeatures(productFeaturesList);
+		
+		String startArea = StringUtils.substringBetween(pageOrigHTML, "reviewHistoPop", "t of 5 stars");
+		String item =  StringUtils.substringBetween(startArea, "title=\"","ou" );
+		if (item==null){//title="4.0 out of 5 stars" ><span>4.0 out of 5 stars</span>
+			int index = pageOrigHTML.indexOf("of 5 stars\"");
+			startArea = StringUtils.substringBetween(pageOrigHTML, "of 5 stars\"", "of 5 stars");
+			item =  StringUtils.substringBetween(startArea, "<span>","ou" );
+		}
+
+		// if found, process
+		if (item!=null){
+			try {
+				float rating = Float.parseFloat(item);
+				reviewObj.setRating(rating);
+			} catch (NumberFormatException e) {
+				// TODO Auto-generated catch block
+				e.printStackTrace();
+			}
+		}
+		//productFeaturesList .add(item);
+
+		downloadedPage= downloadedPage.replace("     ", "&");
+		downloadedPage = downloadedPage.replaceAll("(?:&)+", "#");
+		String[] sents = downloadedPage.split("#");
+		List<TextChunk> sentsList = new ArrayList<TextChunk>();
+		for(String s: sents){
+			s = s.trim().replace("  ", ". ").replace("..", ".").replace(". . .", " ")
+					.replace(": ", ". ").replace("- ", ". ").
+					replace (". .",".").trim();
+			sentsList.add(new TextChunk(s, s.length()));
+		}
+
+		Collections.sort(sentsList, new TextChunkComparable());
+		String[] longestSents = new String[maxSentsFromPage];
+		int j=0;														// -1 removed
+		for(int i=sentsList.size()-1 -maxSentsFromPage; i< sentsList.size()&& j<longestSents.length; i++){
+			longestSents[j] = sentsList.get(i).text;
+			j++;
+		}
+
+		sents = cleanListOfSents(longestSents);
+		
+		sents = removeDuplicates(sents);
+		sents = verifyEnforceStartsUpperCase(sents);
+
+		reviewObj.setFeaturePhrases(productFeaturesList.toArray(new String[0]));
+		reviewObj.setOrigSentences(sents);
+
+		return reviewObj;
+	}
+
+	/**
+	 * Trims every sentence in place and upper-cases its first character. Null and
+	 * empty entries are left as they are after trimming instead of causing an
+	 * exception.
+	 *
+	 * @param sents sentences to normalise; modified in place
+	 * @return the same array
+	 */
+	private String[] verifyEnforceStartsUpperCase(String[] sents) {
+		for(int i=0; i<sents.length; i++){
+			String s = StringUtils.trim(sents[i]); // null-safe: null stays null
+			if (s!=null && s.length()>0){
+				String sFirstChar = s.substring(0, 1);
+				String upper = sFirstChar.toUpperCase();
+				if (!upper.equals(sFirstChar)){
+					s = upper+s.substring(1);
+				}
+			}
+			sents[i] = s;
+		}
+		return sents;
+	}
+
+	/**
+	 * Filters out feature strings that are recurring page boilerplate rather than
+	 * product features: entries starting with "Unlimited Free", "View Larger" or
+	 * "What's in the box", and entries mentioning "shipping" anywhere. Null
+	 * entries are dropped.
+	 *
+	 * @param productFeaturesList candidate feature strings
+	 * @return a new list with the boilerplate entries removed
+	 */
+	private List<String> cleanProductFeatures(List<String> productFeaturesList) {
+		List<String> results = new ArrayList<String>();
+		for(String feature: productFeaturesList){
+			if (feature==null)
+				continue;
+			// The original tested "View Larger" twice; the third recurring phrase listed in
+			// the notes at the end of SentenceOriginalizer.java is "What's in the box".
+			boolean boilerplate = feature.startsWith("Unlimited Free")
+					|| feature.startsWith("View Larger")
+					|| feature.startsWith("What's in the box")
+					|| feature.indexOf("shipping")>=0; // >=0 so a match at position 0 counts too
+			if (boilerplate)
+				continue;
+			results.add(feature);
+		}
+		return results;
+	}
+
+	/**
+	 * Filters text portions and splits the surviving ones into sentences.
+	 * A portion is dropped when it is null, when MinedSentenceProcessor rejects
+	 * it, when its average length per '.'-separated fragment is below 40, or when
+	 * its average length per space-separated token is below 4. The last sentence
+	 * of every split is discarded, as are sentences containing '|'.
+	 *
+	 * @param longestSents text portions, each possibly holding several sentences
+	 * @return the cleaned sentences, converted to ASCII
+	 */
+	protected String[] cleanListOfSents(String[] longestSents)
+	{
+		float minFragmentLength = 40, minFragmentLengthSpace=4;
+
+		List<String> sentsClean = new ArrayList<String>();
+		for (String sentenceOrMultSent : longestSents)
+		{
+			if (sentenceOrMultSent==null)
+				continue;
+			if (MinedSentenceProcessor.acceptableMinedSentence(sentenceOrMultSent)==null){
+				System.out.println("Rejected sentence by GeneratedSentenceProcessor.acceptableMinedSentence = "+sentenceOrMultSent);
+				continue;
+			}
+			// aaa. hhh hhh.  kkk . kkk ll hhh. lll kkk n.
+			int numOfDots = sentenceOrMultSent.replace('.','&').split("&").length;
+			float avgSentenceLengthInTextPortion = (float)sentenceOrMultSent.length() /(float) numOfDots;
+			if ( avgSentenceLengthInTextPortion<minFragmentLength)
+				continue;
+			// o oo o ooo o o o ooo oo ooo o o oo
+			numOfDots = sentenceOrMultSent.replace(' ','&').split("&").length;
+			avgSentenceLengthInTextPortion = (float)sentenceOrMultSent.length() /(float) numOfDots;
+			if ( avgSentenceLengthInTextPortion<minFragmentLengthSpace)
+				continue;
+
+			List<String> furtherSplit = TextProcessor.splitToSentences(sentenceOrMultSent);
+			if (furtherSplit==null)
+				continue;
+
+			// The last sentence of the split is skipped, as before. Iterating up to size-1
+			// instead of calling remove() also copes with an empty or unmodifiable list.
+			for(int k=0; k<furtherSplit.size()-1; k++){
+				String s = furtherSplit.get(k);
+				if (s==null || s.indexOf('|')>-1)
+					continue;
+				s = s.replace("<em>"," ").replace("</em>"," ");
+				s = Utils.convertToASCII(s);
+				sentsClean.add(s);
+			}
+		}
+
+		return sentsClean.toArray(new String[0]);
+	}
+
+	/**
+	 * Splits long sentences (80 characters or more) in two at a comma. Of the first
+	 * and the last comma, the one closer to the middle of the sentence is used; the
+	 * original comparison was inverted and picked the farther one. The second half
+	 * is trimmed before capitalisation so that its first letter, not a leading
+	 * space, is upper-cased. Null and empty sentences are dropped.
+	 *
+	 * @param furtherSplit sentences to shorten
+	 * @return a new list with short sentences unchanged and long ones split
+	 */
+	private List<String> furtherMakeSentencesShorter(List<String> furtherSplit) {
+		int MIN_LENGTH_TO_SPLIT = 80;
+		List<String> results = new ArrayList<String>();
+		for(String sent: furtherSplit) {
+			if (sent==null || sent.length()==0)
+				continue;
+			sent = startWithCapitalSent(sent);
+			int len = sent.length(); 
+			if (len <MIN_LENGTH_TO_SPLIT){
+				results.add(sent);
+				continue;
+			}
+
+			int commaIndex = sent.indexOf(',');
+			int lastCommaIndex = sent.lastIndexOf(',');
+			int middle = len/2;
+			int splitIndex = Math.abs(commaIndex-middle) <= Math.abs(lastCommaIndex-middle)
+					? commaIndex : lastCommaIndex;
+			// no comma at all, or a comma in first position: nothing sensible to split off
+			if (commaIndex<0 || splitIndex<=0){
+				results.add(sent);
+				continue;
+			}
+
+			String head = sent.substring(0, splitIndex)+". ";
+			String tail = sent.substring(splitIndex+1).trim();
+			results.add(head);
+			if (tail.length()>0)
+				results.add(startWithCapitalSent(tail));
+		}
+		return results;
+	}
+
+	/**
+	 * Returns the sentence with its first character upper-cased.
+	 *
+	 * @param sent sentence text; null or empty input is returned unchanged
+	 *             (previously it caused an exception)
+	 * @return the capitalised sentence
+	 */
+	private String startWithCapitalSent(String sent) {
+		if (sent==null || sent.length()==0)
+			return sent;
+		String firstChar = sent.substring(0,1);
+		String remainder = sent.substring(1);
+		
+		return firstChar.toUpperCase()+remainder;
+	}
+
+	/**
+	 * Finds pages for a product, mines each page for sentences, rewrites them and
+	 * returns the rewritten sentences of the first page that yielded a result.
+	 * Sentiment phrases of later pages are merged into that first result.
+	 * Failures are reported and never propagate: the method returns what it has,
+	 * or an empty list when nothing was mined.
+	 *
+	 * @param name product name to search for
+	 * @return rewritten sentences; never null
+	 */
+	public List<String> formReviewsForAProduct(String name /*long bpid, String keywordsName*/){
+		ReviewObj reviewObjTotal = null;
+		try {
+			List<HitBase> pagesForAProduct = prodman.findProductByName(name, 1);
+			if (pagesForAProduct==null)
+				return new ArrayList<String>();
+
+			for(HitBase p: pagesForAProduct){
+				ReviewObj reviewObj = 
+						extractSentencesWithPotentialReviewPhrases(p.getUrl());
+				if (reviewObj==null)
+					continue;
+				// init with first element
+				boolean isFirst = reviewObjTotal == null;
+				if (isFirst)
+					reviewObjTotal = reviewObj;
+
+				String[] afterOriginalization = orig.convert(reviewObj.getOrigSentences(), p.getTitle(), reviewObj.getKeywordsName());
+				reviewObj.setOriginalizedSentences(Arrays.asList(afterOriginalization));
+
+				// Store a copy: orig.formedPhrases belongs to the originalizer and is reused by
+				// later convert() calls. Sharing it made the first result add the list to itself.
+				List<String> phrases = orig.formedPhrases==null
+						? new ArrayList<String>() : new ArrayList<String>(orig.formedPhrases);
+				reviewObj.setSentimentPhrases(phrases);
+
+				if (isFirst || phrases.isEmpty())
+					continue;
+
+				List<String> existing = reviewObjTotal.getSentimentPhrases();
+				List<String> merged = existing==null
+						? new ArrayList<String>() : new ArrayList<String>(existing);
+				for(String phrase: phrases){
+					if (!merged.contains(phrase))
+						merged.add(phrase);
+				}
+				reviewObjTotal.setSentimentPhrases(merged);
+			}
+			if (reviewObjTotal==null) return new ArrayList<String>();
+			
+			// NOTE(review): the result was never used by the original code either; the call is
+			// kept in case buildManyReviewTexts has side effects - confirm and remove if not.
+			buildManyReviewTexts(reviewObjTotal);
+			
+		} catch (Exception e) {
+			// best effort: report and fall through to return whatever was collected
+			e.printStackTrace();
+		} 
+		if (reviewObjTotal==null)
+			return new ArrayList<String>();
+		List<String> originalized = reviewObjTotal.getOriginalizedSentences();
+		return originalized!=null ? originalized : new ArrayList<String>();
+	}
+
+	/**
+	 * Concatenates the originalized sentences of a review into one text,
+	 * interleaving a feature phrase after every even-indexed sentence (while
+	 * feature phrases are available) and a line break after every fifth sentence
+	 * position.
+	 *
+	 * @param reviewObj review whose originalized sentences and feature phrases are used
+	 * @return the assembled review text
+	 */
+	private String buildText(ReviewObj reviewObj) {
+
+		String[] featurePhrases = reviewObj.getFeaturePhrases();
+		List<String> originalized = reviewObj.getOriginalizedSentences();
+		StringBuilder text = new StringBuilder();
+		int position = 0;
+		for (String sentence : originalized) {
+			if (sentence != null) {
+				text.append(sentence).append(" ");
+			}
+
+			boolean evenSlot = position % 2 == 0;
+			if (evenSlot && position < featurePhrases.length && featurePhrases[position] != null) {
+				String phrase = featurePhrases[position];
+				text.append(phrase);
+				// close the phrase with a period unless it already ends a sentence
+				boolean terminated = phrase.endsWith("!") || phrase.endsWith("?")
+						|| phrase.endsWith(".\"");
+				if (!terminated) {
+					text.append(". ");
+				}
+			}
+
+			if (position % 5 == 0) {
+				text.append("\n");
+			}
+			position++;
+		}
+		return text.toString();
+	}
+	
+	private List<String> buildManyReviewTexts(ReviewObj reviewObj) {
+
+		String[] features = reviewObj.getFeaturePhrases();
+		List<String> sentences =reviewObj.getOriginalizedSentences();
+		
+		// first count how many sentences
+				int NUM_SENTS_IN_REVIEW = 7;
+				int count=0;
+				for(String sent:sentences){
+					if (sent!=null)
+						count++;
+				}
+		int nReviews = count/NUM_SENTS_IN_REVIEW;
+		if (nReviews<1)
+			nReviews=1;
+		StringBuffer[] bufs = new StringBuffer[nReviews];
+		for(int i=0; i<bufs.length; i++){
+			bufs[i] = new StringBuffer();
+		}
+				
+		count = 0;
+		int currentRevIndex = 0;
+		for(String sent:sentences){
+			if (sent!=null)
+				bufs[currentRevIndex].append(sent+" ");
+			if (count%2==0 && count<features.length)
+				if (features[count]!=null){
+					bufs[currentRevIndex].append(features[count]);
+					if (!(features[count].endsWith("!") ||features[count].endsWith("?")||features[count].endsWith("?") 
+							||features[count].endsWith(".\"") ))
+						bufs[currentRevIndex].append(". ");
+				}
+
+			try {
+				if (bufs[currentRevIndex].toString().split(".").length>4)
+					bufs[currentRevIndex].append("\n");
+			} catch (Exception e) {
+				e.printStackTrace();
+			}
+			
+			count++;
+			currentRevIndex++;
+			if (currentRevIndex>=nReviews)
+				currentRevIndex=0;	
+		}
+		
+		List<String> results = new ArrayList<String>();
+		for(StringBuffer b:bufs){
+			String sent = b.toString().replace("!.","!").replace("?.","?");
+			results.add(sent);
+		}
+		return results;
+	}
+
+	/**
+	 * Ad-hoc driver: initialises the parser/chunker from a local resource
+	 * directory, runs the upper-case check on two sample strings, forms review
+	 * sentences for one hard-coded product name and writes them to
+	 * {@code formedReviewSentences4.csv}.
+	 *
+	 * @param args ignored
+	 */
+	public static void main(String[] args){
+		String resourceDir = "C:/stanford-corenlp/src/test/resources/";
+		// return value not needed here; the call is kept as in the original driver
+		ParserChunker2MatcherProcessor.getInstance(resourceDir); 
+			
+		//ProductFinderInAWebPage init = new ProductFinderInAWebPage("C:/workspace/relevanceEngine/src/test/resources");
+
+		WebPageReviewExtractor extractor = new WebPageReviewExtractor(resourceDir);
+		extractor.verifyEnforceStartsUpperCase(new String[]{ "hhhh !", "Klyn mng hghj ."});
+				
+		// Earlier inputs, kept for reference:
+				//"McCulloch 16-Inch 3.5 HP Electric Chain Saw");
+				//	"WORX Electric JawSaw with Extension Handle");
+				//	"Panasonic 2-Line Digital Cordless System", 215200345l);
+				//	"Sport Silver Dial Women", 215475290);
+				//"Rectangle Area Rug", 213885290);
+				//		"40VA Replacement Transformer", 213085391);
+				//		"PSYLLIUM POWDER Food", 213185391);
+				//		"Leighton Toilet Tank", 213285391);
+				//"Samsung Knack U310 Flip Phone", 214495493);
+				//"Panasonic Cordless Phone 2 handsets", 214870820);
+				//"Winegard TV Antenna Pre-Amplifier", 211924499);
+				//"Atlona AT-HD-V18 HDMI Distribution Amplifier", 215162612);
+				//"airport express base station", 211462827);
+				//"denon  Network Streaming A/V Home Theater receiver", 209565926);
+				//"sherwood receiver 400 watts stereo", 211286714);
+				//"multizone music distribution system", 205333526);
+				//"niles zr4", 215104912);
+				//"alpine waterproof marine cd receiver", 215167695);
+				//"sherwood channel receiver dolby", 215116818);
+				//"sherwood lcd tv widescreen hdtv", 215481917);
+				//"multiroom music distribution system", 205333526);
+				//		"fusion ms compact stereo", 215649463); 
+				//"pyle pro speaker", 213265125);
+				// "apple iphone 4g",  213265325);
+				//"sherwood high performance receiver", 215394729);
+				//"sony camera housing", 211960592);
+				//"sony xl2100", 1135329203);
+				//"sony 18 megapixel-digital-camera", 215743208);
+				//"sony m470 microcassette tape recorder", 205828052);
+				//"sony monitor terminal expansion board", 213244217);
+				//"sony cybershot digital-camera", 215743207);
+				//"sony interchangeable lens handycam camcorder", 215153503);
+				//"canon powershot digital camera", 214754207);
+				//"brother ink pageyield yellow", 204743189);
+				// ?? "garmin 2200 gps navigator", 215167480);
+		String productName = "halo portable backup battery";
+		List<String> reviewSentences = extractor.formReviewsForAProduct(productName);
+
+		ProfileReaderWriter.writeReportListStr(reviewSentences, "formedReviewSentences4.csv");
+
+
+		/*		
+			res=	extractor. extractSentencesWithPotentialReviewPhrases(//"http://www.sitbetter.com/view/chair/ofm-500-l/ofm--high-back-leather-office-chair/");
+		//"http://www.amazon.com/OFM-High-Back-Leather-Integral-Headrest/dp/B002SIW1E0/ref=sr_1_1?ie=UTF8&qid=1353370254&sr=8-1&keywords=OFM-High-Back-Leather-Integral-Headrest");
+		//"http://www.amazon.com/Oregon-511AX-Chain-Grinder-Sharpener/dp/B0000AX0CY/ref=sr_1_4?s=industrial&ie=UTF8&qid=1353373435&sr=1-4&keywords=chain+saws");
+			//			"http://www.amazon.com/Bearing-UCP204-12-Housing-Mounted-Bearings/dp/B002BBIYWM/ref=sr_1_1?s=industrial&ie=UTF8&qid=1353373786&sr=1-1&keywords=pillow+block+bearing");
+			"http://www.amazon.com/ShelterLogic-20--Feet-Auto-Shelter/dp/B001OFNK8O/ref=sr_1_1?s=lawn-garden&ie=UTF8&qid=1353376677&sr=1-1&keywords=shelterlogic+62680+autoshelter+portable+garage+carport");			
+						System.out.println(res);
+		 */			
+
+	}
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,171 @@
+package opennlp.tools.apps.utils.email;
+
+import java.util.Properties;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import javax.mail.*;
+import javax.mail.internet.*;
+import javax.activation.*;
+/**
+ * Responsible for sending e-mails through an authenticated SMTP server
+ * (STARTTLS on port 587). The host, user and password are supplied per call.
+ * It will be extended to handle arbitrary smtp servers.
+ * @author GaDo
+ *
+ */
+public class EmailSender {
+		/** Demo mailbox used by {@link #main(String[])}. */
+		private static final String mailboxAddress="bgalitsky@hotmail.com";
+
+		/** Minimal shape check: something@something.something, no whitespace. */
+		private static final Pattern ADDRESS_PATTERN = Pattern.compile("[^\\s]+@[^\\s]+\\.[^\\s]+");
+
+		/**
+		 * Composes an HTML message, optionally with one file attachment, and sends it.
+		 * Spaces are removed from all addresses (the passed objects are modified in
+		 * place) and the addresses are validated before anything is sent.
+		 *
+		 * @param smtp    SMTP host name
+		 * @param user    account name used for authentication
+		 * @param pass    password of {@code user}
+		 * @param from    sender address; required
+		 * @param to      primary recipients; at least one is required
+		 * @param cc      carbon-copy recipients, may be {@code null} or empty
+		 * @param bcc     blind-copy recipients, may be {@code null} or empty
+		 * @param subject message subject
+		 * @param body    message body, sent as {@code text/html}
+		 * @param file    path of a file to attach, or {@code null} for no attachment
+		 * @return {@code true} if the message was handed to the server, {@code false}
+		 *         if an address was invalid or sending failed
+		 * @throws Exception declared for compatibility; failures are reported through
+		 *         the return value
+		 */
+		public  boolean sendMail(String smtp, String user, String pass, InternetAddress from, InternetAddress[] to, InternetAddress[] cc, InternetAddress[] bcc, String subject, String body, String file) throws Exception
+		{
+			boolean correct=true;
+			try
+			{
+				//Eliminate spaces from addresses
+				if(from!=null){
+					from.setAddress(from.getAddress().replace(" ","").trim());
+				}
+				to = eliminateSpaces(to);
+				cc = eliminateSpaces(cc);
+				bcc = eliminateSpaces(bcc);
+				correct = validateAddress(from,to,cc,bcc);
+
+				if(correct){
+					//Configuration of the properties -> smtp
+					Properties props = new Properties();
+					props.put("mail.smtp.host", smtp);
+					props.put("mail.smtp.auth", "true");
+					props.put("mail.smtp.port", "587");
+					props.put("mail.smtp.starttls.enable", "true");
+					Authenticator auth = new SMTP_Authenticator(user, pass);
+					Session session = Session.getInstance(props, auth);
+
+					//Composing the message
+					MimeMessage message = new MimeMessage(session);
+					message.setFrom(from);
+					message.setRecipients(Message.RecipientType.TO,to);
+					message.setRecipients(Message.RecipientType.CC,cc);
+					message.setRecipients(Message.RecipientType.BCC,bcc);
+					message.setSubject(subject);
+					if(file==null)
+					{
+						message.setContent(body, "text/html");
+					}
+					else
+					{
+						Multipart multipart = new MimeMultipart();
+						BodyPart messageBodyPart = new MimeBodyPart();
+						messageBodyPart.setContent(body, "text/html");
+						multipart.addBodyPart(messageBodyPart);
+						messageBodyPart = new MimeBodyPart();
+						DataSource source = new FileDataSource(file);
+						messageBodyPart.setDataHandler(new DataHandler(source));
+						messageBodyPart.setFileName(file);
+						multipart.addBodyPart(messageBodyPart);
+
+						message.setContent(multipart);
+					}
+
+					Transport tr = session.getTransport("smtp");
+					try {
+						// authenticate as the caller-supplied user, not a fixed mailbox
+						tr.connect(smtp, user, pass);
+						message.saveChanges();
+						tr.sendMessage(message, message.getAllRecipients());
+					} finally {
+						// release the connection even when connect/send fails
+						tr.close();
+					}
+				}
+			}
+			catch(Exception e)
+			{
+				e.printStackTrace();
+				correct=false;
+			}
+			return correct;
+		}
+
+		/**
+		 * Checks that a sender and at least one recipient are present and that every
+		 * given address is non-empty and looks like {@code local@domain.tld}.
+		 *
+		 * @return {@code true} if all addresses pass the check
+		 */
+		private  boolean validateAddress(InternetAddress from,
+				InternetAddress[] to, InternetAddress[] cc,
+				InternetAddress[] bcc) {
+			try{
+				if (from == null || to == null || to.length < 1) {
+					return false;
+				}
+				return isValid(from) && allValid(to) && allValid(cc) && allValid(bcc);
+			}catch(Exception e){
+				e.printStackTrace();
+				return false;
+			}
+		}
+
+		/** Returns {@code true} if the list is {@code null} or every entry is a valid address. */
+		private boolean allValid(InternetAddress[] addresses) {
+			if (addresses == null) {
+				return true;
+			}
+			for (InternetAddress address : addresses) {
+				if (!isValid(address)) {
+					return false;
+				}
+			}
+			return true;
+		}
+
+		/** Returns {@code true} if the address is present, non-empty and matches the address pattern. */
+		private boolean isValid(InternetAddress address) {
+			if (address == null || address.getAddress() == null) {
+				return false;
+			}
+			String value = address.getAddress();
+			return !value.equals("") && ADDRESS_PATTERN.matcher(value).matches();
+		}
+
+		/**
+		 * Removes all spaces from each address in place.
+		 *
+		 * @param address addresses to clean, may be {@code null}
+		 * @return the same array that was passed in
+		 */
+		private  InternetAddress[] eliminateSpaces(InternetAddress[] address) {
+			if(address!=null){
+				for(int i=0;i<address.length;i++){
+					address[i].setAddress(address[i].getAddress().replace(" ","").trim());
+				}
+			}
+			return address;
+		}
+
+
+		/** Manual smoke test: sends a fixed message from the demo mailbox to itself. */
+		public static void main(String[] args){
+			EmailSender s = new EmailSender();
+			try {
+				s.sendMail("smtp.live.com", mailboxAddress, "******", new InternetAddress(mailboxAddress), new InternetAddress[]{new InternetAddress(mailboxAddress)}, new InternetAddress[]{}, new InternetAddress[]{}, 
+						"Generated content for you", "body", null);
+			} catch (AddressException e) {
+				// malformed demo address
+				e.printStackTrace();
+			} catch (Exception e) {
+				// sendMail declares Exception; nothing more to do in a smoke test
+				e.printStackTrace();
+			}
+		}
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/SMTP_Authenticator.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/SMTP_Authenticator.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/SMTP_Authenticator.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/SMTP_Authenticator.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,24 @@
+package opennlp.tools.apps.utils.email;
+import javax.mail.*;
+
+
+/**
+ * Supplies the user name and password for SMTP authentication.
+ * Credentials are provided by the caller; none are stored in the source.
+ */
+public class SMTP_Authenticator extends javax.mail.Authenticator {
+	
+	private final String username;
+	private final String password;
+	
+	/**
+	 * @param user account name to authenticate with
+	 * @param pwd  password of that account
+	 */
+	public SMTP_Authenticator(String user, String pwd) {
+		username=user;
+		password=pwd;
+	}
+
+	/** Returns the credentials given to the constructor. */
+	@Override
+	public PasswordAuthentication getPasswordAuthentication() {
+		return new PasswordAuthentication(username, password);
+	}
+}