You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by bg...@apache.org on 2016/11/22 13:05:20 UTC
[06/11] opennlp-sandbox git commit: removed stanford nlp refs
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/EdgeProductBuilder.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/EdgeProductBuilder.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/EdgeProductBuilder.java
deleted file mode 100644
index 6b72e47..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/EdgeProductBuilder.java
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.parse_thicket.parse_thicket2graph;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-import java.util.Set;
-
-import opennlp.tools.parse_thicket.ParseCorefsBuilder;
-import opennlp.tools.parse_thicket.ParseThicket;
-import opennlp.tools.parse_thicket.ParseTreeNode;
-import opennlp.tools.parse_thicket.matching.Matcher;
-import opennlp.tools.textsimilarity.ParseTreeChunk;
-
-import org.jgrapht.Graph;
-import org.jgrapht.alg.BronKerboschCliqueFinder;
-import org.jgrapht.graph.DefaultEdge;
-import org.jgrapht.graph.SimpleGraph;
-
-
-public class EdgeProductBuilder {
- private Matcher matcher = new Matcher();
- private ParseCorefsBuilder ptBuilder = ParseCorefsBuilder.getInstance();
- private GraphFromPTreeBuilder graphBuilder = new GraphFromPTreeBuilder();
-
-
- public Graph<ParseGraphNode[], DefaultEdge>
- buildEdgeProduct(Graph<ParseGraphNode, DefaultEdge> g1, Graph<ParseGraphNode, DefaultEdge> g2 ){
- Graph<ParseGraphNode[], DefaultEdge> gp =
- new SimpleGraph<ParseGraphNode[], DefaultEdge>(DefaultEdge.class);
-
- Set<DefaultEdge> edges1 = g1.edgeSet();
- Set<DefaultEdge> edges2 = g2.edgeSet();
- // build nodes of product graph
- for(DefaultEdge e1:edges1){
- for(DefaultEdge e2:edges2){
- ParseGraphNode sourceE1s = g1.getEdgeSource(e1), sourceE1t = g1.getEdgeTarget(e1);
- ParseGraphNode sourceE2s = g2.getEdgeSource(e2), sourceE2t = g2.getEdgeTarget(e2);
-
- if (isNotEmpty(matcher.generalize(sourceE1s.getPtNodes(), sourceE2s.getPtNodes())) &&
- isNotEmpty(matcher.generalize(sourceE1t.getPtNodes(), sourceE2t.getPtNodes()))
- )
- gp.addVertex(new ParseGraphNode[] {sourceE1s, sourceE1t, sourceE2s, sourceE2t } );
- }
- }
-
- Set<ParseGraphNode[]> productVerticesSet = gp.vertexSet();
- List<ParseGraphNode[]> productVerticesList = new ArrayList<ParseGraphNode[]>(productVerticesSet);
- for(int i=0; i<productVerticesList.size(); i++){
- for(int j=i+1; j<productVerticesList.size(); j++){
- ParseGraphNode[] prodVertexI = productVerticesList.get(i);
- ParseGraphNode[] prodVertexJ = productVerticesList.get(j);
- if (bothAjacentOrNeitherAdjacent(prodVertexI, prodVertexJ)){
- gp.addEdge(prodVertexI, prodVertexJ);
- }
- }
- }
-
-
- return gp;
-
- }
- /*
- * Finding the maximal clique is the slowest part
- */
-
- public Collection<Set<ParseGraphNode[]>> getMaximalCommonSubgraphs(Graph<ParseGraphNode[], DefaultEdge> g){
- BronKerboschCliqueFinder<ParseGraphNode[], DefaultEdge> finder =
- new BronKerboschCliqueFinder<ParseGraphNode[], DefaultEdge>(g);
-
- Collection<Set<ParseGraphNode[]>> cliques = finder.getBiggestMaximalCliques();
- return cliques;
- }
-
-
- private boolean bothAjacentOrNeitherAdjacent(ParseGraphNode[] prodVertexI,
- ParseGraphNode[] prodVertexJ) {
- List<ParseGraphNode> prodVertexIlist =
- new ArrayList<ParseGraphNode>(Arrays.asList(prodVertexI));
- List<ParseGraphNode> prodVertexJlist =
- new ArrayList<ParseGraphNode>(Arrays.asList(prodVertexJ));
- prodVertexIlist.retainAll(prodVertexJlist);
- return (prodVertexIlist.size()==2 || prodVertexIlist.size()==4);
- }
-
-
- private boolean isNotEmpty(List<List<ParseTreeChunk>> generalize) {
- if (generalize!=null && generalize.get(0)!=null && generalize.get(0).size()>0)
- return true;
- else
- return false;
- }
-
- public Collection<Set<ParseGraphNode[]>> assessRelevanceViaMaximalCommonSubgraphs(String para1, String para2) {
- // first build PTs for each text
- ParseThicket pt1 = ptBuilder.buildParseThicket(para1);
- ParseThicket pt2 = ptBuilder.buildParseThicket(para2);
- // then build phrases and rst arcs
- Graph<ParseGraphNode, DefaultEdge> g1 = graphBuilder.buildGraphFromPT(pt1);
- Graph<ParseGraphNode, DefaultEdge> g2 = graphBuilder.buildGraphFromPT(pt2);
-
- Graph<ParseGraphNode[], DefaultEdge> gp = buildEdgeProduct(g1, g2);
- Collection<Set<ParseGraphNode[]>> col = getMaximalCommonSubgraphs(gp);
- return col;
- }
-
- public static void main(String[] args){
- EdgeProductBuilder b = new EdgeProductBuilder();
- Collection<Set<ParseGraphNode[]>> col = b.assessRelevanceViaMaximalCommonSubgraphs("Iran refuses to accept the UN proposal to end its dispute over its work on nuclear weapons."+
- "UN nuclear watchdog passes a resolution condemning Iran for developing its second uranium enrichment site in secret. " +
- "A recent IAEA report presented diagrams that suggested Iran was secretly working on nuclear weapons. " +
- "Iran envoy says its nuclear development is for peaceful purpose, and the material evidence against it has been fabricated by the US. "
-
- , "Iran refuses the UN offer to end a conflict over its nuclear weapons."+
- "UN passes a resolution prohibiting Iran from developing its uranium enrichment site. " +
- "A recent UN report presented charts saying Iran was working on nuclear weapons. " +
- "Iran envoy to UN states its nuclear development is for peaceful purpose, and the evidence against its claim is fabricated by the US. ");
- System.out.print(col);
- }
-}
-
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/GraphFromPTreeBuilder.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/GraphFromPTreeBuilder.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/GraphFromPTreeBuilder.java
deleted file mode 100644
index d19d7db..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/GraphFromPTreeBuilder.java
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.parse_thicket.parse_thicket2graph;
-
-import java.io.PrintWriter;
-import java.util.List;
-
-import opennlp.tools.parse_thicket.PTTree;
-import opennlp.tools.parse_thicket.ParseThicket;
-import opennlp.tools.parse_thicket.ParseTreeNode;
-import org.jgrapht.Graph;
-import org.jgrapht.graph.DefaultDirectedWeightedGraph;
-import org.jgrapht.graph.DefaultEdge;
-import org.jgrapht.graph.SimpleGraph;
-
-
-import edu.stanford.nlp.trees.LabeledScoredTreeNode;
-import edu.stanford.nlp.trees.Tree;
-
-public class GraphFromPTreeBuilder {
-
-
- public Graph<ParseGraphNode, DefaultEdge> buildGraphFromPT(ParseThicket pt){
- PrintWriter out = new PrintWriter(System.out);
-
-
- List<Tree> ts = pt.getSentences();
- ts.get(0).pennPrint(out);
- Graph<ParseGraphNode, DefaultEdge> gfragment = buildGGraphFromTree(ts.get(0));
-
- //ParseTreeVisualizer applet = new ParseTreeVisualizer();
- //applet.showGraph(gfragment);
-
- return gfragment;
-
- }
-
-
- private Graph<ParseGraphNode, DefaultEdge> buildGGraphFromTree(Tree tree) {
- Graph<ParseGraphNode, DefaultEdge> g =
- new SimpleGraph<ParseGraphNode, DefaultEdge>(DefaultEdge.class);
- ParseGraphNode root = new ParseGraphNode(tree,"S 0");
- g.addVertex(root);
- navigate(tree, g, 0, root);
-
- return g;
- }
-
-
-
- private void navigate(Tree tree, Graph<ParseGraphNode, DefaultEdge> g, int l, ParseGraphNode currParent) {
- //String currParent = tree.label().value()+" $"+Integer.toString(l);
- //g.addVertex(currParent);
- if (tree.getChildrenAsList().size()==1)
- navigate(tree.getChildrenAsList().get(0), g, l+1, currParent);
- else
- if (tree.getChildrenAsList().size()==0)
- return;
-
- for(Tree child: tree.getChildrenAsList()){
- String currChild = null;
- ParseGraphNode currChildNode = null;
- try {
- if (child.isLeaf())
- continue;
- if (child.label().value().startsWith("S"))
- navigate(child.getChildrenAsList().get(0), g, l+1, currParent);
-
- if (!child.isPhrasal() || child.isPreTerminal())
- currChild = child.toString()+" #"+Integer.toString(l);
- else
- currChild = child.label().value()+" #"+Integer.toString(l);
- currChildNode = new ParseGraphNode(child, currChild);
- g.addVertex(currChildNode);
- g.addEdge(currParent, currChildNode);
- } catch (Exception e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- navigate(child, g, l+1, currChildNode);
- }
- }
-
-
- /*
- private static void navigateChildren(PTTree[] trChildren, int indent, boolean parentLabelNull, boolean onlyLabelValue, List<LabeledScoredTreeNode> phrases) {
- boolean firstSibling = true;
- boolean leftSibIsPreTerm = true; // counts as true at beginning
- for (PTTree currentTree : trChildren) {
- currentTree.navigate(indent, parentLabelNull, firstSibling, leftSibIsPreTerm, false, onlyLabelValue, phrases);
- leftSibIsPreTerm = currentTree.isPreTerminal();
- // CC is a special case for English, but leave it in so we can exactly match PTB3 tree formatting
- if (currentTree.value() != null && currentTree.value().startsWith("CC")) {
- leftSibIsPreTerm = false;
- }
- firstSibling = false;
- }
- }
-
-
- private void navigate(int indent, boolean parentLabelNull, boolean firstSibling, boolean leftSiblingPreTerminal, boolean topLevel, boolean onlyLabelValue, List<LabeledScoredTreeNode> phrases) {
- // the condition for staying on the same line in Penn Treebank
- boolean suppressIndent = (parentLabelNull || (firstSibling && isPreTerminal()) || (leftSiblingPreTerminal && isPreTerminal() && (label() == null || !label().value().startsWith("CC"))));
- if (suppressIndent) {
- //pw.print(" ");
- // pw.flush();
- } else {
- if (!topLevel) {
- //pw.println();
- }
- for (int i = 0; i < indent; i++) {
- //pw.print(" ");
- // pw.flush();
- }
- }
- if (isLeaf() || isPreTerminal()) {
- String terminalString = toStringBuilder(new StringBuilder(), onlyLabelValue).toString();
- //pw.print(terminalString);
- //pw.flush();
- return;
- }
- //pw.print("(");
- String nodeString = onlyLabelValue ? value() : nodeString();
- //pw.print(nodeString);
- // pw.flush();
- boolean parentIsNull = label() == null || label().value() == null;
- navigateChildren(children(), indent + 1, parentIsNull, true, phrases);
- //pw.print(")");
-
- }
- */
-
-}
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseGraphNode.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseGraphNode.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseGraphNode.java
deleted file mode 100644
index 6f9c3ea..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseGraphNode.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.parse_thicket.parse_thicket2graph;
-
-import java.util.List;
-
-import opennlp.tools.parse_thicket.ParseTreeNode;
-import opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder;
-
-
-import edu.stanford.nlp.trees.Tree;
-
-public class ParseGraphNode {
- PT2ThicketPhraseBuilder phraseBuilder = new PT2ThicketPhraseBuilder();
-
- private Tree tree;
- private String label;
- private List<List<ParseTreeNode>> ptNodes;
-
-
-
- public List<List<ParseTreeNode>> getPtNodes() {
- return ptNodes;
- }
-
- public ParseGraphNode(Tree tree, String label) {
- super();
- this.tree = tree;
- this.label = label;
- ptNodes = phraseBuilder.buildPT2ptPhrasesForASentence(tree, null);
- }
-
- public Tree getTree() {
- return tree;
- }
-
- public void setTree(Tree tree) {
- this.tree = tree;
- }
-
- public String getLabel() {
- return label;
- }
-
- public void setLabel(String label) {
- this.label = label;
- }
-
- public String toString(){
- return label;
- }
-}
-
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseTreeVisualizer.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseTreeVisualizer.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseTreeVisualizer.java
deleted file mode 100644
index 71c1fa3..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseTreeVisualizer.java
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* ----------------------
- * JGraphAdapterDemo.java
- * ----------------------
- * (C) Copyright 2003-2008, by Barak Naveh and Contributors.
- *
- * Original Author: Barak Naveh
- * Contributor(s): -
- *
- * $Id: JGraphAdapterDemo.java 725 2010-11-26 01:24:28Z perfecthash $
- *
- * Changes
- * -------
- * 03-Aug-2003 : Initial revision (BN);
- * 07-Nov-2003 : Adaptation to JGraph 3.0 (BN);
- *
- */
-package opennlp.tools.parse_thicket.parse_thicket2graph;
-
-import java.awt.*;
-import java.awt.geom.*;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Set;
-
-import javax.swing.*;
-
-
-import org.jgraph.*;
-import org.jgraph.graph.*;
-
-import org.jgrapht.*;
-import org.jgrapht.ext.*;
-import org.jgrapht.graph.*;
-
-
-import org.jgrapht.graph.DefaultEdge;
-
-public class ParseTreeVisualizer
-extends JApplet
-{
- //~ Static fields/initializers ---------------------------------------------
-
- private static final long serialVersionUID = 3256346823498765434L;
- private static final Color DEFAULT_BG_COLOR = Color.decode("#FAFBFF");
- private static final Dimension DEFAULT_SIZE = new Dimension(1200, 800);
-
- //~ Instance fields --------------------------------------------------------
-
- //
- private JGraphModelAdapter<String, DefaultEdge> jgAdapter;
-
- public void showGraph(Graph g){
- ParseTreeVisualizer applet = new ParseTreeVisualizer();
- applet.importGraph(g);
-
- JFrame frame = new JFrame();
- frame.getContentPane().add(applet);
- frame.setTitle("Showing parse thicket");
- frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
- frame.pack();
- frame.setVisible(true);
- }
-
- // TODO cast to ParseGraphNode
- private void importGraph(Graph g) {
- // create a visualization using JGraph, via an adapter
- jgAdapter = new JGraphModelAdapter<String, DefaultEdge>(g);
-
- JGraph jgraph = new JGraph(jgAdapter);
-
- adjustDisplaySettings(jgraph);
- getContentPane().add(jgraph);
- resize(DEFAULT_SIZE);
-
- Set<String> vertexSet = ( Set<String>)g.vertexSet();
- int count=0;
- Map<Integer, Integer> level_count = new HashMap<Integer, Integer> ();
-
- for(String vertexStr: vertexSet){
- Integer key = 0;
- try {
- if (vertexStr.indexOf('#')>-1)
- key = Integer.parseInt(vertexStr.split("#")[1]);
- } catch (Exception e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- Integer howManyAlready = 0;
-
- if (key>0){
- howManyAlready = level_count.get(key);
- if (howManyAlready==null){
- howManyAlready=0;
- level_count.put(key, 1);
- } else {
- level_count.put(key, howManyAlready+1);
- }
- }
- positionVertexAt(vertexStr, count+howManyAlready*50, count);
- count+=20;
- }
-
-
- }
-
- /**
- * An alternative starting point for this demo, to also allow running this
- * applet as an application.
- *
- * @param args ignored.
- */
- public static void main(String [] args)
- {
- ParseTreeVisualizer applet = new ParseTreeVisualizer();
- applet.init();
-
- JFrame frame = new JFrame();
- frame.getContentPane().add(applet);
- frame.setTitle("JGraphT Adapter to JGraph Demo");
- frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
- frame.pack();
- frame.setVisible(true);
- }
-
-
-
- private void adjustDisplaySettings(JGraph jg)
- {
- jg.setPreferredSize(DEFAULT_SIZE);
-
- Color c = DEFAULT_BG_COLOR;
- String colorStr = null;
-
- try {
- colorStr = getParameter("bgcolor");
- } catch (Exception e) {
- }
-
- if (colorStr != null) {
- c = Color.decode(colorStr);
- }
-
- jg.setBackground(c);
- }
-
- @SuppressWarnings("unchecked") // FIXME hb 28-nov-05: See FIXME below
- private void positionVertexAt(Object vertex, int x, int y)
- {
- DefaultGraphCell cell = jgAdapter.getVertexCell(vertex);
- AttributeMap attr = cell.getAttributes();
- Rectangle2D bounds = GraphConstants.getBounds(attr);
-
- Rectangle2D newBounds =
- new Rectangle2D.Double(
- x,
- y,
- bounds.getWidth(),
- bounds.getHeight());
-
- GraphConstants.setBounds(attr, newBounds);
-
- // TODO: Clean up generics once JGraph goes generic
- AttributeMap cellAttr = new AttributeMap();
- cellAttr.put(cell, attr);
- jgAdapter.edit(cellAttr, null, null, null);
- }
-
-}
-
-// End JGraphAdapterDemo.java
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/request_response_recognizer/TreeKernelBasedRecognizerOfRequest_Response.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/request_response_recognizer/TreeKernelBasedRecognizerOfRequest_Response.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/request_response_recognizer/TreeKernelBasedRecognizerOfRequest_Response.java
deleted file mode 100644
index e33e089..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/request_response_recognizer/TreeKernelBasedRecognizerOfRequest_Response.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.parse_thicket.request_response_recognizer;
-
-
-import java.util.ArrayList;
-import java.util.List;
-
-import opennlp.tools.parse_thicket.ParseThicket;
-import opennlp.tools.parse_thicket.VerbNetProcessor;
-import opennlp.tools.parse_thicket.external_rst.MatcherExternalRST;
-import opennlp.tools.parse_thicket.external_rst.ParseThicketWithDiscourseTree;
-import opennlp.tools.parse_thicket.kernel_interface.TreeKernelBasedClassifierMultiplePara;
-
-/*
- * This class performs TK learning based on parse thicket which includes RST relations only
- * based on Surdeanu at al RST parser. It does sentence parsing and NLP pipeline of
- * Surdeanu's wrapper of Stanford NLP
- */
-public class TreeKernelBasedRecognizerOfRequest_Response extends TreeKernelBasedClassifierMultiplePara{
-
- private MatcherExternalRST matcherRST = new MatcherExternalRST();
-
- protected List<String> formTreeKernelStructuresMultiplePara(List<String> texts, String flag) {
- //TODO
- this.setShortRun();
- List<String> extendedTreesDumpTotal = new ArrayList<String>();
- try {
-
- for(String text: texts){
- // get the parses from original documents, and form the training dataset
- try {
- System.out.print("About to build pt with external rst from "+text + "\n...");
- ParseThicket pt = matcherRST.buildParseThicketFromTextWithRST(text);
- if (pt == null)
- continue;
- System.out.print("About to build extended forest with external rst...");
- List<String> extendedTreesDump = // use direct option (true
- buildReptresentationForDiscourseTreeAndExtensions((ParseThicketWithDiscourseTree)pt, true);
- for(String line: extendedTreesDump)
- extendedTreesDumpTotal.add(flag + " |BT| "+line + " |ET| ");
- System.out.println("DONE");
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- } catch (Exception e) {
- e.printStackTrace();
- }
- return extendedTreesDumpTotal;
- }
-
- private List<String> buildReptresentationForDiscourseTreeAndExtensions(ParseThicketWithDiscourseTree pt, boolean bDirectDT){
- List<String> extendedTreesDump = new ArrayList<String>();
- if (!bDirectDT)
- // option 1: use RST relation for extended trees
- extendedTreesDump = treeExtender.buildForestForRSTArcs(pt);
- else {
- // option 2: use DT directly
- extendedTreesDump.add(pt.getDtDump());
- extendedTreesDump.add(pt.getDtDumpWithPOS());
- extendedTreesDump.add(pt.getDtDumpWithEmbeddedTrees());
- extendedTreesDump.add(pt.getDtDumpWithVerbNet());
- }
- return extendedTreesDump;
- }
-
- public static void main(String[] args){
- VerbNetProcessor p = VerbNetProcessor.
- getInstance("/Users/bgalitsky/Documents/relevance-based-on-parse-trees/src/test/resources");
-
- TreeKernelBasedRecognizerOfRequest_Response proc = new TreeKernelBasedRecognizerOfRequest_Response();
- proc.setKernelPath("/Users/bgalitsky/Documents/relevance-based-on-parse-trees/src/test/resources/tree_kernel/");
- proc.trainClassifier(
- YahooAnswersTrainingSetCreator.origFilesDir,
- YahooAnswersTrainingSetCreator.origFilesDir.replace("/text", "/neg_text")
- );
- }
-
-}
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/request_response_recognizer/YahooAnswersTrainingSetCreator.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/request_response_recognizer/YahooAnswersTrainingSetCreator.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/request_response_recognizer/YahooAnswersTrainingSetCreator.java
deleted file mode 100644
index c060c95..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/request_response_recognizer/YahooAnswersTrainingSetCreator.java
+++ /dev/null
@@ -1,118 +0,0 @@
-package opennlp.tools.parse_thicket.request_response_recognizer;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import opennlp.tools.similarity.apps.BingQueryRunner;
-
-import org.apache.commons.io.FileUtils;
-
-public class YahooAnswersTrainingSetCreator {
- protected List<File> queuePos = new ArrayList<File>(), queueNeg = new ArrayList<File>();
- public static String origFilesDir = "/Users/bgalitsky/Downloads/NewCategoryIdentification/text";
- //private BingQueryRunner searcher = new BingQueryRunner();
- protected void addFilesPos(File file) {
-
- if (!file.exists()) {
- System.out.println(file + " does not exist.");
- }
- if (file.isDirectory()) {
- for (File f : file.listFiles()) {
- addFilesPos(f);
- System.out.println(f.getName());
- }
- } else {
- queuePos.add(file);
- }
- }
-
- protected void addFilesNeg(File file) {
-
- if (!file.exists()) {
- System.out.println(file + " does not exist.");
- }
- if (file.isDirectory()) {
- for (File f : file.listFiles()) {
- addFilesNeg(f);
- System.out.println(f.getName());
- }
- } else {
- queueNeg.add(file);
- }
- }
-
- public void formNegTrainingSet(String posPath , String negPath){
- if (!new File(negPath).exists())
- new File(negPath).mkdir();
-
- addFilesPos(new File(posPath));
- for(int i=0; i< queuePos.size()-1; i+=2){ //take two files at a time
- File f1 = queuePos.get(i), f2 = queuePos.get(i+1);
- String content1 = null, content2 = null;
- try {
- content1 = FileUtils.readFileToString(f1);
- content2 = FileUtils.readFileToString(f2);
- } catch (IOException e) {
- e.printStackTrace();
- }
- String[] portions1 = content1.split("\n\n");
- String[] portions2 = content2.split("\n\n");
-
- portions1 = splitIntoRR(portions1, content1);
- portions2 = splitIntoRR(portions2, content2);
- if (portions1==null || portions2==null)
- continue;
- // do cross-breeding
- try {
- FileUtils.writeStringToFile(new File(negPath+"/" + f1.getName()+".txt"),
- portions1[0] + "\n\n" + portions2[1] );
- FileUtils.writeStringToFile(new File(negPath+"/" + f2.getName()+".txt"),
- portions2[0] + "\n\n" + portions1[1] );
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
-
- }
- private String[] splitIntoRR(String[] portions, String content) {
- if (portions.length<2 ){
- portions = content.replace("?","#_#").split("#_#");
- }
- if (portions.length<2 ){
- portions = content.split("\n");
- }
- if (portions.length<2)
- return null;
- if (portions.length>2){
- String q= "", a = "";
- boolean bQ = true;
- for(int p=0; p<portions.length; p++){
- if ( bQ )
- q+=portions[p]+" \n";
- else
- a +=portions[p]+" \n";
-
- if (portions[p].endsWith("?")){
- bQ=false;
- }
-
- }
- if (!bQ) {
- portions = new String[2];
- portions[0] = q;
- portions[1] = a;
- } else
- return null;
- }
-
- return portions;
- }
-
- public static void main(String[] args){
- String dir = YahooAnswersTrainingSetCreator.origFilesDir;
- new YahooAnswersTrainingSetCreator().formNegTrainingSet(dir, dir.replace("/text", "/neg_text"));
- }
-}
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureArcsBuilder.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureArcsBuilder.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureArcsBuilder.java
deleted file mode 100644
index 96bec44..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureArcsBuilder.java
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.parse_thicket.rhetoric_structure;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import opennlp.tools.parse_thicket.ArcType;
-import opennlp.tools.parse_thicket.Pair;
-import opennlp.tools.parse_thicket.ParseThicket;
-import opennlp.tools.parse_thicket.ParseTreeNode;
-import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc;
-
-import org.jgrapht.Graph;
-import org.jgrapht.graph.DefaultEdge;
-import org.jgrapht.graph.SimpleGraph;
-
-
-import edu.stanford.nlp.trees.Tree;
-
-public class RhetoricStructureArcsBuilder {
- private RhetoricStructureMarker markerBuilderForSentence = new RhetoricStructureMarker();
-
- private Map<Integer, List<Pair<String, Integer[]>>> buildMarkers(ParseThicket pt){
-
- Map<Integer, List<Pair<String, Integer[]>>> sentNumMarkers = new
- HashMap<Integer, List<Pair<String, Integer[]>>>();
- int count = 0;
- for( List<ParseTreeNode> sent: pt.getNodesThicket()){
- List<Pair<String, Integer[]>> markersForSentence = markerBuilderForSentence.
- extractRSTrelationInSentenceGetBoundarySpan(sent);
- sentNumMarkers.put(count, markersForSentence);
- count++;
- }
- return sentNumMarkers;
- }
-
-
- /*
- * Induced RST algorithm
- *
- * Input: obtained RST markers (numbers of words which
- * splits sentence in potential RST relation arguments) +
- * Current Parse Thicket with arcs for coreferences
- *
- * We search for parts of sentences on the opposite side of RST markers
- *
- * $sentPosFrom$ marker
- * | == == == [ ] == == == |
- * \ \
- * \ \
- * coref RST arc being formed
- * \ \
- * \ \
- * | == == == == == [ ] == == ==|
- *
- * Mark yelled at his dog, but it disobeyed
- * | \
- * coref RST arc for CONTRAST being formed
- * | \
- * He was upset, however he did not show it
- * $sentPosTo$
- */
- public List<WordWordInterSentenceRelationArc> buildRSTArcsFromMarkersAndCorefs(
- List<WordWordInterSentenceRelationArc> arcs,
- Map<Integer, List<List<ParseTreeNode>>> sentNumPhrasesMap,
- ParseThicket pt ) {
- List<WordWordInterSentenceRelationArc> arcsRST = new ArrayList<WordWordInterSentenceRelationArc>();
-
- Map<Integer, List<Pair<String, Integer[]>>> rstMarkersMap = buildMarkers(pt);
-
- for(int nSentFrom=0; nSentFrom<pt.getSentences().size(); nSentFrom++){
- for(int nSentTo=nSentFrom+1; nSentTo<pt.getSentences().size(); nSentTo++){
- // for given arc, find phrases connected by this arc and add to the list of phrases
-
- List<List<ParseTreeNode>> phrasesFrom = sentNumPhrasesMap.get(nSentFrom);
- List<List<ParseTreeNode>> phrasesTo = sentNumPhrasesMap.get(nSentTo);
- List<Pair<String, Integer[]>> markersFrom = rstMarkersMap.get(nSentFrom);
- List<Pair<String, Integer[]>> markersTo = rstMarkersMap.get(nSentTo);
- for(WordWordInterSentenceRelationArc arc: arcs){
- // arc should be coref and link these sentences
- if (nSentFrom != arc.getCodeFrom().getFirst() ||
- nSentTo != arc.getCodeTo().getFirst() ||
- !arc.getArcType().getType().startsWith("coref")
- )
- continue;
- int sentPosFrom = arc.getCodeFrom().getSecond();
- int sentPosTo = arc.getCodeTo().getSecond();
- // not more than a single RST link for a pair of sentences
- boolean bFound = false;
- for(List<ParseTreeNode> vpFrom: phrasesFrom){
- if (bFound)
- break;
- for(List<ParseTreeNode> vpTo: phrasesTo){
- for(Pair<String, Integer[]> mFrom: markersFrom){
- for(Pair<String, Integer[]> mTo: markersTo) {
- {
- // the phrases should be on an opposite side of rst marker for a coref link
- if (isSequence( new Integer[] { sentPosFrom, vpFrom.get(0).getId(), mFrom.getSecond()[0]}) &&
- isSequence( new Integer[] { sentPosTo, vpTo.get(0).getId(), mTo.getSecond()[0]}) ){
- ArcType arcType = new ArcType("rst", mFrom.getFirst(), 0, 0);
-
- WordWordInterSentenceRelationArc arcRST =
- new WordWordInterSentenceRelationArc(new Pair<Integer, Integer>(nSentFrom, mFrom.getSecond()[1]),
- new Pair<Integer, Integer>(nSentTo, mTo.getSecond()[1]), "", "", arcType);
- arcsRST.add(arcRST);
- bFound = true;
- break;
- }
- }
- }
- }
- }
- }
- }
- }
- }
-
- return arcs;
- }
-
-// check if the word positions occur in sentence in the order Integer[]
-// TODO make more sensitive algo
- private static boolean isSequence(Integer[] integers) {
- //TODO better construction of array
- if (integers==null || integers.length<3)
- return false;
- try {
- for(Integer i: integers)
- if (i==0)
- return false;
- } catch (Exception e) {
- return false;
- }
-
- Boolean bWrongOrder = false;
- for(int i=1; i< integers.length; i++){
- if (integers[i-1]>integers[i]){
- bWrongOrder = true;
- break;
- }
- }
-
- Boolean bWrongInverseOrder = false;
- for(int i=1; i< integers.length; i++){
- if (integers[i-1]<integers[i]){
- bWrongInverseOrder = true;
- break;
- }
- }
-
- return !(bWrongOrder && bWrongInverseOrder);
- }
-
-
-
- public static void main(String[] args){
-
-
- }
-}
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarker.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarker.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarker.java
deleted file mode 100644
index 3b1c576..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarker.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.parse_thicket.rhetoric_structure;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import opennlp.tools.parse_thicket.IGeneralizer;
-import opennlp.tools.parse_thicket.Pair;
-import opennlp.tools.parse_thicket.ParseTreeNode;
-
-
-public class RhetoricStructureMarker implements IGeneralizer<Integer[]> {
- //private static String rstRelations[] = {"antithesis", "concession", "contrast", "elaboration"};
- List<Pair<String, ParseTreeNode[]>> rstMarkers = new ArrayList<Pair<String, ParseTreeNode[]>>();
-
- public RhetoricStructureMarker(){
-
- rstMarkers.add(new Pair<String, ParseTreeNode[]>("contrast", new ParseTreeNode[]{new ParseTreeNode(",",","), new ParseTreeNode("than",",") }));
- rstMarkers.add(new Pair<String, ParseTreeNode[]>( "antithesis", new ParseTreeNode[]{new ParseTreeNode("although",","), new ParseTreeNode("*","*") }));
- rstMarkers.add(new Pair<String, ParseTreeNode[]>( "contrast", new ParseTreeNode[]{new ParseTreeNode(",",","), new ParseTreeNode("however","*") }));
- rstMarkers.add(new Pair<String, ParseTreeNode[]>( "contrast", new ParseTreeNode[]{new ParseTreeNode("however","*"), new ParseTreeNode(",",","),
- new ParseTreeNode("*","prp"), }));
- rstMarkers.add(new Pair<String, ParseTreeNode[]>( "elaboration", new ParseTreeNode[]{new ParseTreeNode(",",","), new ParseTreeNode("*","NN") }));
- rstMarkers.add(new Pair<String, ParseTreeNode[]>( "elaboration", new ParseTreeNode[]{new ParseTreeNode("as","*"), new ParseTreeNode("a","*") }));
-
- rstMarkers.add(new Pair<String, ParseTreeNode[]>("explanation", new ParseTreeNode[]{new ParseTreeNode(",",","), new ParseTreeNode("because",",") }));
- rstMarkers.add(new Pair<String, ParseTreeNode[]>( "example", new ParseTreeNode[]{new ParseTreeNode("for","IN"), new ParseTreeNode("example","NN") }));
- rstMarkers.add(new Pair<String, ParseTreeNode[]>( "contrast", new ParseTreeNode[]{new ParseTreeNode(",",","), new ParseTreeNode("ye","*") }));
- rstMarkers.add(new Pair<String, ParseTreeNode[]>( "contrast", new ParseTreeNode[]{new ParseTreeNode("yet","*"), new ParseTreeNode(",",","),
- new ParseTreeNode("*","prp"), }));
-
- rstMarkers.add(new Pair<String, ParseTreeNode[]>( "contrast", new ParseTreeNode[]{new ParseTreeNode("yet","*"), new ParseTreeNode("i","*"),
- }));
-
- rstMarkers.add(new Pair<String, ParseTreeNode[]>( "explanation", new ParseTreeNode[]{new ParseTreeNode(",",","), new ParseTreeNode("where","*") }));
- //as long as
- rstMarkers.add(new Pair<String, ParseTreeNode[]>( "temp_sequence", new ParseTreeNode[]{/*new ParseTreeNode("as","*"),*/ new ParseTreeNode("*","RB"),
- new ParseTreeNode("as","IN"),}));
- rstMarkers.add(new Pair<String, ParseTreeNode[]>( "temp_sequence", new ParseTreeNode[]{/*new ParseTreeNode("as","*"),*/ new ParseTreeNode("*","VB*"),
- new ParseTreeNode("until","IN"),}));
-
- }
-
- /* For a sentence, we obtain a list of markers with the CA word and position in the sentence
- * Output span is an integer array with start/end occurrence of an RST marker in a sentence
- * */
- public List<Pair<String, Integer[]>> extractRSTrelationInSentenceGetBoundarySpan(List<ParseTreeNode> sentence){
- List<Pair<String, Integer[]>> results = new ArrayList<Pair<String, Integer[]>> ();
-
- for(Pair<String, ParseTreeNode[]> template: rstMarkers){
- List<Integer[]> spanList = generalize(sentence,template.getSecond() );
- if (!spanList.isEmpty())
- results.add(new Pair<String, Integer[]>(template.getFirst(), spanList.get(0)));
- }
- return results;
- }
-
- /* Rule application in the form of generalization
- * Generalizing a sentence with a rule (a template), we obtain the occurrence of rhetoric marker
- *
- * o1 - sentence
- * o2 - rule/template, specifying lemmas and/or POS, including punctuation
- * @see opennlp.tools.parse_thicket.IGeneralizer#generalize(java.lang.Object, java.lang.Object)
- * returns the span Integer[]
- */
- @Override
- public List<Integer[]> generalize(Object o1, Object o2) {
- List<Integer[]> result = new ArrayList<Integer[]>();
-
- List<ParseTreeNode> sentence = (List<ParseTreeNode> )o1;
- ParseTreeNode[] template = (ParseTreeNode[]) o2;
-
- boolean bBeingMatched = false;
- for(int wordIndexInSentence=0; wordIndexInSentence<sentence.size(); wordIndexInSentence++){
- ParseTreeNode word = sentence.get(wordIndexInSentence);
- int wordIndexInSentenceEnd = wordIndexInSentence; //init iterators for internal loop
- int templateIterator=0;
- while (wordIndexInSentenceEnd<sentence.size() && templateIterator< template.length){
- ParseTreeNode tword = template[templateIterator];
- ParseTreeNode currWord=sentence.get(wordIndexInSentenceEnd);
- List<ParseTreeNode> gRes = tword.generalize(tword, currWord);
- if (gRes.isEmpty()|| gRes.get(0)==null || ( gRes.get(0).getWord().equals("*")
- && gRes.get(0).getPos().equals("*") )){
- bBeingMatched = false;
- break;
- } else {
- bBeingMatched = true;
- }
- wordIndexInSentenceEnd++;
- templateIterator++;
- }
- // template iteration is done
- // the only condition for successful match is IF we are at the end of template
- if (templateIterator == template.length){
- result.add(new Integer[]{wordIndexInSentence, wordIndexInSentenceEnd-1});
- return result;
- }
-
- // no match for current sentence word: proceed to the next
- }
- return result;
- }
-
- public String markerToString(List<Pair<String, Integer[]>> res){
- StringBuffer buf = new StringBuffer();
- buf.append("[");
- for(Pair<String, Integer[]> marker: res){
- buf.append(marker.getFirst()+":");
- for(int a: marker.getSecond()){
- buf.append(a+" ");
- }
- buf.append (" | ");
- }
- buf.append("]");
- return buf.toString();
- }
-
- public static void main(String[] args){
- ParseTreeNode[] sent =
- new ParseTreeNode[]{new ParseTreeNode("he","prn"), new ParseTreeNode("was","vbz"), new ParseTreeNode("more","jj"),
- new ParseTreeNode(",",","), new ParseTreeNode("than",","), new ParseTreeNode("little","jj"), new ParseTreeNode("boy","nn"),
- new ParseTreeNode(",",","), new ParseTreeNode("however","*"), new ParseTreeNode(",",","),
- new ParseTreeNode("he","prp"), new ParseTreeNode("was","vbz"), new ParseTreeNode("adult","jj")
- };
-
- List<Pair<String, Integer[]>> res = new RhetoricStructureMarker().extractRSTrelationInSentenceGetBoundarySpan(Arrays.asList(sent));
- System.out.println( new RhetoricStructureMarker().markerToString(res));
- }
-}
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerStanfRequestHandler.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerStanfRequestHandler.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerStanfRequestHandler.java
deleted file mode 100644
index 477f022..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerStanfRequestHandler.java
+++ /dev/null
@@ -1,306 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.similarity.apps.solr;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.logging.Logger;
-
-import opennlp.tools.parse_thicket.apps.SnippetToParagraph;
-import opennlp.tools.parse_thicket.matching.Matcher;
-import opennlp.tools.similarity.apps.BingQueryRunner;
-import opennlp.tools.similarity.apps.HitBase;
-import opennlp.tools.similarity.apps.HitBaseComparable;
-import opennlp.tools.similarity.apps.utils.Pair;
-import opennlp.tools.textsimilarity.ParseTreeChunk;
-import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;
-import opennlp.tools.textsimilarity.SentencePairMatchResult;
-import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;
-
-import org.apache.commons.lang.ArrayUtils;
-import org.apache.commons.lang.StringUtils;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.queryparser.classic.ParseException;
-import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.CachingWrapperFilter;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.QueryWrapperFilter;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.solr.common.SolrDocument;
-import org.apache.solr.common.SolrDocumentList;
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.params.CommonParams;
-import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.params.ShardParams;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.handler.component.SearchHandler;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.SolrQueryResponse;
-
-
-
-public class SearchResultsReRankerStanfRequestHandler extends SearchHandler {
- private static Logger LOG = Logger
- .getLogger("com.become.search.requestHandlers.SearchResultsReRankerRequestHandler");
- private final static int MAX_SEARCH_RESULTS = 100;
- private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();
- private int MAX_QUERY_LENGTH_NOT_TO_RERANK=3;
- private Matcher matcher = new Matcher();
- private BingQueryRunner bingSearcher = new BingQueryRunner();
- private SnippetToParagraph snp = new SnippetToParagraph();
-
-
- public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp){
- // get query string
- String requestExpression = req.getParamString();
- String[] exprParts = requestExpression.split("&");
- for(String part: exprParts){
- if (part.startsWith("q="))
- requestExpression = part;
- }
- String query = StringUtils.substringAfter(requestExpression, ":");
- LOG.info(requestExpression);
-
-
- SolrParams ps = req.getOriginalParams();
- Iterator<String> iter = ps.getParameterNamesIterator();
- List<String> keys = new ArrayList<String>();
- while(iter.hasNext()){
- keys.add(iter.next());
- }
-
- List<HitBase> searchResults = new ArrayList<HitBase>();
-
-
-
-
-
- for ( Integer i=0; i< MAX_SEARCH_RESULTS; i++){
- String title = req.getParams().get("t"+i.toString());
- String descr = req.getParams().get("d"+i.toString());
-
- if(title==null || descr==null)
- continue;
-
- HitBase hit = new HitBase();
- hit.setTitle(title);
- hit.setAbstractText(descr);
- hit.setSource(i.toString());
- searchResults.add(hit);
- }
-
- /*
- * http://173.255.254.250:8983/solr/collection1/reranker/?
- * q=search_keywords:design+iphone+cases&fields=spend+a+day+with+a+custom+iPhone+case&fields=Add+style+to+your+every+day+fresh+design+with+a+custom+iPhone+case&fields=Add+style+to+your+every+day+with+mobile+case+for+your+family&fields=Add+style+to+your+iPhone+and+iPad&fields=Add+Apple+fashion+to+your+iPhone+and+iPad
- *
- */
-
- if (searchResults.size()<1) {
- int count=0;
- for(String val : exprParts){
- if (val.startsWith("fields=")){
- val = StringUtils.mid(val, 7, val.length());
- HitBase hit = new HitBase();
- hit.setTitle("");
- hit.setAbstractText(val);
- hit.setSource(new Integer(count).toString());
- searchResults.add(hit);
- count++;
- }
-
- }
- }
-
-
- List<HitBase> reRankedResults = null;
- query = query.replace('+', ' ');
- if (tooFewKeywords(query)|| orQuery(query)){
- reRankedResults = searchResults;
- LOG.info("No re-ranking for "+query);
- }
- else
- reRankedResults = calculateMatchScoreResortHits(searchResults, query);
- /*
- * <scores>
-<score index="2">3.0005</score>
-<score index="1">2.101</score>
-<score index="3">2.1003333333333334</score>
-<score index="4">2.00025</score>
-<score index="5">1.1002</score>
-</scores>
- *
- *
- */
- StringBuffer buf = new StringBuffer();
- buf.append("<scores>");
- for(HitBase hit: reRankedResults){
- buf.append("<score index=\""+hit.getSource()+"\">"+hit.getGenerWithQueryScore()+"</score>");
- }
- buf.append("</scores>");
-
- NamedList<Object> scoreNum = new NamedList<Object>();
- for(HitBase hit: reRankedResults){
- scoreNum.add(hit.getSource(), hit.getGenerWithQueryScore());
- }
-
- StringBuffer bufNums = new StringBuffer();
- bufNums.append("order>");
- for(HitBase hit: reRankedResults){
- bufNums.append(hit.getSource()+"_");
- }
- bufNums.append("/order>");
-
- LOG.info("re-ranking results: "+buf.toString());
- NamedList<Object> values = rsp.getValues();
- values.remove("response");
- values.add("response", scoreNum);
- values.add("new_order", bufNums.toString().trim());
- rsp.setAllValues(values);
-
- }
-
- private boolean orQuery(String query) {
- if (query.indexOf('|')>-1)
- return true;
-
- return false;
- }
-
- private boolean tooFewKeywords(String query) {
- String[] parts = query.split(" ");
- if (parts!=null && parts.length< MAX_QUERY_LENGTH_NOT_TO_RERANK)
- return true;
-
- return false;
- }
-
- protected List<HitBase> calculateMatchScoreResortHits(List<HitBase> hits,
- String searchQuery) {
-
- List<HitBase> newHitList = new ArrayList<HitBase>();
- int count = 0;
- for (HitBase hit : hits) {
- if (count>10)
- break;
- count++;
- String[] pageSentsAndSnippet = formTextForReRankingFromHit(hit);
-
- Double score = 0.0;
- try {
- List<List<ParseTreeChunk>> match = null;
- if (pageSentsAndSnippet!=null && pageSentsAndSnippet[0].length()>50){
- match = matcher.assessRelevanceCache(pageSentsAndSnippet[0] ,
- searchQuery);
- score = parseTreeChunkListScorer.getParseTreeChunkListScore(match);
- hit.setSource(match.toString());
- }
- if (score < 2){ // attempt to match with snippet, if not much luck with original text
- match = matcher.assessRelevanceCache(pageSentsAndSnippet[0] ,
- searchQuery);
- score = parseTreeChunkListScorer.getParseTreeChunkListScore(match);
- }
- LOG.info(score + " | " +pageSentsAndSnippet[1]);
- } catch (Exception e) {
- LOG.severe("Problem processing snapshot " + pageSentsAndSnippet[1]);
- e.printStackTrace();
- }
- hit.setGenerWithQueryScore(score);
- newHitList.add(hit);
- }
-
- System.out.println("\n\n ============= old ORDER ================= ");
- for (HitBase hit : newHitList) {
- System.out.println(hit.getOriginalSentences().toString() + " => "+hit.getGenerWithQueryScore());
- System.out.println("match = "+hit.getSource());
- }
- Collections.sort(newHitList, new HitBaseComparable());
-
- System.out.println("\n\n ============= NEW ORDER ================= ");
- for (HitBase hit : newHitList) {
- System.out.println(hit.getOriginalSentences().toString() + " => "+hit.getGenerWithQueryScore());
- System.out.println("match = "+hit.getSource());
- }
-
- return newHitList;
- }
-
- protected String[] formTextForReRankingFromHit(HitBase hit) {
- HitBase hitWithFullSents = snp.formTextFromOriginalPageGivenSnippet(hit);
- String textFromOriginalPage = "";
- try {
- List<String> sents = hitWithFullSents.getOriginalSentences();
- for(String s: sents){
- textFromOriginalPage+=s+" ";
- }
-
- if (textFromOriginalPage.startsWith(".")){
- textFromOriginalPage = textFromOriginalPage.substring(2);
- }
- textFromOriginalPage = textFromOriginalPage.replace(" . .", ". ").replace(". . ", ". ").
- replace("..", ". ").trim();
- } catch (Exception e1) {
- e1.printStackTrace();
- LOG.info("Problem processing snapshot "+hit.getAbstractText());
- }
- hit.setPageContent(textFromOriginalPage);
- String snapshot = hit.getAbstractText().replace("<b>...</b>", ". ").replace("<span class='best-phrase'>", " ").replace("<span>", " ").replace("<span>", " ")
- .replace("<b>", "").replace("</b>", "");
- snapshot = snapshot.replace("</B>", "").replace("<B>", "")
- .replace("<br>", "").replace("</br>", "").replace("...", ". ")
- .replace("|", " ").replace(">", " ").replace(". .", ". ");
- snapshot += " . " + hit.getTitle();
-
- return new String[] { textFromOriginalPage, snapshot };
- }
-
-
- public class HitBaseComparable implements Comparator<HitBase> {
- // @Override
- public int compare(HitBase o1, HitBase o2) {
- return (o1.getGenerWithQueryScore() > o2.getGenerWithQueryScore() ? -1
- : (o1 == o2 ? 0 : 1));
- }
- }
-
-}
-
-/*
-
-http://dev1.exava.us:8086/solr/collection1/reranker/?q=search_keywords:I+want+style+in+my+every+day+fresh+design+iphone+cases
-&t1=Personalized+iPhone+4+Cases&d1=spend+a+day+with+a+custom+iPhone+case
-&t2=iPhone+Cases+to+spend+a+day&d2=Add+style+to+your+every+day+fresh+design+with+a+custom+iPhone+case
-&t3=Plastic+iPhone+Cases&d3=Add+style+to+your+every+day+with+mobile+case+for+your+family
-&t4=Personalized+iPhone+and+iPad+Cases&d4=Add+style+to+your+iPhone+and+iPad
-&t5=iPhone+accessories+from+Apple&d5=Add+Apple+fashion+to+your+iPhone+and+iPad
-
-http://dev1.exava.us:8086/solr/collection1/reranker/?q=search_keywords:I+want+style+in+my+every+day+fresh+design+iphone+cases&t1=Personalized+iPhone+4+Cases&d1=spend+a+day+with+a+custom+iPhone+case&t2=iPhone+Cases+to+spend+a+day&d2=Add+style+to+your+every+day+fresh+design+with+a+custom+iPhone+case&t3=Plastic+iPhone+Cases&d3=Add+style+to+your+every+day+with+mobile+case+for+your+family&t4=Personalized+iPhone+and+iPad+Cases&d4=Add+style+to+your+iPhone+and+iPad&t5=iPhone+accessories+from+Apple&d5=Add+Apple+fashion+to+your+iPhone+and+iPad
- */
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java
index 8538c25..78ce8f5 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java
@@ -25,13 +25,11 @@ import java.util.logging.Logger;
import org.apache.commons.lang.StringUtils;
-import opennlp.tools.parse_thicket.matching.Matcher;
import opennlp.tools.similarity.apps.BingQueryRunner;
import opennlp.tools.similarity.apps.HitBase;
import opennlp.tools.similarity.apps.utils.StringCleaner;
import opennlp.tools.stemmer.PStemmer;
import opennlp.tools.textsimilarity.ParseTreeChunk;
-import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;
import opennlp.tools.textsimilarity.SentencePairMatchResult;
import opennlp.tools.textsimilarity.TextProcessor;
import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;
@@ -46,9 +44,10 @@ public class DomainTaxonomyExtender {
.getLogger("opennlp.tools.similarity.apps.taxo_builder.DomainTaxonomyExtender");
private BingQueryRunner brunner = new BingQueryRunner();
+ private ParserChunker2MatcherProcessor matcher = ParserChunker2MatcherProcessor.getInstance();
protected static String BING_KEY = "WFoNMM706MMJ5JYfcHaSEDP+faHj3xAxt28CPljUAHA";
- Matcher matcher = new Matcher();
+
private final static String TAXO_FILENAME = "taxo_data.dat";
@@ -161,8 +160,8 @@ public class DomainTaxonomyExtender {
.getTitle() + " " + h1.getAbstractText());
String snapshot2 = StringCleaner.processSnapshotForMatching(h2
.getTitle() + " " + h2.getAbstractText());
- List<List<ParseTreeChunk>> overlaps =matcher.assessRelevance(snapshot1, snapshot2);
- genResult.addAll(overlaps);
+ SentencePairMatchResult overlaps = matcher.assessRelevance(snapshot1, snapshot2);
+ genResult.addAll(overlaps.matchResult);
}
}
}
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java
index f151768..27f457c 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java
@@ -17,6 +17,7 @@
package opennlp.tools.textsimilarity;
+import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@@ -27,7 +28,7 @@ import org.apache.commons.lang3.StringUtils;
import opennlp.tools.parse_thicket.ParseTreeNode;
-public class ParseTreeChunk {
+public class ParseTreeChunk implements Serializable{
private String mainPOS;
private List<String> lemmas;
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/fca/FCATest.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/test/java/opennlp/tools/fca/FCATest.java b/opennlp-similarity/src/test/java/opennlp/tools/fca/FCATest.java
deleted file mode 100755
index 531e6ec..0000000
--- a/opennlp-similarity/src/test/java/opennlp/tools/fca/FCATest.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.fca;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.LinkedHashSet;
-
-import junit.framework.TestCase;
-
-public class FCATest extends TestCase{
- ConceptLattice cl=null;
-/*
- public void testConceptLattice() {
-
-
- try {
- cl = new ConceptLattice("src/test/resources/fca/sports.cxt",true);
- cl.printLatticeStats();
- cl.printLatticeFull();
- cl.printBinContext();
-
- FcaWriter wt = new FcaWriter();
- wt.WriteStatsToCvs("stats.csv", cl, 0);
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- } catch (IOException e) {
-
- e.printStackTrace();
- }
-
- FcaConverter converter = new FcaConverter();
- int [][] binCon = converter.latticeToContext(cl);
-
- if (binCon!=null){
- ConceptLattice new_cl = new ConceptLattice(binCon.length, binCon[0].length, binCon, false);
- new_cl.printLatticeStats();
- new_cl.printLatticeFull();
- new_cl.printBinContext();
- FcaWriter wt = new FcaWriter();
- wt.WriteStatsToCvs("stats.txt", cl, 0);
- //wt.WriteAsCxt("cl.cxt", cl);
- wt.WriteAsCxt("cl_new.cxt", new_cl);
- }
- }
-
- public void testRandom(){
- RandomNoiseGenerator rng = new RandomNoiseGenerator();
- try {
- cl = new ConceptLattice("src/test/resources/fca/sports.cxt",true);
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- //int[][] bc = rng.AddObjectsAttributesWithProbability(10, 0.5, cl.binaryContext);
- int[][] bc = rng.AlterCellsWithProbability(0.2, cl.binaryContext);
- ConceptLattice new_cl = new ConceptLattice(bc.length, bc[0].length, bc, false);
- new_cl.printLatticeStats();
- new_cl.printLattice();
- }
-*/
-}
-
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/RelatedSentenceFinderTest.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/RelatedSentenceFinderTest.java b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/RelatedSentenceFinderTest.java
index f385a69..f5c6222 100644
--- a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/RelatedSentenceFinderTest.java
+++ b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/RelatedSentenceFinderTest.java
@@ -43,7 +43,7 @@ public class RelatedSentenceFinderTest extends TestCase {
//assertTrue(result.getFragments().get(0).getFragment().indexOf("Swiss Patent Office")>-1);
}
-
+ /*
public void testBuildParagraphOfGeneratedTextTest(){
HitBase input = new HitBase();
input.setAbstractText("Albert Einstein was a German-born theoretical physicist who developed the general theory of relativity, one of the two pillars of modern physics (alongside ...");
@@ -57,7 +57,7 @@ public class RelatedSentenceFinderTest extends TestCase {
assertTrue(result.getFragments().size()>0);
assertTrue(result.getFragments().get(0).getFragment().indexOf("Albert Einstein")>-1);
}
-
+*/
public void testBuildParagraphOfGeneratedTextTestYearInTheEnd(){
@@ -74,6 +74,7 @@ public class RelatedSentenceFinderTest extends TestCase {
assertTrue(result.getFragments().get(0).getFragment().indexOf("Albert Einstein")>-1);
}
+ /*
public void testBuildParagraphOfGeneratedTextTestBio1(){
HitBase input = new HitBase();
input.setAbstractText("Today, the practical applications of Einstein\ufffds theories ...");
@@ -87,7 +88,7 @@ public class RelatedSentenceFinderTest extends TestCase {
assertTrue(result.getFragments().size()>0);
assertTrue(result.getFragments().get(0).getFragment().indexOf("Einstein")>-1);
}
-/*
+
public void testBuildParagraphOfGeneratedTextTestBio2(){
HitBase input = new HitBase();
input.setAbstractText("The theory of relativity is a beautiful example of ...");
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/SnippetToParagraphTest.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/SnippetToParagraphTest.java b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/SnippetToParagraphTest.java
deleted file mode 100644
index fb6259b..0000000
--- a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/SnippetToParagraphTest.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.parse_thicket.apps;
-
-
-import opennlp.tools.similarity.apps.HitBase;
-import junit.framework.TestCase;
-
-
-public class SnippetToParagraphTest extends TestCase {
- SnippetToParagraph converter = new SnippetToParagraph();
-
- public void testConversionTest(){
- HitBase input = new HitBase();
- input.setAbstractText("... complicity in the military's latest failure to uphold their own standards of conduct. Nor do I see a distinction between the service member who orchestrated this offense ...");
- input.setUrl("http://armedservices.house.gov/index.cfm/press-releases?ContentRecord_id=b5d9aeab-6745-4eba-94ea-12295fd40e67");
- input.setTitle("Press Releases - News - Armed Services Republicans");
- HitBase result = converter.formTextFromOriginalPageGivenSnippet(input);
- assertTrue(result.getOriginalSentences()!=null);
- assertTrue(result.getOriginalSentences().size()>0);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilderTest.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilderTest.java b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilderTest.java
deleted file mode 100644
index bbce9e8..0000000
--- a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilderTest.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.parse_thicket.communicative_actions;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import opennlp.tools.parse_thicket.ParseThicket;
-import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc;
-import opennlp.tools.parse_thicket.matching.Matcher;
-import opennlp.tools.similarity.apps.HitBase;
-
-import junit.framework.TestCase;
-
-public class CommunicativeActionsArcBuilderTest extends TestCase {
- Matcher matcher = new Matcher();
-
- public void testCommunicativeActionsArcBuilderTestQ(){
- String text = "As a US citizen living abroad, I am concerned about the health reform regulation of 2014. "+
- "I do not want to wait till I am sick to buy health insurance. "+
- "Yet I am afraid I will end up being requested to pay the tax. "+
- "Although I live abroad, I am worried about having to pay a fine for being reported as not having health insurance coverage. ";
- ParseThicket pt = matcher.buildParseThicketFromTextWithRST(text);
- List<WordWordInterSentenceRelationArc> results = new ArrayList<WordWordInterSentenceRelationArc>();
- for(WordWordInterSentenceRelationArc arc: pt.getArcs()){
- if(arc.getArcType().getType().startsWith("ca")){
- results.add(arc);
- System.out.println(arc);
- }
- }
- assertTrue(results.size()>11);
-
- }
- public void testCommunicativeActionsArcBuilderTestA(){
- String text = "People are worried about paying a fine for not carrying health insurance coverage, having been informed by IRS about new regulations. "+
- "Yet hardly anyone is expected to pay the tax, when the health reform law takes full effect in 2014. "+
- "The individual mandate confirms that people don\ufffdt wait until they are sick to buy health insurance. "+
- "People are exempt from health insurance fine if they report they make too little money, or US citizens living abroad.";
- ParseThicket pt = matcher.buildParseThicketFromTextWithRST(text);
- List<WordWordInterSentenceRelationArc> results = new ArrayList<WordWordInterSentenceRelationArc>();
- for(WordWordInterSentenceRelationArc arc: pt.getArcs()){
- if(arc.getArcType().getType().startsWith("ca")){
- results.add(arc);
- System.out.println(arc);
- }
- }
- assertTrue(results.size()>5);
- }
-
-
-
-
-}
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/external_rst/ExternalRSTImporterTest.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/external_rst/ExternalRSTImporterTest.java b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/external_rst/ExternalRSTImporterTest.java
deleted file mode 100644
index c2b5877..0000000
--- a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/external_rst/ExternalRSTImporterTest.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.parse_thicket.external_rst;
-
-
-import java.util.List;
-
-import junit.framework.TestCase;
-import opennlp.tools.parse_thicket.ParseThicket;
-import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc;
-import opennlp.tools.parse_thicket.matching.Matcher;
-
-public class ExternalRSTImporterTest extends TestCase{
-
-
- public void testBuildParseThicketFromTextWithRSTtest(){
- Matcher m = new Matcher();
- // We combine our own RST rules with those of Joty 2014 to produce an augmented parse thicket
- String externalRSTresultFilename = "/external_rst/resInput.txt";
-
- ParseThicket pt = m.buildParseThicketFromTextWithRST("I explained that I made a deposit, and then wrote a check, which bounced due to a bank error. A customer service representative confirmed that it usually takes a day to process the deposit. "
- + "I reminded that I was unfairly charged an overdraft fee amonth ago in a similar situation. "+
- " They explained that the overdraft fee was due to insufficient funds as disclosed in my account information. I disagreed with their fee because I made a deposit well in "+
- " advance and wanted this fee back. They denied responsibility saying that nothing an be done at this point. They also confirmed that I needed to look into the account rules closer.");
- ExternalRSTImporter imp = new ExternalRSTImporter();
-
- List<WordWordInterSentenceRelationArc> arcsRST = imp.buildPT2ptPhrases( pt , externalRSTresultFilename);
- assertTrue(arcsRST .size() > 10);
-
-
- }
-
-}
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilderTest.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilderTest.java b/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilderTest.java
deleted file mode 100644
index 0517f4c..0000000
--- a/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilderTest.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.parse_thicket.matching;
-
-import java.util.List;
-
-import opennlp.tools.parse_thicket.ParseTreeNode;
-
-
-import junit.framework.TestCase;
-
-public class PT2ThicketPhraseBuilderTest extends TestCase {
- private PT2ThicketPhraseBuilder builder = new PT2ThicketPhraseBuilder();
-
- public void testParsePhrase(){
- String line = "(NP (NNP Iran)) (VP (VBZ refuses) (S (VP (TO to) (VP (VB accept) (S (NP (DT the) " +
- "(NNP UN) (NN proposal)) (VP (TO to) (VP (VB end) (NP (PRP$ its) (NN dispute))))))))";
-
- List<ParseTreeNode> res = builder.parsePhrase("NP", line);
- System.out.println(res);
- assertTrue(res!=null);
- assertTrue(res.size()>0);
-
- }
-}