You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by bg...@apache.org on 2012/10/03 00:54:50 UTC
svn commit: r1393228 - in /opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr: ./ IterativeQueryComponent.java SyntGenRequestHandler.java

Author: bgalitsky
Date: Tue Oct  2 22:54:49 2012
New Revision: 1393228

URL: http://svn.apache.org/viewvc?rev=1393228&view=rev
Log:
OPENNLP-540 SOLR request handler for search results re-ranking based on 'Similarity'

Added:
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SyntGenRequestHandler.java

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java?rev=1393228&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java Tue Oct  2 22:54:49 2012
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.similarity.apps.solr;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang.StringUtils;
+
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.handler.component.QueryComponent;
+import org.apache.solr.handler.component.ResponseBuilder;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.ResultContext;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.search.DocList;
+
+import org.apache.solr.search.QParser;
+import org.apache.solr.search.QParserPlugin;
+import org.apache.solr.search.QueryParsing;
+
+
+
+public class IterativeQueryComponent extends QueryComponent{
+	public static final String COMPONENT_NAME = "iterative_query";
+	public static final String[] fieldSequence = new String[]{"cat", "name", "content", "author"}; 
+
+	/**
+	 * Run the query multiple times againts various fields, trying to recognize search intention
+	 */
+	@Override
+	public void process(ResponseBuilder rb) throws IOException {
+
+		NamedList nameValuePairs = rb.rsp.getValues();
+		nameValuePairs.remove("response");
+		rb.rsp.setAllValues(nameValuePairs);
+		rb = substituteField(rb, fieldSequence[0] );
+		super.process(rb);
+
+		for(int iter = 1; iter<fieldSequence.length; iter++){
+			nameValuePairs = rb.rsp.getValues();
+			ResultContext c = (ResultContext) nameValuePairs.get("response");
+			if (c!=null){			
+				DocList dList = c.docs;
+				if (dList.size()<1){
+					nameValuePairs.remove("response");
+					rb.rsp.setAllValues(nameValuePairs);
+					rb = substituteField(rb, fieldSequence[iter] );
+
+					super.process(rb);
+				}
+				else {
+					return;
+				}
+			}
+		}
+	}
+
+	private ResponseBuilder substituteField(ResponseBuilder rb, String newFieldName) {
+		SolrParams params = rb.req.getParams();
+		String query = params.get("q");
+		String currField = StringUtils.substringBetween(" "+query, " ", ":");
+		if ( currField !=null && newFieldName!=null)
+			query = query.replace(currField, newFieldName);
+		NamedList values = params.toNamedList();
+		values.remove("q");
+		values.add("q", query);
+		params = SolrParams.toSolrParams(values);
+		rb.req.setParams(params);
+		rb.setQueryString(query);
+
+		String defType = params.get(QueryParsing.DEFTYPE,QParserPlugin.DEFAULT_QTYPE);
+
+		// get it from the response builder to give a different component a chance
+		// to set it.
+		String queryString = rb.getQueryString();
+		if (queryString == null) {
+			// this is the normal way it's set.
+			queryString = params.get( CommonParams.Q );
+			rb.setQueryString(queryString);
+		}
+
+		try {
+			QParser parser = QParser.getParser(rb.getQueryString(), defType, rb.req);
+			Query q = parser.getQuery();
+			if (q == null) {
+				// normalize a null query to a query that matches nothing
+				q = new BooleanQuery();        
+			}
+			rb.setQuery( q );
+			rb.setSortSpec( parser.getSort(true) );
+			rb.setQparser(parser);
+			rb.setScoreDoc(parser.getPaging());
+
+		} catch (ParseException e) {
+			throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
+		}
+		return rb;
+	}
+
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SyntGenRequestHandler.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SyntGenRequestHandler.java?rev=1393228&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SyntGenRequestHandler.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SyntGenRequestHandler.java Tue Oct  2 22:54:49 2012
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.similarity.apps.solr;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import opennlp.tools.similarity.apps.utils.Pair;
+import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;
+import opennlp.tools.textsimilarity.SentencePairMatchResult;
+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.handler.component.SearchHandler;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.ResultContext;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.search.DocIterator;
+import org.apache.solr.search.DocList;
+import org.apache.solr.search.DocSlice;
+
+public class SyntGenRequestHandler extends SearchHandler {
+
+	private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();
+
+	public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp){
+      try {
+          super.handleRequestBody(req, rsp);
+      } catch (Exception e) {
+          // TODO Auto-generated catch block
+          e.printStackTrace();
+      }
+      //modify rsp
+      NamedList values = rsp.getValues();
+      ResultContext c = (ResultContext) values.get("response");
+      if (c==null)
+          return;
+
+      DocList dList = c.docs;
+      DocList dListResult=null;
+      try {
+          dListResult = filterResultsBySyntMatchReduceDocSet(dList,
+                  req,  req.getParams());
+      } catch (Exception e) {
+          dListResult = dList;
+          // TODO Auto-generated catch block
+          e.printStackTrace();
+      }
+      c.docs = dListResult;
+      values.remove("response");
+      values.add("response", c.docs);
+      rsp.setAllValues(values);
+  }
+
+
+  public DocList filterResultsBySyntMatchReduceDocSet(DocList docList,
+          SolrQueryRequest req,  SolrParams params) {     
+      //if (!docList.hasScores()) 
+      //  return docList;
+
+      int len = docList.size();
+      if (len < 1) // do nothing
+          return docList;
+      ParserChunker2MatcherProcessor pos = ParserChunker2MatcherProcessor .getInstance();
+
+      DocIterator iter = docList.iterator();
+      float[] syntMatchScoreArr = new float[len];
+      String requestExpression = req.getParamString();
+      String[] exprParts = requestExpression.split("&");
+      for(String part: exprParts){
+          if (part.startsWith("q="))
+              requestExpression = part;           
+      }
+      String fieldNameQuery = StringUtils.substringBetween(requestExpression, "=", ":");
+      // extract phrase query (in double-quotes)
+      String[] queryParts = requestExpression.split("\"");
+      if  (queryParts.length>=2 && queryParts[1].length()>5)
+          requestExpression = queryParts[1].replace('+', ' ');    
+      else if (requestExpression.indexOf(":") > -1 ) {// still field-based expression
+          requestExpression = requestExpression.replaceAll(fieldNameQuery+":", "").replace('+',' ').replaceAll("  ", " ").replace("q=", "");
+      }
+      
+      if (fieldNameQuery ==null)
+          return docList;
+      if (requestExpression==null || requestExpression.length()<5  || requestExpression.split(" ").length<3)
+          return docList;
+      int[] docIDsHits = new int[len]; 
+
+      IndexReader indexReader = req.getSearcher().getIndexReader();
+      List<Integer> bestMatchesDocIds = new ArrayList<Integer>(); List<Float> bestMatchesScore = new ArrayList<Float>();
+      List<Pair<Integer, Float>> docIdsScores = new ArrayList<Pair<Integer, Float>> ();
+      try {
+          for (int i=0; i<docList.size(); ++i) {
+              int docId = iter.nextDoc();
+              docIDsHits[i] = docId;
+              Document doc = indexReader.document(docId);
+
+              // get text for event
+              String answerText = doc.get(fieldNameQuery);
+              if (answerText==null)
+                  continue;
+              SentencePairMatchResult matchResult = pos.assessRelevance( requestExpression , answerText);
+              float syntMatchScore =  new Double(parseTreeChunkListScorer.getParseTreeChunkListScore(matchResult.getMatchResult())).floatValue();
+              bestMatchesDocIds.add(docId);
+              bestMatchesScore.add(syntMatchScore);
+              syntMatchScoreArr[i] = (float)syntMatchScore; //*iter.score();
+              System.out.println(" Matched query = '"+requestExpression + "' with answer = '"+answerText +"' | doc_id = '"+docId);
+              System.out.println(" Match result = '"+matchResult.getMatchResult() + "' with score = '"+syntMatchScore +"';" );
+              docIdsScores.add(new Pair(docId, syntMatchScore));
+          }
+
+      } catch (CorruptIndexException e1) {
+          // TODO Auto-generated catch block
+          e1.printStackTrace();
+          //log.severe("Corrupt index"+e1);
+      } catch (IOException e1) {
+          // TODO Auto-generated catch block
+          e1.printStackTrace();
+          //log.severe("File read IO / index"+e1);
+      }
+      
+      
+      Collections.sort(docIdsScores, new PairComparable());
+      for(int i = 0; i<docIdsScores.size(); i++){
+          bestMatchesDocIds.set(i, docIdsScores.get(i).getFirst());
+          bestMatchesScore.set(i, docIdsScores.get(i).getSecond());
+      }
+      System.out.println(bestMatchesScore);
+      float maxScore = docList.maxScore(); // do not change
+      int limit = docIdsScores.size();
+      int start = 0; 
+      DocSlice ds = null;
+
+      ds = new DocSlice(start, limit, 
+              ArrayUtils.toPrimitive(bestMatchesDocIds.toArray(new Integer[0])), 
+              ArrayUtils.toPrimitive(bestMatchesScore.toArray(new Float[0])), 
+              bestMatchesDocIds.size(), maxScore);
+
+
+
+      return ds;
+  }
+
+	public class PairComparable implements Comparator<Pair> {
+		// @Override
+		public int compare(Pair o1, Pair o2) {
+			int b = -2;
+			if ( o1.getSecond() instanceof Float && o2.getSecond() instanceof Float){
+
+				b =  (((Float)o1.getSecond()> (Float)o2.getSecond()) ? -1
+						: (((Float)o1.getSecond() == (Float)o2.getSecond()) ? 0 : 1));
+			}
+			return b;
+		}
+	}
+
+}