You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by bg...@apache.org on 2012/10/03 00:54:50 UTC
svn commit: r1393228 - in
/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr:
./ IterativeQueryComponent.java SyntGenRequestHandler.java
Author: bgalitsky
Date: Tue Oct 2 22:54:49 2012
New Revision: 1393228
URL: http://svn.apache.org/viewvc?rev=1393228&view=rev
Log:
OPENNLP-540 SOLR request handler for search results re-ranking based on 'Similarity'
Added:
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SyntGenRequestHandler.java
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java?rev=1393228&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java Tue Oct 2 22:54:49 2012
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.similarity.apps.solr;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang.StringUtils;
+
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.handler.component.QueryComponent;
+import org.apache.solr.handler.component.ResponseBuilder;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.ResultContext;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.search.DocList;
+
+import org.apache.solr.search.QParser;
+import org.apache.solr.search.QParserPlugin;
+import org.apache.solr.search.QueryParsing;
+
+
+
+public class IterativeQueryComponent extends QueryComponent{
+ public static final String COMPONENT_NAME = "iterative_query";
+ public static final String[] fieldSequence = new String[]{"cat", "name", "content", "author"};
+
+ /**
+ * Run the query multiple times againts various fields, trying to recognize search intention
+ */
+ @Override
+ public void process(ResponseBuilder rb) throws IOException {
+
+ NamedList nameValuePairs = rb.rsp.getValues();
+ nameValuePairs.remove("response");
+ rb.rsp.setAllValues(nameValuePairs);
+ rb = substituteField(rb, fieldSequence[0] );
+ super.process(rb);
+
+ for(int iter = 1; iter<fieldSequence.length; iter++){
+ nameValuePairs = rb.rsp.getValues();
+ ResultContext c = (ResultContext) nameValuePairs.get("response");
+ if (c!=null){
+ DocList dList = c.docs;
+ if (dList.size()<1){
+ nameValuePairs.remove("response");
+ rb.rsp.setAllValues(nameValuePairs);
+ rb = substituteField(rb, fieldSequence[iter] );
+
+ super.process(rb);
+ }
+ else {
+ return;
+ }
+ }
+ }
+ }
+
+ private ResponseBuilder substituteField(ResponseBuilder rb, String newFieldName) {
+ SolrParams params = rb.req.getParams();
+ String query = params.get("q");
+ String currField = StringUtils.substringBetween(" "+query, " ", ":");
+ if ( currField !=null && newFieldName!=null)
+ query = query.replace(currField, newFieldName);
+ NamedList values = params.toNamedList();
+ values.remove("q");
+ values.add("q", query);
+ params = SolrParams.toSolrParams(values);
+ rb.req.setParams(params);
+ rb.setQueryString(query);
+
+ String defType = params.get(QueryParsing.DEFTYPE,QParserPlugin.DEFAULT_QTYPE);
+
+ // get it from the response builder to give a different component a chance
+ // to set it.
+ String queryString = rb.getQueryString();
+ if (queryString == null) {
+ // this is the normal way it's set.
+ queryString = params.get( CommonParams.Q );
+ rb.setQueryString(queryString);
+ }
+
+ try {
+ QParser parser = QParser.getParser(rb.getQueryString(), defType, rb.req);
+ Query q = parser.getQuery();
+ if (q == null) {
+ // normalize a null query to a query that matches nothing
+ q = new BooleanQuery();
+ }
+ rb.setQuery( q );
+ rb.setSortSpec( parser.getSort(true) );
+ rb.setQparser(parser);
+ rb.setScoreDoc(parser.getPaging());
+
+ } catch (ParseException e) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
+ }
+ return rb;
+ }
+
+}
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SyntGenRequestHandler.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SyntGenRequestHandler.java?rev=1393228&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SyntGenRequestHandler.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SyntGenRequestHandler.java Tue Oct 2 22:54:49 2012
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.similarity.apps.solr;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import opennlp.tools.similarity.apps.utils.Pair;
+import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;
+import opennlp.tools.textsimilarity.SentencePairMatchResult;
+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.handler.component.SearchHandler;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.ResultContext;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.search.DocIterator;
+import org.apache.solr.search.DocList;
+import org.apache.solr.search.DocSlice;
+
+/**
+ * Search handler that runs the regular search and then re-ranks the returned documents by
+ * the syntactic similarity between the query phrase and the text of the queried field.
+ */
+public class SyntGenRequestHandler extends SearchHandler {
+  private static final java.util.logging.Logger LOG =
+      java.util.logging.Logger.getLogger(SyntGenRequestHandler.class.getName());
+
+  private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();
+
+  /**
+   * Executes the standard search and replaces the {@code "response"} entry of {@code rsp}
+   * with the re-ranked document list.
+   *
+   * @param req the current request
+   * @param rsp the response to populate; receives the exception if the search fails
+   */
+  @Override
+  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) {
+    try {
+      super.handleRequestBody(req, rsp);
+    } catch (Exception e) {
+      // report the failure instead of swallowing it and re-ranking an incomplete response
+      rsp.setException(e);
+      return;
+    }
+    NamedList<Object> values = rsp.getValues();
+    Object response = values.get("response");
+    if (!(response instanceof ResultContext)) {
+      return;
+    }
+    ResultContext c = (ResultContext) response;
+    try {
+      c.docs = filterResultsBySyntMatchReduceDocSet(c.docs, req, req.getParams());
+    } catch (Exception e) {
+      // re-ranking is best effort: on failure the original document list is kept
+      LOG.log(java.util.logging.Level.WARNING, "Re-ranking failed; keeping original order", e);
+    }
+    values.remove("response");
+    values.add("response", c.docs);
+    rsp.setAllValues(values);
+  }
+
+  /**
+   * Re-scores the documents of {@code docList} by how closely the text of the queried field
+   * matches the query phrase, and returns them sorted by descending score. Documents that
+   * have no stored value for the queried field are dropped.
+   *
+   * @param docList documents returned by the regular search
+   * @param req the current request, used to read the query and to reach the index
+   * @param params request parameters (currently unused)
+   * @return the re-ranked list, or {@code docList} itself when it is empty, when the query
+   *         is not a fielded expression of at least three words, or on an index read error
+   */
+  public DocList filterResultsBySyntMatchReduceDocSet(DocList docList,
+      SolrQueryRequest req, SolrParams params) {
+    int len = docList.size();
+    if (len < 1) { // do nothing
+      return docList;
+    }
+    // isolate the "q=..." part of the raw parameter string
+    String requestExpression = req.getParamString();
+    for (String part : requestExpression.split("&")) {
+      if (part.startsWith("q=")) {
+        requestExpression = part;
+      }
+    }
+    String fieldNameQuery = StringUtils.substringBetween(requestExpression, "=", ":");
+    if (fieldNameQuery == null) {
+      return docList;
+    }
+    // prefer the phrase query (in double quotes); otherwise strip the "q=field:" decoration
+    String[] queryParts = requestExpression.split("\"");
+    if (queryParts.length >= 2 && queryParts[1].length() > 5) {
+      requestExpression = queryParts[1].replace('+', ' ');
+    } else {
+      // literal replace: the field name is not a regex; space runs would skew the word count
+      requestExpression = requestExpression.replace(fieldNameQuery + ":", "")
+          .replace('+', ' ').replaceAll(" +", " ").replace("q=", "");
+    }
+    if (requestExpression.length() < 5 || requestExpression.split(" ").length < 3) {
+      return docList;
+    }
+    ParserChunker2MatcherProcessor pos = ParserChunker2MatcherProcessor.getInstance();
+    IndexReader indexReader = req.getSearcher().getIndexReader();
+    List<Pair<Integer, Float>> docIdsScores = new ArrayList<Pair<Integer, Float>>(len);
+    DocIterator iter = docList.iterator();
+    try {
+      while (iter.hasNext()) {
+        int docId = iter.nextDoc();
+        Document doc = indexReader.document(docId);
+        String answerText = doc.get(fieldNameQuery);
+        if (answerText == null) {
+          continue;
+        }
+        SentencePairMatchResult matchResult = pos.assessRelevance(requestExpression, answerText);
+        float syntMatchScore = Double.valueOf(parseTreeChunkListScorer
+            .getParseTreeChunkListScore(matchResult.getMatchResult())).floatValue();
+        LOG.fine("Matched query = '" + requestExpression + "' with answer = '" + answerText
+            + "' | doc_id = '" + docId + "' with score = '" + syntMatchScore + "'");
+        docIdsScores.add(new Pair<Integer, Float>(docId, syntMatchScore));
+      }
+    } catch (IOException e) { // also covers CorruptIndexException
+      // a partially scored list would silently drop documents; keep the original instead
+      LOG.log(java.util.logging.Level.WARNING, "Cannot read index; keeping original order", e);
+      return docList;
+    }
+    Collections.sort(docIdsScores, new PairComparable());
+    int limit = docIdsScores.size();
+    int[] rankedDocIds = new int[limit];
+    float[] rankedScores = new float[limit];
+    for (int i = 0; i < limit; i++) {
+      rankedDocIds[i] = docIdsScores.get(i).getFirst();
+      rankedScores[i] = docIdsScores.get(i).getSecond();
+    }
+    float maxScore = docList.maxScore(); // do not change
+    return new DocSlice(0, limit, rankedDocIds, rankedScores, limit, maxScore);
+  }
+
+  /**
+   * Orders pairs by their second component, highest {@code Float} first; pairs whose second
+   * component is not a {@code Float} sort last and compare equal to each other.
+   */
+  public class PairComparable implements Comparator<Pair> {
+    @Override
+    public int compare(Pair o1, Pair o2) {
+      boolean firstIsFloat = o1.getSecond() instanceof Float;
+      boolean secondIsFloat = o2.getSecond() instanceof Float;
+      if (firstIsFloat && secondIsFloat) {
+        // compare by value: '==' on boxed Floats compares references, not numbers
+        return Float.compare((Float) o2.getSecond(), (Float) o1.getSecond());
+      }
+      return firstIsFloat == secondIsFloat ? 0 : (firstIsFloat ? -1 : 1);
+    }
+  }
+}