You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by cu...@apache.org on 2003/01/15 20:25:05 UTC
cvs commit: jakarta-lucene/src/java/org/apache/lucene/search BooleanQuery.java BooleanScorer.java IndexSearcher.java PhrasePrefixQuery.java PhraseQuery.java PhraseScorer.java TermQuery.java TermScorer.java Weight.java
cutting 2003/01/15 11:25:05
Modified: src/java/org/apache/lucene/search BooleanQuery.java
BooleanScorer.java IndexSearcher.java
PhrasePrefixQuery.java PhraseQuery.java
PhraseScorer.java TermQuery.java TermScorer.java
Weight.java
Log:
Revised explanation format so that it better corresponds to a dot product of tf*idf weights.
Revision Changes Path
1.11 +41 -6 jakarta-lucene/src/java/org/apache/lucene/search/BooleanQuery.java
Index: BooleanQuery.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/BooleanQuery.java,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- BooleanQuery.java 14 Jan 2003 21:57:30 -0000 1.10
+++ BooleanQuery.java 15 Jan 2003 19:25:04 -0000 1.11
@@ -150,13 +150,48 @@
return result;
}
- public Explanation explain() throws IOException {
- Explanation result = new Explanation();
- result.setDescription("boost(" + getQuery() + ")");
- result.setValue(getBoost());
- return result;
- }
+ public Explanation explain(IndexReader reader, int doc) // explains doc's score: sum of matching clause explanations, scaled by coord
+ throws IOException {
+ Explanation sumExpl = new Explanation();
+ sumExpl.setDescription("sum of:");
+ int coord = 0; // non-prohibited clauses that matched doc
+ int maxCoord = 0; // non-prohibited clauses overall
+ float sum = 0.0f;
+ for (int i = 0 ; i < weights.size(); i++) {
+ BooleanClause c = (BooleanClause)clauses.elementAt(i); // flags for weight i; index 0 applied the first clause's flags to every weight
+ Weight w = (Weight)weights.elementAt(i);
+ Explanation e = w.explain(reader, doc);
+ if (!c.prohibited) maxCoord++;
+ if (e.getValue() > 0) { // this clause matched doc
+ if (!c.prohibited) {
+ sumExpl.addDetail(e);
+ sum += e.getValue();
+ coord++;
+ } else { // a prohibited clause matched, so doc cannot match
+ return new Explanation(0.0f, "match prohibited");
+ }
+ } else if (c.required) { // a required clause did not match
+ return new Explanation(0.0f, "match required");
+ }
+ }
+ sumExpl.setValue(sum);
+
+ if (coord == 1) // only one clause matched
+ sumExpl = sumExpl.getDetails()[0]; // eliminate wrapper
+ float coordFactor = searcher.getSimilarity().coord(coord, maxCoord);
+ if (coordFactor == 1.0f) // coord is no-op
+ return sumExpl; // eliminate wrapper
+ else {
+ Explanation result = new Explanation();
+ result.setDescription("product of:");
+ result.addDetail(sumExpl);
+ result.addDetail(new Explanation(coordFactor,
+ "coord("+coord+"/"+maxCoord+")"));
+ result.setValue(sum*coordFactor);
+ return result;
+ }
+ }
}
protected Weight createWeight(Searcher searcher) {
1.4 +1 -35 jakarta-lucene/src/java/org/apache/lucene/search/BooleanScorer.java
Index: BooleanScorer.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/BooleanScorer.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- BooleanScorer.java 13 Jan 2003 23:50:33 -0000 1.3
+++ BooleanScorer.java 15 Jan 2003 19:25:04 -0000 1.4
@@ -208,41 +208,7 @@
}
public Explanation explain(int doc) throws IOException {
- Explanation sumExpl = new Explanation();
- sumExpl.setDescription("sum of:");
- int coord = 0;
- float sum = 0.0f;
- for (SubScorer s = scorers; s != null; s = s.next) {
- Explanation e = s.scorer.explain(doc);
- if (e.getValue() > 0) {
- if (!s.prohibited) {
- sumExpl.addDetail(e);
- sum += e.getValue();
- coord++;
- } else {
- return new Explanation(0.0f, "match prohibited");
- }
- } else if (s.required) {
- return new Explanation(0.0f, "match required");
- }
- }
- sumExpl.setValue(sum);
-
- if (coord == 1) // only one clause matched
- sumExpl = sumExpl.getDetails()[0]; // eliminate wrapper
-
- float coordFactor = getSimilarity().coord(coord, maxCoord-1);
- if (coordFactor == 1.0f) // coord is no-op
- return sumExpl; // eliminate wrapper
- else {
- Explanation result = new Explanation();
- result.setDescription("product of:");
- result.addDetail(sumExpl);
- result.addDetail(new Explanation(coordFactor,
- "coord("+coord+"/"+(maxCoord-1)+")"));
- result.setValue(sum*coordFactor);
- return result;
- }
+ throw new UnsupportedOperationException();
}
}
1.7 +1 -1 jakarta-lucene/src/java/org/apache/lucene/search/IndexSearcher.java
Index: IndexSearcher.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/IndexSearcher.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- IndexSearcher.java 14 Jan 2003 00:04:37 -0000 1.6
+++ IndexSearcher.java 15 Jan 2003 19:25:04 -0000 1.7
@@ -197,7 +197,7 @@
}
public Explanation explain(Query query, int doc) throws IOException {
- return query.weight(this).scorer(reader).explain(doc);
+ return query.weight(this).explain(reader, doc);
}
}
1.7 +51 -18 jakarta-lucene/src/java/org/apache/lucene/search/PhrasePrefixQuery.java
Index: PhrasePrefixQuery.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/PhrasePrefixQuery.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- PhrasePrefixQuery.java 14 Jan 2003 21:57:30 -0000 1.6
+++ PhrasePrefixQuery.java 15 Jan 2003 19:25:04 -0000 1.7
@@ -125,6 +125,7 @@
private float value;
private float idf;
private float queryNorm;
+ private float queryWeight;
public PhrasePrefixWeight(Searcher searcher) {
this.searcher = searcher;
@@ -141,14 +142,14 @@
idf += searcher.getSimilarity().idf(terms[j], searcher);
}
- value = idf * getBoost();
- return value * value;
+ queryWeight = idf * getBoost(); // compute query weight
+ return queryWeight * queryWeight; // square it
}
- public void normalize(float norm) {
- queryNorm = norm;
- queryNorm *= idf; // factor from document
- value *= queryNorm; // normalize for query
+ public void normalize(float queryNorm) {
+ this.queryNorm = queryNorm;
+ queryWeight *= queryNorm; // normalize query weight
+ value = queryWeight * idf; // idf for document
}
public Scorer scorer(IndexReader reader) throws IOException {
@@ -179,25 +180,57 @@
slop, reader.norms(field));
}
- public Explanation explain() throws IOException {
- Query q = getQuery();
-
+ public Explanation explain(IndexReader reader, int doc)
+ throws IOException {
Explanation result = new Explanation();
- result.setDescription("weight(" + getQuery() + "), product of:");
+ result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
+
+ Explanation idfExpl = new Explanation(idf, "idf("+getQuery()+")");
+
+ // explain query weight
+ Explanation queryExpl = new Explanation();
+ queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");
Explanation boostExpl = new Explanation(getBoost(), "boost");
if (getBoost() != 1.0f)
- result.addDetail(boostExpl);
+ queryExpl.addDetail(boostExpl);
+
+ queryExpl.addDetail(idfExpl);
+
+ Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
+ queryExpl.addDetail(queryNormExpl);
- Explanation idfExpl = new Explanation(idf, "idf");
- result.addDetail(idfExpl);
+ queryExpl.setValue(boostExpl.getValue() *
+ idfExpl.getValue() *
+ queryNormExpl.getValue());
+
+ result.addDetail(queryExpl);
+
+ // explain field weight
+ Explanation fieldExpl = new Explanation();
+ fieldExpl.setDescription("fieldWeight("+getQuery()+" in "+doc+
+ "), product of:");
+
+ Explanation tfExpl = scorer(reader).explain(doc);
+ fieldExpl.addDetail(tfExpl);
+ fieldExpl.addDetail(idfExpl);
+
+ Explanation fieldNormExpl = new Explanation();
+ fieldNormExpl.setValue(Similarity.decodeNorm(reader.norms(field)[doc]));
+ fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
+ fieldExpl.addDetail(fieldNormExpl);
+
+ fieldExpl.setValue(tfExpl.getValue() *
+ idfExpl.getValue() *
+ fieldNormExpl.getValue());
- Explanation normExpl = new Explanation(queryNorm, "queryNorm");
- result.addDetail(normExpl);
+ result.addDetail(fieldExpl);
+
+ // combine them
+ result.setValue(queryExpl.getValue() * fieldExpl.getValue());
- result.setValue(boostExpl.getValue() *
- idfExpl.getValue() *
- normExpl.getValue());
+ if (queryExpl.getValue() == 1.0f)
+ return fieldExpl;
return result;
}
1.10 +67 -22 jakarta-lucene/src/java/org/apache/lucene/search/PhraseQuery.java
Index: PhraseQuery.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/PhraseQuery.java,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- PhraseQuery.java 14 Jan 2003 21:57:30 -0000 1.9
+++ PhraseQuery.java 15 Jan 2003 19:25:04 -0000 1.10
@@ -112,6 +112,7 @@
private float value;
private float idf;
private float queryNorm;
+ private float queryWeight;
public PhraseWeight(Searcher searcher) {
this.searcher = searcher;
@@ -122,14 +123,14 @@
public float sumOfSquaredWeights() throws IOException {
idf = searcher.getSimilarity().idf(terms, searcher);
- value = idf * getBoost();
- return value * value; // square term weights
+ queryWeight = idf * getBoost(); // compute query weight
+ return queryWeight * queryWeight; // square it
}
- public void normalize(float norm) {
- queryNorm = norm;
- queryNorm *= idf; // factor from document
- value *= queryNorm; // normalize for query
+ public void normalize(float queryNorm) {
+ this.queryNorm = queryNorm;
+ queryWeight *= queryNorm; // normalize query weight
+ value = queryWeight * idf; // idf for document
}
public Scorer scorer(IndexReader reader) throws IOException {
@@ -154,33 +155,77 @@
}
- public Explanation explain() throws IOException {
- Query q = getQuery();
+ public Explanation explain(IndexReader reader, int doc)
+ throws IOException {
Explanation result = new Explanation();
- result.setDescription("weight(" + getQuery() + "), product of:");
+ result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
- Explanation boostExpl = new Explanation(getBoost(), "boost");
- if (getBoost() != 1.0f)
- result.addDetail(boostExpl);
-
StringBuffer docFreqs = new StringBuffer();
+ StringBuffer query = new StringBuffer();
+ query.append('\"');
for (int i = 0; i < terms.size(); i++) {
- if (i != 0) docFreqs.append(" ");
- docFreqs.append(((Term)terms.elementAt(i)).text());
+ if (i != 0) {
+ docFreqs.append(" ");
+ query.append(" ");
+ }
+
+ Term term = (Term)terms.elementAt(i);
+
+ docFreqs.append(term.text());
docFreqs.append("=");
- docFreqs.append(searcher.docFreq((Term)terms.elementAt(i)));
+ docFreqs.append(searcher.docFreq(term));
+
+ query.append(term.text());
}
+ query.append('\"');
+
Explanation idfExpl =
new Explanation(idf, "idf(" + field + ": " + docFreqs + ")");
- result.addDetail(idfExpl);
- Explanation normExpl = new Explanation(queryNorm, "queryNorm");
- result.addDetail(normExpl);
+ // explain query weight
+ Explanation queryExpl = new Explanation();
+ queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");
+
+ Explanation boostExpl = new Explanation(getBoost(), "boost");
+ if (getBoost() != 1.0f)
+ queryExpl.addDetail(boostExpl);
+ queryExpl.addDetail(idfExpl);
+
+ Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
+ queryExpl.addDetail(queryNormExpl);
+
+ queryExpl.setValue(boostExpl.getValue() *
+ idfExpl.getValue() *
+ queryNormExpl.getValue());
+
+ result.addDetail(queryExpl);
+
+ // explain field weight
+ Explanation fieldExpl = new Explanation();
+ fieldExpl.setDescription("fieldWeight("+field+":"+query+" in "+doc+
+ "), product of:");
+
+ Explanation tfExpl = scorer(reader).explain(doc);
+ fieldExpl.addDetail(tfExpl);
+ fieldExpl.addDetail(idfExpl);
+
+ Explanation fieldNormExpl = new Explanation();
+ fieldNormExpl.setValue(Similarity.decodeNorm(reader.norms(field)[doc]));
+ fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
+ fieldExpl.addDetail(fieldNormExpl);
+
+ fieldExpl.setValue(tfExpl.getValue() *
+ idfExpl.getValue() *
+ fieldNormExpl.getValue());
+
+ result.addDetail(fieldExpl);
+
+ // combine them
+ result.setValue(queryExpl.getValue() * fieldExpl.getValue());
- result.setValue(boostExpl.getValue() *
- idfExpl.getValue() *
- normExpl.getValue());
+ if (queryExpl.getValue() == 1.0f)
+ return fieldExpl;
return result;
}
1.6 +2 -20 jakarta-lucene/src/java/org/apache/lucene/search/PhraseScorer.java
Index: PhraseScorer.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/PhraseScorer.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- PhraseScorer.java 13 Jan 2003 23:50:33 -0000 1.5
+++ PhraseScorer.java 15 Jan 2003 19:25:04 -0000 1.6
@@ -130,15 +130,8 @@
}
public Explanation explain(final int doc) throws IOException {
- Explanation result = new Explanation();
- PhraseQuery query = (PhraseQuery)weight.getQuery();
-
- result.setDescription("phraseScore(" + query + "), product of:");
-
- Explanation weightExplanation = weight.explain();
- result.addDetail(weightExplanation);
-
Explanation tfExplanation = new Explanation();
+
score(new HitCollector() {
public final void collect(int d, float score) {}
}, doc+1);
@@ -146,19 +139,8 @@
float phraseFreq = (first.doc == doc) ? freq : 0.0f;
tfExplanation.setValue(getSimilarity().tf(phraseFreq));
tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");
- result.addDetail(tfExplanation);
-
- Explanation normExplanation = new Explanation();
- normExplanation.setValue(Similarity.decodeNorm(norms[doc]));
- String field = query.getTerms()[0].field();
- normExplanation.setDescription("norm(field="+field + ", doc="+doc + ")");
- result.addDetail(normExplanation);
- result.setValue(weightExplanation.getValue() *
- tfExplanation.getValue() *
- normExplanation.getValue());
-
- return result;
+ return tfExplanation;
}
}
1.7 +53 -19 jakarta-lucene/src/java/org/apache/lucene/search/TermQuery.java
Index: TermQuery.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/TermQuery.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- TermQuery.java 14 Jan 2003 00:22:09 -0000 1.6
+++ TermQuery.java 15 Jan 2003 19:25:04 -0000 1.7
@@ -70,6 +70,7 @@
private float value;
private float idf;
private float queryNorm;
+ private float queryWeight;
public TermWeight(Searcher searcher) {
this.searcher = searcher;
@@ -79,15 +80,15 @@
public float getValue() { return value; }
public float sumOfSquaredWeights() throws IOException {
- idf = searcher.getSimilarity().idf(term, searcher);
- value = idf * getBoost();
- return value * value; // square term weights
+ idf = searcher.getSimilarity().idf(term, searcher); // compute idf
+ queryWeight = idf * getBoost(); // compute query weight
+ return queryWeight * queryWeight; // square it
}
- public void normalize(float norm) {
- queryNorm = norm;
- queryNorm *= idf; // factor from document
- value *= queryNorm; // normalize for query
+ public void normalize(float queryNorm) {
+ this.queryNorm = queryNorm;
+ queryWeight *= queryNorm; // normalize query weight
+ value = queryWeight * idf; // idf for document
}
public Scorer scorer(IndexReader reader) throws IOException {
@@ -100,26 +101,59 @@
reader.norms(term.field()));
}
- public Explanation explain() throws IOException {
- Query q = getQuery();
+ public Explanation explain(IndexReader reader, int doc)
+ throws IOException {
Explanation result = new Explanation();
- result.setDescription("weight(" + getQuery() + "), product of:");
+ result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
+
+ Explanation idfExpl =
+ new Explanation(idf, "idf(docFreq=" + searcher.docFreq(term) + ")");
+
+ // explain query weight
+ Explanation queryExpl = new Explanation();
+ queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");
Explanation boostExpl = new Explanation(getBoost(), "boost");
if (getBoost() != 1.0f)
- result.addDetail(boostExpl);
+ queryExpl.addDetail(boostExpl);
+ queryExpl.addDetail(idfExpl);
- Explanation idfExpl =
- new Explanation(idf, "idf(docFreq=" + searcher.docFreq(term) + ")");
- result.addDetail(idfExpl);
+ Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
+ queryExpl.addDetail(queryNormExpl);
- Explanation normExpl = new Explanation(queryNorm,"queryNorm");
- result.addDetail(normExpl);
+ queryExpl.setValue(boostExpl.getValue() *
+ idfExpl.getValue() *
+ queryNormExpl.getValue());
+
+ result.addDetail(queryExpl);
+
+ // explain field weight
+ String field = term.field();
+ Explanation fieldExpl = new Explanation();
+ fieldExpl.setDescription("fieldWeight("+term+" in "+doc+
+ "), product of:");
+
+ Explanation tfExpl = scorer(reader).explain(doc);
+ fieldExpl.addDetail(tfExpl);
+ fieldExpl.addDetail(idfExpl);
+
+ Explanation fieldNormExpl = new Explanation();
+ fieldNormExpl.setValue(Similarity.decodeNorm(reader.norms(field)[doc]));
+ fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
+ fieldExpl.addDetail(fieldNormExpl);
+
+ fieldExpl.setValue(tfExpl.getValue() *
+ idfExpl.getValue() *
+ fieldNormExpl.getValue());
+
+ result.addDetail(fieldExpl);
+
+ // combine them
+ result.setValue(queryExpl.getValue() * fieldExpl.getValue());
- result.setValue(boostExpl.getValue() *
- idfExpl.getValue() *
- normExpl.getValue());
+ if (queryExpl.getValue() == 1.0f)
+ return fieldExpl;
return result;
}
1.5 +1 -19 jakarta-lucene/src/java/org/apache/lucene/search/TermScorer.java
Index: TermScorer.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/TermScorer.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- TermScorer.java 13 Jan 2003 23:50:33 -0000 1.4
+++ TermScorer.java 15 Jan 2003 19:25:04 -0000 1.5
@@ -124,14 +124,7 @@
}
public Explanation explain(int doc) throws IOException {
- Explanation result = new Explanation();
TermQuery query = (TermQuery)weight.getQuery();
-
- result.setDescription("termScore(" + query + "), product of:");
-
- Explanation weightExplanation = weight.explain();
- result.addDetail(weightExplanation);
-
Explanation tfExplanation = new Explanation();
int tf = 0;
while (pointer < pointerMax) {
@@ -149,18 +142,7 @@
termDocs.close();
tfExplanation.setValue(getSimilarity().tf(tf));
tfExplanation.setDescription("tf(termFreq("+query.getTerm()+")="+tf+")");
- result.addDetail(tfExplanation);
-
- Explanation normExplanation = new Explanation();
- normExplanation.setValue(Similarity.decodeNorm(norms[doc]));
- String field = query.getTerm().field();
- normExplanation.setDescription("norm(field="+field + ", doc="+doc + ")");
- result.addDetail(normExplanation);
-
- result.setValue(weightExplanation.getValue() *
- tfExplanation.getValue() *
- normExplanation.getValue());
- return result;
+ return tfExplanation;
}
}
1.2 +2 -2 jakarta-lucene/src/java/org/apache/lucene/search/Weight.java
Index: Weight.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/Weight.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- Weight.java 13 Jan 2003 23:50:33 -0000 1.1
+++ Weight.java 15 Jan 2003 19:25:04 -0000 1.2
@@ -83,6 +83,6 @@
/** Constructs a scorer for this. */
Scorer scorer(IndexReader reader) throws IOException;
- /** An explanation of this weight computation. */
- Explanation explain() throws IOException;
+ /** An explanation of the score computation for the named document. */
+ Explanation explain(IndexReader reader, int doc) throws IOException;
}
--
To unsubscribe, e-mail: <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>