You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/01/11 08:00:39 UTC
[03/50] [abbrv] lucene-solr:jira/solr-11702: LUCENE-8116: SimScorer
now only takes a frequency and a norm as per-document scoring factors.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8fd7ead9/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java
index 91e64c0..348584e 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java
@@ -17,24 +17,18 @@
package org.apache.lucene.search.similarities;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
import java.util.Random;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
-import org.apache.lucene.search.similarities.Similarity.SimWeight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@@ -54,119 +48,28 @@ import org.junit.BeforeClass;
* test fails to catch then this test needs to be improved! */
public abstract class BaseSimilarityTestCase extends LuceneTestCase {
- static LeafReader WITHOUT_NORM;
- static Directory WITHOUT_NORM_DIR;
-
- static LeafReader WITH_NORM_BASE;
- static Directory WITH_NORM_DIR;
- static List<LeafReader> NORM_VALUES;
+ static LeafReader READER;
+ static Directory DIR;
@BeforeClass
public static void beforeClass() throws Exception {
- // without norms
- WITHOUT_NORM_DIR = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random(), WITHOUT_NORM_DIR);
- Document doc = new Document();
- doc.add(newTextField("field", "value", Field.Store.NO));
- writer.addDocument(doc);
- WITHOUT_NORM = getOnlyLeafReader(writer.getReader());
- writer.close();
-
// with norms
- WITH_NORM_DIR = newDirectory();
- writer = new RandomIndexWriter(random(), WITH_NORM_DIR);
- doc = new Document();
+ DIR = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), DIR);
+ Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setOmitNorms(true);
doc.add(newField("field", "value", fieldType));
writer.addDocument(doc);
- WITH_NORM_BASE = getOnlyLeafReader(writer.getReader());
+ READER = getOnlyLeafReader(writer.getReader());
writer.close();
-
- // all possible norm values for the doc
- NORM_VALUES = new ArrayList<>();
- NORM_VALUES.add(WITHOUT_NORM);
- for (int i = 1; i < 256; i++) {
- final long value = i;
- NORM_VALUES.add(new FilterLeafReader(WITH_NORM_BASE) {
- @Override
- public CacheHelper getCoreCacheHelper() {
- return null;
- }
-
- @Override
- public CacheHelper getReaderCacheHelper() {
- return null;
- }
-
- @Override
- public NumericDocValues getNormValues(String field) throws IOException {
- if (field.equals("field")) {
- return new CannedNorm(value);
- } else {
- return super.getNormValues(field);
- }
- }
- });
- }
}
@AfterClass
public static void afterClass() throws Exception {
- IOUtils.close(WITH_NORM_BASE, WITH_NORM_DIR, WITHOUT_NORM, WITHOUT_NORM_DIR);
- WITH_NORM_BASE = WITHOUT_NORM = null;
- WITH_NORM_DIR = WITHOUT_NORM_DIR = null;
- NORM_VALUES = null;
- }
-
- /** 1-document norms impl of the given value */
- static class CannedNorm extends NumericDocValues {
- int docID = -1;
- final long value;
-
- CannedNorm(long value) {
- this.value = value;
- }
-
- @Override
- public long longValue() throws IOException {
- return value;
- }
-
- @Override
- public boolean advanceExact(int target) throws IOException {
- assert target == 0;
- docID = target;
- return true;
- }
-
- @Override
- public int docID() {
- return docID;
- }
-
- @Override
- public int nextDoc() throws IOException {
- if (docID == -1) {
- return docID = 0;
- } else {
- return docID = NO_MORE_DOCS;
- }
- }
-
- @Override
- public int advance(int target) throws IOException {
- if (target == 0) {
- return docID = 0;
- } else {
- return docID = NO_MORE_DOCS;
- }
- }
-
- @Override
- public long cost() {
- return 0;
- }
+ IOUtils.close(READER, DIR);
+ READER = null;
+ DIR = null;
}
/**
@@ -354,7 +257,7 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
Similarity similarity = getSimilarity(random);
for (int j = 0; j < 10; j++) {
// for each norm value...
- for (int k = 0; k < NORM_VALUES.size(); k++) {
+ for (int k = 1; k < 256; k++) {
CollectionStatistics corpus = newCorpus(random, k);
for (int l = 0; l < 10; l++) {
TermStatistics term = newTerm(random, corpus);
@@ -441,17 +344,16 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
/** runs for a single test case, so that if you hit a test failure you can write a reproducer just for that scenario */
private static void doTestScoring(Similarity similarity, CollectionStatistics corpus, TermStatistics term, float boost, float freq, int norm) throws IOException {
boolean success = false;
- SimWeight weight = similarity.computeWeight(boost, corpus, term);
- SimScorer scorer = similarity.simScorer(weight, NORM_VALUES.get(norm).getContext());
+ SimScorer scorer = similarity.scorer(boost, corpus, term);
try {
- float score = scorer.score(0, freq);
+ float score = scorer.score(freq, norm);
// check that score isn't infinite or negative
assertTrue("infinite/NaN score: " + score, Float.isFinite(score));
assertTrue("negative score: " + score, score >= 0);
float maxScore = scorer.maxScore(freq);
assertTrue("score > maxScore: " + score + " > " + maxScore, score <= maxScore);
// check explanation matches
- Explanation explanation = scorer.explain(0, Explanation.match(freq, "freq, occurrences of term within document"));
+ Explanation explanation = scorer.explain(Explanation.match(freq, "freq, occurrences of term within document"), norm);
if (score != explanation.getValue().doubleValue()) {
fail("expected: " + score + ", got: " + explanation);
}
@@ -467,12 +369,12 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
prevFreq = Math.nextDown(freq);
}
- float prevScore = scorer.score(0, prevFreq);
+ float prevScore = scorer.score(prevFreq, norm);
// check that score isn't infinite or negative
assertTrue(Float.isFinite(prevScore));
assertTrue(prevScore >= 0);
// check explanation matches
- Explanation prevExplanation = scorer.explain(0, Explanation.match(prevFreq, "freq, occurrences of term within document"));
+ Explanation prevExplanation = scorer.explain(Explanation.match(prevFreq, "freq, occurrences of term within document"), norm);
if (prevScore != prevExplanation.getValue().doubleValue()) {
fail("expected: " + prevScore + ", got: " + prevExplanation);
}
@@ -486,13 +388,12 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
// check score(norm-1), given the same freq it should be >= score(norm) [scores non-decreasing as docs get shorter]
if (norm > 1) {
- SimScorer prevNormScorer = similarity.simScorer(weight, NORM_VALUES.get(norm - 1).getContext());
- float prevNormScore = prevNormScorer.score(0, freq);
+ float prevNormScore = scorer.score(freq, norm - 1);
// check that score isn't infinite or negative
assertTrue(Float.isFinite(prevNormScore));
assertTrue(prevNormScore >= 0);
// check explanation matches
- Explanation prevNormExplanation = prevNormScorer.explain(0, Explanation.match(freq, "freq, occurrences of term within document"));
+ Explanation prevNormExplanation = scorer.explain(Explanation.match(freq, "freq, occurrences of term within document"), norm - 1);
if (prevNormScore != prevNormExplanation.getValue().doubleValue()) {
fail("expected: " + prevNormScore + ", got: " + prevNormExplanation);
}
@@ -508,14 +409,13 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
// check score(term-1), given the same freq/norm it should be >= score(term) [scores non-decreasing as terms get rarer]
if (term.docFreq() > 1 && freq < term.totalTermFreq()) {
TermStatistics prevTerm = new TermStatistics(term.term(), term.docFreq() - 1, term.totalTermFreq() - 1);
- SimWeight prevWeight = similarity.computeWeight(boost, corpus, term);
- SimScorer prevTermScorer = similarity.simScorer(prevWeight, NORM_VALUES.get(norm).getContext());
- float prevTermScore = prevTermScorer.score(0, freq);
+ SimScorer prevTermScorer = similarity.scorer(boost, corpus, term);
+ float prevTermScore = prevTermScorer.score(freq, norm);
// check that score isn't infinite or negative
assertTrue(Float.isFinite(prevTermScore));
assertTrue(prevTermScore >= 0);
// check explanation matches
- Explanation prevTermExplanation = prevTermScorer.explain(0, Explanation.match(freq, "freq, occurrences of term within document"));
+ Explanation prevTermExplanation = prevTermScorer.explain(Explanation.match(freq, "freq, occurrences of term within document"), norm);
if (prevTermScore != prevTermExplanation.getValue().doubleValue()) {
fail("expected: " + prevTermScore + ", got: " + prevTermExplanation);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8fd7ead9/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java
index a9956b3..65f9599 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java
@@ -25,7 +25,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.LeafSimScorer;
/**
* Wraps a SpanWeight with additional asserts
@@ -58,7 +58,7 @@ public class AssertingSpanWeight extends SpanWeight {
}
@Override
- public Similarity.SimScorer getSimScorer(LeafReaderContext context) throws IOException {
+ public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException {
return in.getSimScorer(context);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8fd7ead9/solr/core/src/test/org/apache/solr/search/similarities/BaseSimilarityTestCase.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/similarities/BaseSimilarityTestCase.java b/solr/core/src/test/org/apache/solr/search/similarities/BaseSimilarityTestCase.java
index d782c2f..6ce4331 100644
--- a/solr/core/src/test/org/apache/solr/search/similarities/BaseSimilarityTestCase.java
+++ b/solr/core/src/test/org/apache/solr/search/similarities/BaseSimilarityTestCase.java
@@ -29,7 +29,7 @@ public abstract class BaseSimilarityTestCase extends SolrTestCaseJ4 {
protected Similarity getSimilarity(String field) {
SolrCore core = h.getCore();
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
- Similarity sim = searcher.get().getSimilarity(true);
+ Similarity sim = searcher.get().getSimilarity();
searcher.decref();
while (sim instanceof PerFieldSimilarityWrapper) {
sim = ((PerFieldSimilarityWrapper)sim).get(field);