You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2013/07/04 01:26:45 UTC
svn commit: r1499601 [6/20] - in /lucene/dev/branches/security: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/
dev-tools/idea/solr/core/src/test/ dev-tools/maven/ dev-tools/maven/lucene/
dev-tools/maven/lucene/analysis/stempel/ dev-to...
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java Wed Jul 3 23:26:32 2013
@@ -27,7 +27,7 @@ import org.apache.lucene.search.Explanat
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.search.spans.SpanTermQuery;
@@ -49,7 +49,7 @@ import java.io.IOException;
* which returns 1 by default.
* <p/>
* Payload scores are aggregated using a pluggable {@link PayloadFunction}.
- * @see org.apache.lucene.search.similarities.Similarity.SloppySimScorer#computePayloadFactor(int, int, int, BytesRef)
+ * @see org.apache.lucene.search.similarities.Similarity.SimScorer#computePayloadFactor(int, int, int, BytesRef)
**/
public class PayloadTermQuery extends SpanTermQuery {
protected PayloadFunction function;
@@ -82,7 +82,7 @@ public class PayloadTermQuery extends Sp
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
- this, similarity.sloppySimScorer(stats, context));
+ this, similarity.simScorer(stats, context));
}
protected class PayloadTermSpanScorer extends SpanScorer {
@@ -91,7 +91,7 @@ public class PayloadTermQuery extends Sp
protected int payloadsSeen;
private final TermSpans termSpans;
- public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SloppySimScorer docScorer) throws IOException {
+ public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
termSpans = spans;
}
@@ -182,7 +182,7 @@ public class PayloadTermQuery extends Sp
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.sloppyFreq();
- SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
+ SimScorer docScorer = similarity.simScorer(stats, context);
Explanation expl = new Explanation();
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java Wed Jul 3 23:26:32 2013
@@ -212,80 +212,18 @@ public class BM25Similarity extends Simi
}
@Override
- public final ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
+ public final SimScorer simScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
BM25Stats bm25stats = (BM25Stats) stats;
- final NumericDocValues norms = context.reader().getNormValues(bm25stats.field);
- return norms == null
- ? new ExactBM25DocScorerNoNorms(bm25stats)
- : new ExactBM25DocScorer(bm25stats, norms);
- }
-
- @Override
- public final SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
- BM25Stats bm25stats = (BM25Stats) stats;
- return new SloppyBM25DocScorer(bm25stats, context.reader().getNormValues(bm25stats.field));
- }
-
- private class ExactBM25DocScorer extends ExactSimScorer {
- private final BM25Stats stats;
- private final float weightValue;
- private final NumericDocValues norms;
- private final float[] cache;
-
- ExactBM25DocScorer(BM25Stats stats, NumericDocValues norms) throws IOException {
- assert norms != null;
- this.stats = stats;
- this.weightValue = stats.weight * (k1 + 1); // boost * idf * (k1 + 1)
- this.cache = stats.cache;
- this.norms = norms;
- }
-
- @Override
- public float score(int doc, int freq) {
- return weightValue * freq / (freq + cache[(byte)norms.get(doc) & 0xFF]);
- }
-
- @Override
- public Explanation explain(int doc, Explanation freq) {
- return explainScore(doc, freq, stats, norms);
- }
- }
-
- /** there are no norms, we act as if b=0 */
- private class ExactBM25DocScorerNoNorms extends ExactSimScorer {
- private final BM25Stats stats;
- private final float weightValue;
- private static final int SCORE_CACHE_SIZE = 32;
- private float[] scoreCache = new float[SCORE_CACHE_SIZE];
-
- ExactBM25DocScorerNoNorms(BM25Stats stats) {
- this.stats = stats;
- this.weightValue = stats.weight * (k1 + 1); // boost * idf * (k1 + 1)
- for (int i = 0; i < SCORE_CACHE_SIZE; i++)
- scoreCache[i] = weightValue * i / (i + k1);
- }
-
- @Override
- public float score(int doc, int freq) {
- // TODO: maybe score cache is more trouble than its worth?
- return freq < SCORE_CACHE_SIZE // check cache
- ? scoreCache[freq] // cache hit
- : weightValue * freq / (freq + k1); // cache miss
- }
-
- @Override
- public Explanation explain(int doc, Explanation freq) {
- return explainScore(doc, freq, stats, null);
- }
+ return new BM25DocScorer(bm25stats, context.reader().getNormValues(bm25stats.field));
}
- private class SloppyBM25DocScorer extends SloppySimScorer {
+ private class BM25DocScorer extends SimScorer {
private final BM25Stats stats;
private final float weightValue; // boost * idf * (k1 + 1)
private final NumericDocValues norms;
private final float[] cache;
- SloppyBM25DocScorer(BM25Stats stats, NumericDocValues norms) throws IOException {
+ BM25DocScorer(BM25Stats stats, NumericDocValues norms) throws IOException {
this.stats = stats;
this.weightValue = stats.weight * (k1 + 1);
this.cache = stats.cache;
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/DefaultSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/DefaultSimilarity.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/DefaultSimilarity.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/DefaultSimilarity.java Wed Jul 3 23:26:32 2013
@@ -19,10 +19,40 @@ package org.apache.lucene.search.similar
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.SmallFloat;
-/** Expert: Default scoring implementation. */
+/**
+ * Expert: Default scoring implementation which {@link #encodeNormValue(float)
+ * encodes} norm values as a single byte before being stored. At search time,
+ * the norm byte value is read from the index
+ * {@link org.apache.lucene.store.Directory directory} and
+ * {@link #decodeNormValue(long) decoded} back to a float <i>norm</i> value.
+ * This encoding/decoding, while reducing index size, comes with the price of
+ * precision loss - it is not guaranteed that <i>decode(encode(x)) = x</i>. For
+ * instance, <i>decode(encode(0.89)) = 0.75</i>.
+ * <p>
+ * Compression of norm values to a single byte saves memory at search time,
+ * because once a field is referenced at search time, its norms - for all
+ * documents - are maintained in memory.
+ * <p>
+ * The rationale supporting such lossy compression of norm values is that given
+ * the difficulty (and inaccuracy) of users to express their true information
+ * need by a query, only big differences matter. <br>
+ * <br>
+ * Last, note that search time is too late to modify this <i>norm</i> part of
+ * scoring, e.g. by using a different {@link Similarity} for search.
+ */
public class DefaultSimilarity extends TFIDFSimilarity {
+ /** Cache of decoded bytes. */
+ private static final float[] NORM_TABLE = new float[256];
+
+ static {
+ for (int i = 0; i < 256; i++) {
+ NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i);
+ }
+ }
+
/** Sole constructor: parameter-free */
public DefaultSimilarity() {}
@@ -38,6 +68,35 @@ public class DefaultSimilarity extends T
return (float)(1.0 / Math.sqrt(sumOfSquaredWeights));
}
+ /**
+ * Encodes a normalization factor for storage in an index.
+ * <p>
+ * The encoding uses a three-bit mantissa, a five-bit exponent, and the
+ * zero-exponent point at 15, thus representing values from around 7x10^9 to
+ * 2x10^-9 with about one significant decimal digit of accuracy. Zero is also
+ * represented. Negative numbers are rounded up to zero. Values too large to
+ * represent are rounded down to the largest representable value. Positive
+ * values too small to represent are rounded up to the smallest positive
+ * representable value.
+ *
+ * @see org.apache.lucene.document.Field#setBoost(float)
+ * @see org.apache.lucene.util.SmallFloat
+ */
+ @Override
+ public final long encodeNormValue(float f) {
+ return SmallFloat.floatToByte315(f);
+ }
+
+ /**
+ * Decodes the norm value, assuming it is a single byte.
+ *
+ * @see #encodeNormValue(float)
+ */
+ @Override
+ public final float decodeNormValue(long norm) {
+ return NORM_TABLE[(int) (norm & 0xFF)]; // & 0xFF maps negative bytes to positive above 127
+ }
+
/** Implemented as
* <code>state.getBoost()*lengthNorm(numTerms)</code>, where
* <code>numTerms</code> is {@link FieldInvertState#getLength()} if {@link
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java Wed Jul 3 23:26:32 2013
@@ -57,60 +57,25 @@ public class MultiSimilarity extends Sim
}
@Override
- public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
- ExactSimScorer subScorers[] = new ExactSimScorer[sims.length];
+ public SimScorer simScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
+ SimScorer subScorers[] = new SimScorer[sims.length];
for (int i = 0; i < subScorers.length; i++) {
- subScorers[i] = sims[i].exactSimScorer(((MultiStats)stats).subStats[i], context);
- }
- return new MultiExactDocScorer(subScorers);
- }
-
- @Override
- public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
- SloppySimScorer subScorers[] = new SloppySimScorer[sims.length];
- for (int i = 0; i < subScorers.length; i++) {
- subScorers[i] = sims[i].sloppySimScorer(((MultiStats)stats).subStats[i], context);
- }
- return new MultiSloppyDocScorer(subScorers);
- }
-
- static class MultiExactDocScorer extends ExactSimScorer {
- private final ExactSimScorer subScorers[];
-
- MultiExactDocScorer(ExactSimScorer subScorers[]) {
- this.subScorers = subScorers;
- }
-
- @Override
- public float score(int doc, int freq) {
- float sum = 0.0f;
- for (ExactSimScorer subScorer : subScorers) {
- sum += subScorer.score(doc, freq);
- }
- return sum;
- }
-
- @Override
- public Explanation explain(int doc, Explanation freq) {
- Explanation expl = new Explanation(score(doc, (int)freq.getValue()), "sum of:");
- for (ExactSimScorer subScorer : subScorers) {
- expl.addDetail(subScorer.explain(doc, freq));
- }
- return expl;
+ subScorers[i] = sims[i].simScorer(((MultiStats)stats).subStats[i], context);
}
+ return new MultiSimScorer(subScorers);
}
- static class MultiSloppyDocScorer extends SloppySimScorer {
- private final SloppySimScorer subScorers[];
+ static class MultiSimScorer extends SimScorer {
+ private final SimScorer subScorers[];
- MultiSloppyDocScorer(SloppySimScorer subScorers[]) {
+ MultiSimScorer(SimScorer subScorers[]) {
this.subScorers = subScorers;
}
@Override
public float score(int doc, float freq) {
float sum = 0.0f;
- for (SloppySimScorer subScorer : subScorers) {
+ for (SimScorer subScorer : subScorers) {
sum += subScorer.score(doc, freq);
}
return sum;
@@ -119,7 +84,7 @@ public class MultiSimilarity extends Sim
@Override
public Explanation explain(int doc, Explanation freq) {
Explanation expl = new Explanation(score(doc, freq.getValue()), "sum of:");
- for (SloppySimScorer subScorer : subScorers) {
+ for (SimScorer subScorer : subScorers) {
expl.addDetail(subScorer.explain(doc, freq));
}
return expl;
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/PerFieldSimilarityWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/PerFieldSimilarityWrapper.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/PerFieldSimilarityWrapper.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/PerFieldSimilarityWrapper.java Wed Jul 3 23:26:32 2013
@@ -54,15 +54,9 @@ public abstract class PerFieldSimilarity
}
@Override
- public final ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
+ public final SimScorer simScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
PerFieldSimWeight perFieldWeight = (PerFieldSimWeight) weight;
- return perFieldWeight.delegate.exactSimScorer(perFieldWeight.delegateWeight, context);
- }
-
- @Override
- public final SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
- PerFieldSimWeight perFieldWeight = (PerFieldSimWeight) weight;
- return perFieldWeight.delegate.sloppySimScorer(perFieldWeight.delegateWeight, context);
+ return perFieldWeight.delegate.simScorer(perFieldWeight.delegateWeight, context);
}
/**
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java Wed Jul 3 23:26:32 2013
@@ -88,10 +88,8 @@ import org.apache.lucene.util.SmallFloat
* is called for each query leaf node, {@link Similarity#queryNorm(float)} is called for the top-level
* query, and finally {@link Similarity.SimWeight#normalize(float, float)} passes down the normalization value
* and any top-level boosts (e.g. from enclosing {@link BooleanQuery}s).
- * <li>For each segment in the index, the Query creates a {@link #exactSimScorer(SimWeight, AtomicReaderContext)}
- * (for queries with exact frequencies such as TermQuerys and exact PhraseQueries) or a
- * {@link #sloppySimScorer(SimWeight, AtomicReaderContext)} (for queries with sloppy frequencies such as
- * SpanQuerys and sloppy PhraseQueries). The score() method is called for each matching document.
+ * <li>For each segment in the index, the Query creates a {@link #simScorer(SimWeight, AtomicReaderContext)}
+ * The score() method is called for each matching document.
* </ol>
* <p>
* <a name="explaintime"/>
@@ -166,76 +164,31 @@ public abstract class Similarity {
* @return SimWeight object with the information this Similarity needs to score a query.
*/
public abstract SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats);
-
- /**
- * Creates a new {@link Similarity.ExactSimScorer} to score matching documents from a segment of the inverted index.
- * @param weight collection information from {@link #computeWeight(float, CollectionStatistics, TermStatistics...)}
- * @param context segment of the inverted index to be scored.
- * @return ExactSimScorer for scoring documents across <code>context</code>
- * @throws IOException if there is a low-level I/O error
- */
- public abstract ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException;
-
+
/**
- * Creates a new {@link Similarity.SloppySimScorer} to score matching documents from a segment of the inverted index.
+ * Creates a new {@link Similarity.SimScorer} to score matching documents from a segment of the inverted index.
* @param weight collection information from {@link #computeWeight(float, CollectionStatistics, TermStatistics...)}
* @param context segment of the inverted index to be scored.
* @return SloppySimScorer for scoring documents across <code>context</code>
* @throws IOException if there is a low-level I/O error
*/
- public abstract SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException;
-
- /**
- * API for scoring exact queries such as {@link TermQuery} and
- * exact {@link PhraseQuery}.
- * <p>
- * Frequencies are integers (the term or phrase frequency within the document)
- */
- public static abstract class ExactSimScorer {
-
- /**
- * Sole constructor. (For invocation by subclass
- * constructors, typically implicit.)
- */
- public ExactSimScorer() {}
-
- /**
- * Score a single document
- * @param doc document id
- * @param freq term frequency
- * @return document's score
- */
- public abstract float score(int doc, int freq);
-
- /**
- * Explain the score for a single document
- * @param doc document id
- * @param freq Explanation of how the term frequency was computed
- * @return document's score
- */
- public Explanation explain(int doc, Explanation freq) {
- Explanation result = new Explanation(score(doc, (int)freq.getValue()),
- "score(doc=" + doc + ",freq=" + freq.getValue() +"), with freq of:");
- result.addDetail(freq);
- return result;
- }
- }
+ public abstract SimScorer simScorer(SimWeight weight, AtomicReaderContext context) throws IOException;
/**
- * API for scoring "sloppy" queries such as {@link SpanQuery} and
- * sloppy {@link PhraseQuery}.
+ * API for scoring "sloppy" queries such as {@link TermQuery},
+ * {@link SpanQuery}, and {@link PhraseQuery}.
* <p>
* Frequencies are floating-point values: an approximate
* within-document frequency adjusted for "sloppiness" by
- * {@link SloppySimScorer#computeSlopFactor(int)}.
+ * {@link SimScorer#computeSlopFactor(int)}.
*/
- public static abstract class SloppySimScorer {
+ public static abstract class SimScorer {
/**
* Sole constructor. (For invocation by subclass
* constructors, typically implicit.)
*/
- public SloppySimScorer() {}
+ public SimScorer() {}
/**
* Score a single document
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java Wed Jul 3 23:26:32 2013
@@ -190,38 +190,20 @@ public abstract class SimilarityBase ext
}
@Override
- public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
+ public SimScorer simScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
if (stats instanceof MultiSimilarity.MultiStats) {
// a multi term query (e.g. phrase). return the summation,
// scoring almost as if it were boolean query
SimWeight subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
- ExactSimScorer subScorers[] = new ExactSimScorer[subStats.length];
+ SimScorer subScorers[] = new SimScorer[subStats.length];
for (int i = 0; i < subScorers.length; i++) {
BasicStats basicstats = (BasicStats) subStats[i];
- subScorers[i] = new BasicExactDocScorer(basicstats, context.reader().getNormValues(basicstats.field));
+ subScorers[i] = new BasicSimScorer(basicstats, context.reader().getNormValues(basicstats.field));
}
- return new MultiSimilarity.MultiExactDocScorer(subScorers);
+ return new MultiSimilarity.MultiSimScorer(subScorers);
} else {
BasicStats basicstats = (BasicStats) stats;
- return new BasicExactDocScorer(basicstats, context.reader().getNormValues(basicstats.field));
- }
- }
-
- @Override
- public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
- if (stats instanceof MultiSimilarity.MultiStats) {
- // a multi term query (e.g. phrase). return the summation,
- // scoring almost as if it were boolean query
- SimWeight subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
- SloppySimScorer subScorers[] = new SloppySimScorer[subStats.length];
- for (int i = 0; i < subScorers.length; i++) {
- BasicStats basicstats = (BasicStats) subStats[i];
- subScorers[i] = new BasicSloppyDocScorer(basicstats, context.reader().getNormValues(basicstats.field));
- }
- return new MultiSimilarity.MultiSloppyDocScorer(subScorers);
- } else {
- BasicStats basicstats = (BasicStats) stats;
- return new BasicSloppyDocScorer(basicstats, context.reader().getNormValues(basicstats.field));
+ return new BasicSimScorer(basicstats, context.reader().getNormValues(basicstats.field));
}
}
@@ -277,46 +259,17 @@ public abstract class SimilarityBase ext
// --------------------------------- Classes ---------------------------------
- /** Delegates the {@link #score(int, int)} and
- * {@link #explain(int, Explanation)} methods to
- * {@link SimilarityBase#score(BasicStats, float, float)} and
- * {@link SimilarityBase#explain(BasicStats, int, Explanation, float)},
- * respectively.
- */
- private class BasicExactDocScorer extends ExactSimScorer {
- private final BasicStats stats;
- private final NumericDocValues norms;
-
- BasicExactDocScorer(BasicStats stats, NumericDocValues norms) throws IOException {
- this.stats = stats;
- this.norms = norms;
- }
-
- @Override
- public float score(int doc, int freq) {
- // We have to supply something in case norms are omitted
- return SimilarityBase.this.score(stats, freq,
- norms == null ? 1F : decodeNormValue((byte)norms.get(doc)));
- }
-
- @Override
- public Explanation explain(int doc, Explanation freq) {
- return SimilarityBase.this.explain(stats, doc, freq,
- norms == null ? 1F : decodeNormValue((byte)norms.get(doc)));
- }
- }
-
/** Delegates the {@link #score(int, float)} and
* {@link #explain(int, Explanation)} methods to
* {@link SimilarityBase#score(BasicStats, float, float)} and
* {@link SimilarityBase#explain(BasicStats, int, Explanation, float)},
* respectively.
*/
- private class BasicSloppyDocScorer extends SloppySimScorer {
+ private class BasicSimScorer extends SimScorer {
private final BasicStats stats;
private final NumericDocValues norms;
- BasicSloppyDocScorer(BasicStats stats, NumericDocValues norms) throws IOException {
+ BasicSimScorer(BasicStats stats, NumericDocValues norms) throws IOException {
this.stats = stats;
this.norms = norms;
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java Wed Jul 3 23:26:32 2013
@@ -28,7 +28,6 @@ import org.apache.lucene.search.IndexSea
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.SmallFloat;
/**
@@ -496,27 +495,8 @@ import org.apache.lucene.util.SmallFloat
* <td></td>
* </tr>
* </table>
- * <br> <br>
- * However the resulted <i>norm</i> value is {@link #encodeNormValue(float) encoded} as a single byte
- * before being stored.
- * At search time, the norm byte value is read from the index
- * {@link org.apache.lucene.store.Directory directory} and
- * {@link #decodeNormValue(byte) decoded} back to a float <i>norm</i> value.
- * This encoding/decoding, while reducing index size, comes with the price of
- * precision loss - it is not guaranteed that <i>decode(encode(x)) = x</i>.
- * For instance, <i>decode(encode(0.89)) = 0.75</i>.
- * <br> <br>
- * Compression of norm values to a single byte saves memory at search time,
- * because once a field is referenced at search time, its norms - for
- * all documents - are maintained in memory.
- * <br> <br>
- * The rationale supporting such lossy compression of norm values is that
- * given the difficulty (and inaccuracy) of users to express their true information
- * need by a query, only big differences matter.
- * <br> <br>
- * Last, note that search time is too late to modify this <i>norm</i> part of scoring, e.g. by
- * using a different {@link Similarity} for search.
- * <br> <br>
+ * Note that search time is too late to modify this <i>norm</i> part of scoring,
+ * e.g. by using a different {@link Similarity} for search.
* </li>
* </ol>
*
@@ -572,25 +552,6 @@ public abstract class TFIDFSimilarity ex
* when <code>freq</code> is large, and smaller values when <code>freq</code>
* is small.
*
- * <p>The default implementation calls {@link #tf(float)}.
- *
- * @param freq the frequency of a term within a document
- * @return a score factor based on a term's within-document frequency
- */
- public float tf(int freq) {
- return tf((float)freq);
- }
-
- /** Computes a score factor based on a term or phrase's frequency in a
- * document. This value is multiplied by the {@link #idf(long, long)}
- * factor for each term in the query and these products are then summed to
- * form the initial score for a document.
- *
- * <p>Terms and phrases repeated in a document indicate the topic of the
- * document, so implementations of this method usually return larger values
- * when <code>freq</code> is large, and smaller values when <code>freq</code>
- * is small.
- *
* @param freq the frequency of a term within a document
* @return a score factor based on a term's within-document frequency
*/
@@ -655,7 +616,7 @@ public abstract class TFIDFSimilarity ex
/** Computes a score factor based on a term's document frequency (the number
* of documents which contain the term). This value is multiplied by the
- * {@link #tf(int)} factor for each term in the query and these products are
+ * {@link #tf(float)} factor for each term in the query and these products are
* then summed to form the initial score for a document.
*
* <p>Terms that occur in fewer documents are better indicators of topic, so
@@ -685,38 +646,15 @@ public abstract class TFIDFSimilarity ex
return encodeNormValue(normValue);
}
- /** Cache of decoded bytes. */
- private static final float[] NORM_TABLE = new float[256];
-
- static {
- for (int i = 0; i < 256; i++) {
- NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i);
- }
- }
-
- /** Decodes a normalization factor stored in an index.
+ /**
+ * Decodes a normalization factor stored in an index.
+ *
* @see #encodeNormValue(float)
*/
- public float decodeNormValue(byte b) {
- return NORM_TABLE[b & 0xFF]; // & 0xFF maps negative bytes to positive above 127
- }
+ public abstract float decodeNormValue(long norm);
- /** Encodes a normalization factor for storage in an index.
- *
- * <p>The encoding uses a three-bit mantissa, a five-bit exponent, and
- * the zero-exponent point at 15, thus
- * representing values from around 7x10^9 to 2x10^-9 with about one
- * significant decimal digit of accuracy. Zero is also represented.
- * Negative numbers are rounded up to zero. Values too large to represent
- * are rounded down to the largest representable value. Positive values too
- * small to represent are rounded up to the smallest positive representable
- * value.
- * @see org.apache.lucene.document.Field#setBoost(float)
- * @see org.apache.lucene.util.SmallFloat
- */
- public byte encodeNormValue(float f) {
- return SmallFloat.floatToByte315(f);
- }
+ /** Encodes a normalization factor for storage in an index. */
+ public abstract long encodeNormValue(float f);
/** Computes the amount of a sloppy phrase match, based on an edit distance.
* This value is summed for each sloppy phrase match in a document to form
@@ -755,49 +693,17 @@ public abstract class TFIDFSimilarity ex
}
@Override
- public final ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
+ public final SimScorer simScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
IDFStats idfstats = (IDFStats) stats;
- return new ExactTFIDFDocScorer(idfstats, context.reader().getNormValues(idfstats.field));
- }
-
- @Override
- public final SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
- IDFStats idfstats = (IDFStats) stats;
- return new SloppyTFIDFDocScorer(idfstats, context.reader().getNormValues(idfstats.field));
- }
-
- // TODO: we can specialize these for omitNorms up front, but we should test that it doesn't confuse stupid hotspot.
-
- private final class ExactTFIDFDocScorer extends ExactSimScorer {
- private final IDFStats stats;
- private final float weightValue;
- private final NumericDocValues norms;
-
- ExactTFIDFDocScorer(IDFStats stats, NumericDocValues norms) throws IOException {
- this.stats = stats;
- this.weightValue = stats.value;
- this.norms = norms;
- }
-
- @Override
- public float score(int doc, int freq) {
- final float raw = tf(freq)*weightValue; // compute tf(f)*weight
-
- return norms == null ? raw : raw * decodeNormValue((byte)norms.get(doc)); // normalize for field
- }
-
- @Override
- public Explanation explain(int doc, Explanation freq) {
- return explainScore(doc, freq, stats, norms);
- }
+ return new TFIDFSimScorer(idfstats, context.reader().getNormValues(idfstats.field));
}
- private final class SloppyTFIDFDocScorer extends SloppySimScorer {
+ private final class TFIDFSimScorer extends SimScorer {
private final IDFStats stats;
private final float weightValue;
private final NumericDocValues norms;
- SloppyTFIDFDocScorer(IDFStats stats, NumericDocValues norms) throws IOException {
+ TFIDFSimScorer(IDFStats stats, NumericDocValues norms) throws IOException {
this.stats = stats;
this.weightValue = stats.value;
this.norms = norms;
@@ -807,7 +713,7 @@ public abstract class TFIDFSimilarity ex
public float score(int doc, float freq) {
final float raw = tf(freq) * weightValue; // compute tf(f)*weight
- return norms == null ? raw : raw * decodeNormValue((byte)norms.get(doc)); // normalize for field
+ return norms == null ? raw : raw * decodeNormValue(norms.get(doc)); // normalize for field
}
@Override
@@ -894,8 +800,7 @@ public abstract class TFIDFSimilarity ex
fieldExpl.addDetail(stats.idf);
Explanation fieldNormExpl = new Explanation();
- float fieldNorm =
- norms!=null ? decodeNormValue((byte) norms.get(doc)) : 1.0f;
+ float fieldNorm = norms != null ? decodeNormValue(norms.get(doc)) : 1.0f;
fieldNormExpl.setValue(fieldNorm);
fieldNormExpl.setDescription("fieldNorm(doc="+doc+")");
fieldExpl.addDetail(fieldNormExpl);
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java Wed Jul 3 23:26:32 2013
@@ -34,9 +34,9 @@ public class SpanScorer extends Scorer {
protected int doc;
protected float freq;
protected int numMatches;
- protected final Similarity.SloppySimScorer docScorer;
+ protected final Similarity.SimScorer docScorer;
- protected SpanScorer(Spans spans, Weight weight, Similarity.SloppySimScorer docScorer)
+ protected SpanScorer(Spans spans, Weight weight, Similarity.SimScorer docScorer)
throws IOException {
super(weight);
this.docScorer = docScorer;
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java Wed Jul 3 23:26:32 2013
@@ -23,7 +23,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.*;
import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Bits;
import java.io.IOException;
@@ -86,7 +86,7 @@ public class SpanWeight extends Weight {
if (stats == null) {
return null;
} else {
- return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppySimScorer(stats, context));
+ return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.simScorer(stats, context));
}
}
@@ -97,7 +97,7 @@ public class SpanWeight extends Weight {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.sloppyFreq();
- SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
+ SimScorer docScorer = similarity.simScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/store/Directory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/store/Directory.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/store/Directory.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/store/Directory.java Wed Jul 3 23:26:32 2013
@@ -21,6 +21,7 @@ import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Closeable;
+import java.nio.file.NoSuchFileException;
import java.util.Collection; // for javadocs
import org.apache.lucene.util.IOUtils;
@@ -70,12 +71,12 @@ public abstract class Directory implemen
* Returns the length of a file in the directory. This method follows the
* following contract:
* <ul>
- * <li>Throws {@link FileNotFoundException} if the file does not exist
+ * <li>Throws {@link FileNotFoundException} or {@link NoSuchFileException}
+ * if the file does not exist.
* <li>Returns a value ≥0 if the file exists, which specifies its length.
* </ul>
*
* @param name the name of the file for which to return the length.
- * @throws FileNotFoundException if the file does not exist.
* @throws IOException if there was an IO error while retrieving the file's
* length.
*/
@@ -106,7 +107,9 @@ public abstract class Directory implemen
* the only Directory implementations that respect this
* parameter are {@link FSDirectory} and {@link
* CompoundFileDirectory}.
- */
+ * <p>Throws {@link FileNotFoundException} or {@link NoSuchFileException}
+ * if the file does not exist.
+ */
public abstract IndexInput openInput(String name, IOContext context) throws IOException;
/** Construct a {@link Lock}.
@@ -223,6 +226,8 @@ public abstract class Directory implemen
* efficiently open one or more sliced {@link IndexInput} instances from a
* single file handle. The underlying file handle is kept open until the
* {@link IndexInputSlicer} is closed.
+ * <p>Throws {@link FileNotFoundException} or {@link NoSuchFileException}
+ * if the file does not exist.
*
* @throws IOException
* if an {@link IOException} occurs
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/Constants.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/Constants.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/Constants.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/Constants.java Wed Jul 3 23:26:32 2013
@@ -46,6 +46,8 @@ public final class Constants {
public static final boolean SUN_OS = OS_NAME.startsWith("SunOS");
/** True iff running on Mac OS X */
public static final boolean MAC_OS_X = OS_NAME.startsWith("Mac OS X");
+ /** True iff running on FreeBSD */
+ public static final boolean FREE_BSD = OS_NAME.startsWith("FreeBSD");
public static final String OS_ARCH = System.getProperty("os.arch");
public static final String OS_VERSION = System.getProperty("os.version");
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/RollingBuffer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/RollingBuffer.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/RollingBuffer.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/RollingBuffer.java Wed Jul 3 23:26:32 2013
@@ -17,9 +17,6 @@ package org.apache.lucene.util;
* limitations under the License.
*/
-// TODO: probably move this to core at some point (eg,
-// cutover kuromoji, synfilter, LookaheadTokenFilter)
-
/** Acts like forever growing T[], but internally uses a
* circular buffer to reuse instances of T.
*
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/automaton/SpecialOperations.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/automaton/SpecialOperations.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/automaton/SpecialOperations.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/automaton/SpecialOperations.java Wed Jul 3 23:26:32 2013
@@ -219,7 +219,7 @@ final public class SpecialOperations {
/**
* Returns the set of accepted strings, assuming that at most
* <code>limit</code> strings are accepted. If more than <code>limit</code>
- * strings are accepted, null is returned. If <code>limit</code><0, then
+ * strings are accepted, the first limit strings found are returned. If <code>limit</code><0, then
* the limit is infinite.
*/
public static Set<IntsRef> getFiniteStrings(Automaton a, int limit) {
@@ -227,11 +227,9 @@ final public class SpecialOperations {
if (a.isSingleton()) {
if (limit > 0) {
strings.add(Util.toUTF32(a.singleton, new IntsRef()));
- } else {
- return null;
}
} else if (!getFiniteStrings(a.initial, new HashSet<State>(), strings, new IntsRef(), limit)) {
- return null;
+ return strings;
}
return strings;
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java Wed Jul 3 23:26:32 2013
@@ -19,21 +19,21 @@ package org.apache.lucene.util.fst;
import java.io.IOException;
-import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PagedGrowableWriter;
// Used to dedup states (lookup already-frozen states)
final class NodeHash<T> {
- private GrowableWriter table;
- private int count;
- private int mask;
+ private PagedGrowableWriter table;
+ private long count;
+ private long mask;
private final FST<T> fst;
private final FST.Arc<T> scratchArc = new FST.Arc<T>();
private final FST.BytesReader in;
public NodeHash(FST<T> fst, FST.BytesReader in) {
- table = new GrowableWriter(8, 16, PackedInts.COMPACT);
+ table = new PagedGrowableWriter(16, 1<<30, 8, PackedInts.COMPACT);
mask = 15;
this.fst = fst;
this.in = in;
@@ -69,10 +69,10 @@ final class NodeHash<T> {
// hash code for an unfrozen node. This must be identical
// to the un-frozen case (below)!!
- private int hash(Builder.UnCompiledNode<T> node) {
+ private long hash(Builder.UnCompiledNode<T> node) {
final int PRIME = 31;
//System.out.println("hash unfrozen");
- int h = 0;
+ long h = 0;
// TODO: maybe if number of arcs is high we can safely subsample?
for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
final Builder.Arc<T> arc = node.arcs[arcIdx];
@@ -87,14 +87,14 @@ final class NodeHash<T> {
}
}
//System.out.println(" ret " + (h&Integer.MAX_VALUE));
- return h & Integer.MAX_VALUE;
+ return h & Long.MAX_VALUE;
}
// hash code for a frozen node
- private int hash(long node) throws IOException {
+ private long hash(long node) throws IOException {
final int PRIME = 31;
//System.out.println("hash frozen node=" + node);
- int h = 0;
+ long h = 0;
fst.readFirstRealTargetArc(node, scratchArc, in);
while(true) {
//System.out.println(" label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal() + " pos=" + in.getPosition());
@@ -111,13 +111,13 @@ final class NodeHash<T> {
fst.readNextRealArc(scratchArc, in);
}
//System.out.println(" ret " + (h&Integer.MAX_VALUE));
- return h & Integer.MAX_VALUE;
+ return h & Long.MAX_VALUE;
}
public long add(Builder.UnCompiledNode<T> nodeIn) throws IOException {
- // System.out.println("hash: add count=" + count + " vs " + table.size());
- final int h = hash(nodeIn);
- int pos = h & mask;
+ //System.out.println("hash: add count=" + count + " vs " + table.size() + " mask=" + mask);
+ final long h = hash(nodeIn);
+ long pos = h & mask;
int c = 0;
while(true) {
final long v = table.get(pos);
@@ -128,7 +128,8 @@ final class NodeHash<T> {
assert hash(node) == h : "frozenHash=" + hash(node) + " vs h=" + h;
count++;
table.set(pos, node);
- if (table.size() < 2*count) {
+ // Rehash at 2/3 occupancy:
+ if (count > 2*table.size()/3) {
rehash();
}
return node;
@@ -144,7 +145,7 @@ final class NodeHash<T> {
// called only by rehash
private void addNew(long address) throws IOException {
- int pos = hash(address) & mask;
+ long pos = hash(address) & mask;
int c = 0;
while(true) {
if (table.get(pos) == 0) {
@@ -158,23 +159,15 @@ final class NodeHash<T> {
}
private void rehash() throws IOException {
- final GrowableWriter oldTable = table;
+ final PagedGrowableWriter oldTable = table;
- if (oldTable.size() >= Integer.MAX_VALUE/2) {
- throw new IllegalStateException("FST too large (> 2.1 GB)");
- }
-
- table = new GrowableWriter(oldTable.getBitsPerValue(), 2*oldTable.size(), PackedInts.COMPACT);
+ table = new PagedGrowableWriter(2*oldTable.size(), 1<<30, PackedInts.bitsRequired(count), PackedInts.COMPACT);
mask = table.size()-1;
- for(int idx=0;idx<oldTable.size();idx++) {
+ for(long idx=0;idx<oldTable.size();idx++) {
final long address = oldTable.get(idx);
if (address != 0) {
addNew(address);
}
}
}
-
- public int count() {
- return count;
- }
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/fst/PositiveIntOutputs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/fst/PositiveIntOutputs.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/fst/PositiveIntOutputs.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/fst/PositiveIntOutputs.java Wed Jul 3 23:26:32 2013
@@ -33,26 +33,13 @@ public final class PositiveIntOutputs ex
private final static Long NO_OUTPUT = new Long(0);
- private final boolean doShare;
+ private final static PositiveIntOutputs singleton = new PositiveIntOutputs();
- private final static PositiveIntOutputs singletonShare = new PositiveIntOutputs(true);
- private final static PositiveIntOutputs singletonNoShare = new PositiveIntOutputs(false);
-
- private PositiveIntOutputs(boolean doShare) {
- this.doShare = doShare;
+ private PositiveIntOutputs() {
}
- /** Returns the instance of PositiveIntOutputs. */
public static PositiveIntOutputs getSingleton() {
- return getSingleton(true);
- }
-
- /** Expert: pass doShare=false to disable output sharing.
- * In some cases this may result in a smaller FST,
- * however it will also break methods like {@link
- * Util#getByOutput} and {@link Util#shortestPaths}. */
- public static PositiveIntOutputs getSingleton(boolean doShare) {
- return doShare ? singletonShare : singletonNoShare;
+ return singleton;
}
@Override
@@ -61,14 +48,10 @@ public final class PositiveIntOutputs ex
assert valid(output2);
if (output1 == NO_OUTPUT || output2 == NO_OUTPUT) {
return NO_OUTPUT;
- } else if (doShare) {
+ } else {
assert output1 > 0;
assert output2 > 0;
return Math.min(output1, output2);
- } else if (output1.equals(output2)) {
- return output1;
- } else {
- return NO_OUTPUT;
}
}
@@ -134,6 +117,6 @@ public final class PositiveIntOutputs ex
@Override
public String toString() {
- return "PositiveIntOutputs(doShare=" + doShare + ")";
+ return "PositiveIntOutputs";
}
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/fst/Util.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/fst/Util.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/fst/Util.java Wed Jul 3 23:26:32 2013
@@ -93,9 +93,7 @@ public final class Util {
*
* <p>NOTE: this only works with {@code FST<Long>}, only
* works when the outputs are ascending in order with
- * the inputs and only works when you shared
- * the outputs (pass doShare=true to {@link
- * PositiveIntOutputs#getSingleton}).
+ * the inputs.
* For example, simple ordinals (0, 1,
* 2, ...), or file offets (when appending to a file)
* fit this. */
@@ -517,11 +515,7 @@ public final class Util {
}
/** Starting from node, find the top N min cost
- * completions to a final node.
- *
- * <p>NOTE: you must share the outputs when you build the
- * FST (pass doShare=true to {@link
- * PositiveIntOutputs#getSingleton}). */
+ * completions to a final node. */
public static <T> MinResult<T>[] shortestPaths(FST<T> fst, FST.Arc<T> fromNode, T startOutput, Comparator<T> comparator, int topN,
boolean allowEmptyString) throws IOException {
@@ -814,7 +808,7 @@ public final class Util {
final int charLimit = offset + length;
while(charIdx < charLimit) {
scratch.grow(intIdx+1);
- final int utf32 = Character.codePointAt(s, charIdx);
+ final int utf32 = Character.codePointAt(s, charIdx, charLimit);
scratch.ints[intIdx] = utf32;
charIdx += Character.charCount(utf32);
intIdx++;
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/fst/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/fst/package.html?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/fst/package.html (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/fst/package.html Wed Jul 3 23:26:32 2013
@@ -43,7 +43,7 @@ FST Construction example:
String inputValues[] = {"cat", "dog", "dogs"};
long outputValues[] = {5, 7, 12};
- PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+ PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs);
BytesRef scratchBytes = new BytesRef();
IntsRef scratchInts = new IntsRef();
@@ -60,8 +60,7 @@ Retrieval by key:
</pre>
Retrieval by value:
<pre class="prettyprint">
- // Only works because outputs are also in sorted order, and
- // we passed 'true' for sharing to PositiveIntOutputs.getSingleton
+ // Only works because outputs are also in sorted order
IntsRef key = Util.getByOutput(fst, 12);
System.out.println(Util.toBytesRef(key, scratchBytes).utf8ToString()); // dogs
</pre>
@@ -77,7 +76,6 @@ Iterate over key-value pairs in sorted o
</pre>
N-shortest paths by weight:
<pre class="prettyprint">
- // Only works because we passed 'true' for sharing to PositiveIntOutputs.getSingleton
Comparator<Long> comparator = new Comparator<Long>() {
public int compare(Long left, Long right) {
return left.compareTo(right);
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/AbstractAppendingLongBuffer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/AbstractAppendingLongBuffer.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/AbstractAppendingLongBuffer.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/AbstractAppendingLongBuffer.java Wed Jul 3 23:26:32 2013
@@ -17,6 +17,8 @@ package org.apache.lucene.util.packed;
* limitations under the License.
*/
+import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
+
import java.util.Arrays;
import org.apache.lucene.util.ArrayUtil;
@@ -25,33 +27,37 @@ import org.apache.lucene.util.RamUsageEs
/** Common functionality shared by {@link AppendingLongBuffer} and {@link MonotonicAppendingLongBuffer}. */
abstract class AbstractAppendingLongBuffer {
- static final int BLOCK_BITS = 10;
- static final int MAX_PENDING_COUNT = 1 << BLOCK_BITS;
- static final int BLOCK_MASK = MAX_PENDING_COUNT - 1;
+ static final int MIN_PAGE_SIZE = 64;
+ // More than 1M doesn't really makes sense with these appending buffers
+ // since their goal is to try to have small numbers of bits per value
+ static final int MAX_PAGE_SIZE = 1 << 20;
+ final int pageShift, pageMask;
long[] minValues;
PackedInts.Reader[] deltas;
private long deltasBytes;
int valuesOff;
- long[] pending;
+ final long[] pending;
int pendingOff;
- AbstractAppendingLongBuffer(int initialBlockCount) {
- minValues = new long[16];
- deltas = new PackedInts.Reader[16];
- pending = new long[MAX_PENDING_COUNT];
+ AbstractAppendingLongBuffer(int initialBlockCount, int pageSize) {
+ minValues = new long[initialBlockCount];
+ deltas = new PackedInts.Reader[initialBlockCount];
+ pending = new long[pageSize];
+ pageShift = checkBlockSize(pageSize, MIN_PAGE_SIZE, MAX_PAGE_SIZE);
+ pageMask = pageSize - 1;
valuesOff = 0;
pendingOff = 0;
}
/** Get the number of values that have been added to the buffer. */
public final long size() {
- return valuesOff * (long) MAX_PENDING_COUNT + pendingOff;
+ return valuesOff * (long) pending.length + pendingOff;
}
/** Append a value to this buffer. */
public final void add(long l) {
- if (pendingOff == MAX_PENDING_COUNT) {
+ if (pendingOff == pending.length) {
// check size
if (deltas.length == valuesOff) {
final int newLength = ArrayUtil.oversize(valuesOff + 1, 8);
@@ -80,8 +86,8 @@ abstract class AbstractAppendingLongBuff
if (index < 0 || index >= size()) {
throw new IndexOutOfBoundsException("" + index);
}
- int block = (int) (index >> BLOCK_BITS);
- int element = (int) (index & BLOCK_MASK);
+ final int block = (int) (index >> pageShift);
+ final int element = (int) (index & pageMask);
return get(block, element);
}
@@ -99,7 +105,7 @@ abstract class AbstractAppendingLongBuff
if (valuesOff == 0) {
currentValues = pending;
} else {
- currentValues = new long[MAX_PENDING_COUNT];
+ currentValues = new long[pending.length];
fillValues();
}
}
@@ -115,7 +121,7 @@ abstract class AbstractAppendingLongBuff
public final long next() {
assert hasNext();
long result = currentValues[pOff++];
- if (pOff == MAX_PENDING_COUNT) {
+ if (pOff == pending.length) {
vOff += 1;
pOff = 0;
if (vOff <= valuesOff) {
@@ -139,6 +145,7 @@ abstract class AbstractAppendingLongBuff
public long ramBytesUsed() {
// TODO: this is called per-doc-per-norms/dv-field, can we optimize this?
long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed())
+ + 2 * RamUsageEstimator.NUM_BYTES_INT // pageShift, pageMask
+ RamUsageEstimator.NUM_BYTES_LONG // valuesBytes
+ RamUsageEstimator.sizeOf(pending)
+ RamUsageEstimator.sizeOf(minValues)
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/AbstractBlockPackedWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/AbstractBlockPackedWriter.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/AbstractBlockPackedWriter.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/AbstractBlockPackedWriter.java Wed Jul 3 23:26:32 2013
@@ -17,6 +17,8 @@ package org.apache.lucene.util.packed;
* limitations under the License.
*/
+import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
+
import java.io.IOException;
import java.util.Arrays;
@@ -24,22 +26,11 @@ import org.apache.lucene.store.DataOutpu
abstract class AbstractBlockPackedWriter {
+ static final int MIN_BLOCK_SIZE = 64;
static final int MAX_BLOCK_SIZE = 1 << (30 - 3);
static final int MIN_VALUE_EQUALS_0 = 1 << 0;
static final int BPV_SHIFT = 1;
- static void checkBlockSize(int blockSize) {
- if (blockSize <= 0 || blockSize > MAX_BLOCK_SIZE) {
- throw new IllegalArgumentException("blockSize must be > 0 and < " + MAX_BLOCK_SIZE + ", got " + blockSize);
- }
- if (blockSize < 64) {
- throw new IllegalArgumentException("blockSize must be >= 64, got " + blockSize);
- }
- if ((blockSize & (blockSize - 1)) != 0) {
- throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize);
- }
- }
-
static long zigZagEncode(long n) {
return (n >> 63) ^ (n << 1);
}
@@ -66,7 +57,7 @@ abstract class AbstractBlockPackedWriter
* @param blockSize the number of values of a single block, must be a multiple of <tt>64</tt>
*/
public AbstractBlockPackedWriter(DataOutput out, int blockSize) {
- checkBlockSize(blockSize);
+ checkBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
reset(out);
values = new long[blockSize];
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/AppendingLongBuffer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/AppendingLongBuffer.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/AppendingLongBuffer.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/AppendingLongBuffer.java Wed Jul 3 23:26:32 2013
@@ -27,9 +27,16 @@ import java.util.Arrays;
*/
public final class AppendingLongBuffer extends AbstractAppendingLongBuffer {
- /** Sole constructor. */
+ /** @param initialPageCount the initial number of pages
+ * @param pageSize the size of a single page */
+ public AppendingLongBuffer(int initialPageCount, int pageSize) {
+ super(initialPageCount, pageSize);
+ }
+
+ /** Create an {@link AppendingLongBuffer} with initialPageCount=16 and
+ * pageSize=1024. */
public AppendingLongBuffer() {
- super(16);
+ this(16, 1024);
}
@Override
@@ -43,8 +50,9 @@ public final class AppendingLongBuffer e
}
}
+ @Override
void packPendingValues() {
- assert pendingOff == MAX_PENDING_COUNT;
+ assert pendingOff == pending.length;
// compute max delta
long minValue = pending[0];
@@ -71,6 +79,7 @@ public final class AppendingLongBuffer e
}
/** Return an iterator over the values of this buffer. */
+ @Override
public Iterator iterator() {
return new Iterator();
}
@@ -78,20 +87,21 @@ public final class AppendingLongBuffer e
/** A long iterator. */
public final class Iterator extends AbstractAppendingLongBuffer.Iterator {
- private Iterator() {
+ Iterator() {
super();
}
+ @Override
void fillValues() {
if (vOff == valuesOff) {
currentValues = pending;
} else if (deltas[vOff] == null) {
Arrays.fill(currentValues, minValues[vOff]);
} else {
- for (int k = 0; k < MAX_PENDING_COUNT; ) {
- k += deltas[vOff].get(k, currentValues, k, MAX_PENDING_COUNT - k);
+ for (int k = 0; k < pending.length; ) {
+ k += deltas[vOff].get(k, currentValues, k, pending.length - k);
}
- for (int k = 0; k < MAX_PENDING_COUNT; ++k) {
+ for (int k = 0; k < pending.length; ++k) {
currentValues[k] += minValues[vOff];
}
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java Wed Jul 3 23:26:32 2013
@@ -17,11 +17,14 @@ package org.apache.lucene.util.packed;
* limitations under the License.
*/
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.BPV_SHIFT;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MAX_BLOCK_SIZE;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MIN_BLOCK_SIZE;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MIN_VALUE_EQUALS_0;
import static org.apache.lucene.util.packed.BlockPackedReaderIterator.readVLong;
import static org.apache.lucene.util.packed.BlockPackedReaderIterator.zigZagDecode;
-import static org.apache.lucene.util.packed.BlockPackedWriter.BPV_SHIFT;
-import static org.apache.lucene.util.packed.BlockPackedWriter.MIN_VALUE_EQUALS_0;
-import static org.apache.lucene.util.packed.BlockPackedWriter.checkBlockSize;
+import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
+import static org.apache.lucene.util.packed.PackedInts.numBlocks;
import java.io.IOException;
@@ -40,14 +43,10 @@ public final class BlockPackedReader {
/** Sole constructor. */
public BlockPackedReader(IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct) throws IOException {
- checkBlockSize(blockSize);
this.valueCount = valueCount;
- blockShift = Integer.numberOfTrailingZeros(blockSize);
+ blockShift = checkBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
blockMask = blockSize - 1;
- final int numBlocks = (int) (valueCount / blockSize) + (valueCount % blockSize == 0 ? 0 : 1);
- if ((long) numBlocks * blockSize < valueCount) {
- throw new IllegalArgumentException("valueCount is too large for this block size");
- }
+ final int numBlocks = numBlocks(valueCount, blockSize);
long[] minValues = null;
subReaders = new PackedInts.Reader[numBlocks];
for (int i = 0; i < numBlocks; ++i) {
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java Wed Jul 3 23:26:32 2013
@@ -17,9 +17,13 @@ package org.apache.lucene.util.packed;
* limitations under the License.
*/
-import static org.apache.lucene.util.packed.BlockPackedWriter.BPV_SHIFT;
-import static org.apache.lucene.util.packed.BlockPackedWriter.MIN_VALUE_EQUALS_0;
-import static org.apache.lucene.util.packed.BlockPackedWriter.checkBlockSize;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.BPV_SHIFT;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MAX_BLOCK_SIZE;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MIN_BLOCK_SIZE;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MIN_VALUE_EQUALS_0;
+import static org.apache.lucene.util.packed.BlockPackedReaderIterator.readVLong;
+import static org.apache.lucene.util.packed.BlockPackedReaderIterator.zigZagDecode;
+import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
import java.io.EOFException;
import java.io.IOException;
@@ -87,7 +91,7 @@ public final class BlockPackedReaderIter
* been used to write the stream
*/
public BlockPackedReaderIterator(DataInput in, int packedIntsVersion, int blockSize, long valueCount) {
- checkBlockSize(blockSize);
+ checkBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
this.packedIntsVersion = packedIntsVersion;
this.blockSize = blockSize;
this.values = new long[blockSize];
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/GrowableWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/GrowableWriter.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/GrowableWriter.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/GrowableWriter.java Wed Jul 3 23:26:32 2013
@@ -20,24 +20,35 @@ package org.apache.lucene.util.packed;
import java.io.IOException;
import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.RamUsageEstimator;
/**
* Implements {@link PackedInts.Mutable}, but grows the
* bit count of the underlying packed ints on-demand.
+ * <p>Beware that this class will accept to set negative values but in order
+ * to do this, it will grow the number of bits per value to 64.
*
* <p>@lucene.internal</p>
*/
-
public class GrowableWriter implements PackedInts.Mutable {
- private long currentMaxValue;
+ private long currentMask;
private PackedInts.Mutable current;
private final float acceptableOverheadRatio;
+ /**
+ * @param startBitsPerValue the initial number of bits per value, may grow depending on the data
+ * @param valueCount the number of values
+ * @param acceptableOverheadRatio an acceptable overhead ratio
+ */
public GrowableWriter(int startBitsPerValue, int valueCount, float acceptableOverheadRatio) {
this.acceptableOverheadRatio = acceptableOverheadRatio;
current = PackedInts.getMutable(valueCount, startBitsPerValue, this.acceptableOverheadRatio);
- currentMaxValue = PackedInts.maxValue(current.getBitsPerValue());
+ currentMask = mask(current.getBitsPerValue());
+ }
+
+ private static long mask(int bitsPerValue) {
+ return bitsPerValue == 64 ? ~0L : PackedInts.maxValue(bitsPerValue);
}
@Override
@@ -70,16 +81,16 @@ public class GrowableWriter implements P
}
private void ensureCapacity(long value) {
- assert value >= 0;
- if (value <= currentMaxValue) {
+ if ((value & currentMask) == value) {
return;
}
- final int bitsRequired = PackedInts.bitsRequired(value);
+ final int bitsRequired = value < 0 ? 64 : PackedInts.bitsRequired(value);
+ assert bitsRequired > current.getBitsPerValue();
final int valueCount = size();
PackedInts.Mutable next = PackedInts.getMutable(valueCount, bitsRequired, acceptableOverheadRatio);
PackedInts.copy(current, 0, next, 0, valueCount, PackedInts.DEFAULT_BUFFER_SIZE);
current = next;
- currentMaxValue = PackedInts.maxValue(current.getBitsPerValue());
+ currentMask = mask(current.getBitsPerValue());
}
@Override
@@ -109,6 +120,10 @@ public class GrowableWriter implements P
public int set(int index, long[] arr, int off, int len) {
long max = 0;
for (int i = off, end = off + len; i < end; ++i) {
+ // bitwise or is nice because either all values are positive and the
+ // or-ed result will require as many bits per value as the max of the
+ // values, or one of them is negative and the result will be negative,
+ // forcing GrowableWriter to use 64 bits per value
max |= arr[i];
}
ensureCapacity(max);
@@ -123,7 +138,12 @@ public class GrowableWriter implements P
@Override
public long ramBytesUsed() {
- return current.ramBytesUsed();
+ return RamUsageEstimator.alignObjectSize(
+ RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+ + RamUsageEstimator.NUM_BYTES_OBJECT_REF
+ + RamUsageEstimator.NUM_BYTES_LONG
+ + RamUsageEstimator.NUM_BYTES_FLOAT)
+ + current.ramBytesUsed();
}
@Override
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicAppendingLongBuffer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicAppendingLongBuffer.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicAppendingLongBuffer.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicAppendingLongBuffer.java Wed Jul 3 23:26:32 2013
@@ -37,14 +37,22 @@ public final class MonotonicAppendingLon
return (n >> 63) ^ (n << 1);
}
- private float[] averages;
+ float[] averages;
- /** Sole constructor. */
+ /** @param initialPageCount the initial number of pages
+ * @param pageSize the size of a single page */
+ public MonotonicAppendingLongBuffer(int initialPageCount, int pageSize) {
+ super(initialPageCount, pageSize);
+ averages = new float[pending.length];
+ }
+
+ /** Create an {@link MonotonicAppendingLongBuffer} with initialPageCount=16
+ * and pageSize=1024. */
public MonotonicAppendingLongBuffer() {
- super(16);
- averages = new float[16];
+ this(16, 1024);
}
-
+
+ @Override
long get(int block, int element) {
if (block == valuesOff) {
return pending[element];
@@ -66,16 +74,16 @@ public final class MonotonicAppendingLon
@Override
void packPendingValues() {
- assert pendingOff == MAX_PENDING_COUNT;
+ assert pendingOff == pending.length;
minValues[valuesOff] = pending[0];
- averages[valuesOff] = (float) (pending[BLOCK_MASK] - pending[0]) / BLOCK_MASK;
+ averages[valuesOff] = (float) (pending[pending.length - 1] - pending[0]) / (pending.length - 1);
- for (int i = 0; i < MAX_PENDING_COUNT; ++i) {
+ for (int i = 0; i < pending.length; ++i) {
pending[i] = zigZagEncode(pending[i] - minValues[valuesOff] - (long) (averages[valuesOff] * (long) i));
}
long maxDelta = 0;
- for (int i = 0; i < MAX_PENDING_COUNT; ++i) {
+ for (int i = 0; i < pending.length; ++i) {
if (pending[i] < 0) {
maxDelta = -1;
break;
@@ -94,6 +102,7 @@ public final class MonotonicAppendingLon
}
/** Return an iterator over the values of this buffer. */
+ @Override
public Iterator iterator() {
return new Iterator();
}
@@ -105,18 +114,19 @@ public final class MonotonicAppendingLon
super();
}
+ @Override
void fillValues() {
if (vOff == valuesOff) {
currentValues = pending;
} else if (deltas[vOff] == null) {
- for (int k = 0; k < MAX_PENDING_COUNT; ++k) {
+ for (int k = 0; k < pending.length; ++k) {
currentValues[k] = minValues[vOff] + (long) (averages[vOff] * (long) k);
}
} else {
- for (int k = 0; k < MAX_PENDING_COUNT; ) {
- k += deltas[vOff].get(k, currentValues, k, MAX_PENDING_COUNT - k);
+ for (int k = 0; k < pending.length; ) {
+ k += deltas[vOff].get(k, currentValues, k, pending.length - k);
}
- for (int k = 0; k < MAX_PENDING_COUNT; ++k) {
+ for (int k = 0; k < pending.length; ++k) {
currentValues[k] = minValues[vOff] + (long) (averages[vOff] * (long) k) + zigZagDecode(currentValues[k]);
}
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java Wed Jul 3 23:26:32 2013
@@ -17,8 +17,11 @@ package org.apache.lucene.util.packed;
* limitations under the License.
*/
-import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.checkBlockSize;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MAX_BLOCK_SIZE;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MIN_BLOCK_SIZE;
import static org.apache.lucene.util.packed.BlockPackedReaderIterator.zigZagDecode;
+import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
+import static org.apache.lucene.util.packed.PackedInts.numBlocks;
import java.io.IOException;
@@ -39,14 +42,10 @@ public final class MonotonicBlockPackedR
/** Sole constructor. */
public MonotonicBlockPackedReader(IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct) throws IOException {
- checkBlockSize(blockSize);
this.valueCount = valueCount;
- blockShift = Integer.numberOfTrailingZeros(blockSize);
+ blockShift = checkBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
blockMask = blockSize - 1;
- final int numBlocks = (int) (valueCount / blockSize) + (valueCount % blockSize == 0 ? 0 : 1);
- if ((long) numBlocks * blockSize < valueCount) {
- throw new IllegalArgumentException("valueCount is too large for this block size");
- }
+ final int numBlocks = numBlocks(valueCount, blockSize);
minValues = new long[numBlocks];
averages = new float[numBlocks];
subReaders = new PackedInts.Reader[numBlocks];
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java Wed Jul 3 23:26:32 2013
@@ -213,6 +213,11 @@ public class PackedInts {
this.format = format;
this.bitsPerValue = bitsPerValue;
}
+
+ @Override
+ public String toString() {
+ return "FormatAndBits(format=" + format + " bitsPerValue=" + bitsPerValue + ")";
+ }
}
/**
@@ -1036,14 +1041,21 @@ public class PackedInts {
*/
public static Mutable getMutable(int valueCount,
int bitsPerValue, float acceptableOverheadRatio) {
- assert valueCount >= 0;
-
final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio);
- switch (formatAndBits.format) {
+ return getMutable(valueCount, formatAndBits.bitsPerValue, formatAndBits.format);
+ }
+
+ /** Same as {@link #getMutable(int, int, float)} with a pre-computed number
+ * of bits per value and format.
+ * @lucene.internal */
+ public static Mutable getMutable(int valueCount,
+ int bitsPerValue, PackedInts.Format format) {
+ assert valueCount >= 0;
+ switch (format) {
case PACKED_SINGLE_BLOCK:
- return Packed64SingleBlock.create(valueCount, formatAndBits.bitsPerValue);
+ return Packed64SingleBlock.create(valueCount, bitsPerValue);
case PACKED:
- switch (formatAndBits.bitsPerValue) {
+ switch (bitsPerValue) {
case 8:
return new Direct8(valueCount);
case 16:
@@ -1063,7 +1075,7 @@ public class PackedInts {
}
break;
}
- return new Packed64(valueCount, formatAndBits.bitsPerValue);
+ return new Packed64(valueCount, bitsPerValue);
default:
throw new AssertionError();
}
@@ -1198,33 +1210,39 @@ public class PackedInts {
for (int i = 0; i < len; ++i) {
dest.set(destPos++, src.get(srcPos++));
}
- } else {
+ } else if (len > 0) {
// use bulk operations
- long[] buf = new long[Math.min(capacity, len)];
- int remaining = 0;
- while (len > 0) {
- final int read = src.get(srcPos, buf, remaining, Math.min(len, buf.length - remaining));
- assert read > 0;
- srcPos += read;
- len -= read;
- remaining += read;
- final int written = dest.set(destPos, buf, 0, remaining);
- assert written > 0;
- destPos += written;
- if (written < remaining) {
- System.arraycopy(buf, written, buf, 0, remaining - written);
- }
- remaining -= written;
- }
- while (remaining > 0) {
- final int written = dest.set(destPos, buf, 0, remaining);
- destPos += written;
- remaining -= written;
- System.arraycopy(buf, written, buf, 0, remaining);
+ final long[] buf = new long[Math.min(capacity, len)];
+ copy(src, srcPos, dest, destPos, len, buf);
+ }
+ }
+
+ /** Same as {@link #copy(Reader, int, Mutable, int, int, int)} but using a pre-allocated buffer. */
+ static void copy(Reader src, int srcPos, Mutable dest, int destPos, int len, long[] buf) {
+ assert buf.length > 0;
+ int remaining = 0;
+ while (len > 0) {
+ final int read = src.get(srcPos, buf, remaining, Math.min(len, buf.length - remaining));
+ assert read > 0;
+ srcPos += read;
+ len -= read;
+ remaining += read;
+ final int written = dest.set(destPos, buf, 0, remaining);
+ assert written > 0;
+ destPos += written;
+ if (written < remaining) {
+ System.arraycopy(buf, written, buf, 0, remaining - written);
}
+ remaining -= written;
+ }
+ while (remaining > 0) {
+ final int written = dest.set(destPos, buf, 0, remaining);
+ destPos += written;
+ remaining -= written;
+ System.arraycopy(buf, written, buf, 0, remaining);
}
}
-
+
/**
* Expert: reads only the metadata from a stream. This is useful to later
* restore a stream or open a direct reader via
@@ -1261,4 +1279,26 @@ public class PackedInts {
}
}
-}
\ No newline at end of file
+ /** Check that the block size is a power of 2, in the right bounds, and return
+ * its log in base 2. */
+ static int checkBlockSize(int blockSize, int minBlockSize, int maxBlockSize) {
+ if (blockSize < minBlockSize || blockSize > maxBlockSize) {
+ throw new IllegalArgumentException("blockSize must be >= " + minBlockSize + " and <= " + maxBlockSize + ", got " + blockSize);
+ }
+ if ((blockSize & (blockSize - 1)) != 0) {
+ throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize);
+ }
+ return Integer.numberOfTrailingZeros(blockSize);
+ }
+
+ /** Return the number of blocks required to store <code>size</code> values on
+ * <code>blockSize</code>. */
+ static int numBlocks(long size, int blockSize) {
+ final int numBlocks = (int) (size / blockSize) + (size % blockSize == 0 ? 0 : 1);
+ if ((long) numBlocks * blockSize < size) {
+ throw new IllegalArgumentException("size is too large for this block size");
+ }
+ return numBlocks;
+ }
+
+}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/package.html?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/package.html (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/util/packed/package.html Wed Jul 3 23:26:32 2013
@@ -47,6 +47,11 @@
<li>Same as PackedInts.Mutable but grows the number of bits per values when needed.</li>
<li>Useful to build a PackedInts.Mutable from a read-once stream of longs.</li>
</ul></li>
+ <li><b>{@link org.apache.lucene.util.packed.PagedGrowableWriter}</b><ul>
+ <li>Slices data into fixed-size blocks stored in GrowableWriters.</li>
+ <li>Supports more than 2B values.</li>
+ <li>You should use AppendingLongBuffer instead if you don't need random write access.</li>
+ </ul></li>
<li><b>{@link org.apache.lucene.util.packed.AppendingLongBuffer}</b><ul>
<li>Can store any sequence of longs.</li>
<li>Compression is good when values are close to each other.</li>
Modified: lucene/dev/branches/security/lucene/core/src/test/org/apache/lucene/TestSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/test/org/apache/lucene/TestSearch.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/test/org/apache/lucene/TestSearch.java (original)
+++ lucene/dev/branches/security/lucene/core/src/test/org/apache/lucene/TestSearch.java Wed Jul 3 23:26:32 2013
@@ -112,10 +112,7 @@ public class TestSearch extends LuceneTe
Analyzer analyzer = new MockAnalyzer(random);
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
MergePolicy mp = conf.getMergePolicy();
- if (mp instanceof LogMergePolicy) {
- ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
- }
-
+ mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
IndexWriter writer = new IndexWriter(directory, conf);
String[] docs = {
@@ -130,7 +127,7 @@ public class TestSearch extends LuceneTe
for (int j = 0; j < docs.length; j++) {
Document d = new Document();
d.add(newTextField("contents", docs[j], Field.Store.YES));
- d.add(newStringField("id", ""+j, Field.Store.NO));
+ d.add(new IntField("id", j, Field.Store.NO));
writer.addDocument(d);
}
writer.close();
Modified: lucene/dev/branches/security/lucene/core/src/test/org/apache/lucene/TestSearchForDuplicates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/test/org/apache/lucene/TestSearchForDuplicates.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/test/org/apache/lucene/TestSearchForDuplicates.java (original)
+++ lucene/dev/branches/security/lucene/core/src/test/org/apache/lucene/TestSearchForDuplicates.java Wed Jul 3 23:26:32 2013
@@ -72,9 +72,7 @@ public class TestSearchForDuplicates ext
Analyzer analyzer = new MockAnalyzer(random);
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
final MergePolicy mp = conf.getMergePolicy();
- if (mp instanceof LogMergePolicy) {
- ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFiles);
- }
+ mp.setNoCFSRatio(useCompoundFiles ? 1.0 : 0.0);
IndexWriter writer = new IndexWriter(directory, conf);
if (VERBOSE) {
System.out.println("TEST: now build index MAX_DOCS=" + MAX_DOCS);
@@ -83,7 +81,7 @@ public class TestSearchForDuplicates ext
for (int j = 0; j < MAX_DOCS; j++) {
Document d = new Document();
d.add(newTextField(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES));
- d.add(newTextField(ID_FIELD, Integer.toString(j), Field.Store.YES));
+ d.add(new IntField(ID_FIELD, j, Field.Store.YES));
writer.addDocument(d);
}
writer.close();
Modified: lucene/dev/branches/security/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java (original)
+++ lucene/dev/branches/security/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java Wed Jul 3 23:26:32 2013
@@ -49,6 +49,7 @@ public class TestCompressingStoredFields
iwConf.setCodec(CompressingCodec.randomInstance(random()));
// disable CFS because this test checks file names
iwConf.setMergePolicy(newLogMergePolicy(false));
+ iwConf.setUseCompoundFile(false);
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
final Document validDoc = new Document();