You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/05/14 15:51:59 UTC
svn commit: r1103112 [21/24] - in /lucene/dev/branches/flexscoring: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/ant/
dev-tools/idea/lucene/contrib/db/bdb-je/
dev-tools/idea/lucene/contrib/db/bdb/ dev-tools/idea/lucene/cont...
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/handler/component/FacetComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/handler/component/FacetComponent.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/handler/component/FacetComponent.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/handler/component/FacetComponent.java Sat May 14 13:51:35 2011
@@ -17,23 +17,23 @@
package org.apache.solr.handler.component;
-import java.io.IOException;
-import java.net.URL;
-import java.util.*;
-
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
-import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
-import org.apache.solr.common.SolrException;
import org.apache.solr.request.SimpleFacets;
-import org.apache.lucene.util.OpenBitSet;
-import org.apache.solr.search.QueryParsing;
import org.apache.solr.schema.FieldType;
-import org.apache.lucene.queryParser.ParseException;
+import org.apache.solr.search.QueryParsing;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.*;
/**
* TODO!
@@ -222,11 +222,37 @@ public class FacetComponent extends Sear
sreq.params.remove(paramStart + FacetParams.FACET_MINCOUNT);
sreq.params.remove(paramStart + FacetParams.FACET_OFFSET);
- dff.initialLimit = dff.offset + dff.limit;
+ dff.initialLimit = dff.limit <= 0 ? dff.limit : dff.offset + dff.limit;
- if(dff.sort.equals(FacetParams.FACET_SORT_COUNT) && dff.limit > 0) {
- // set the initial limit higher to increase accuracy
- dff.initialLimit = (int)(dff.initialLimit * 1.5) + 10;
+ if (dff.sort.equals(FacetParams.FACET_SORT_COUNT)) {
+ if (dff.limit > 0) {
+ // set the initial limit higher to increase accuracy
+ dff.initialLimit = (int)(dff.initialLimit * 1.5) + 10;
+ dff.initialMincount = 0; // TODO: we could change this to 1, but would then need more refinement for small facet result sets?
+ } else {
+ // if limit==-1, then no need to artificially lower mincount to 0 if it's 1
+ dff.initialMincount = Math.min(dff.minCount, 1);
+ }
+ } else {
+ // we're sorting by index order.
+ // if minCount==0, we should always be able to get accurate results w/o over-requesting or refining
+ // if minCount==1, we should be able to get accurate results w/o over-requesting, but we'll need to refine
+ // if minCount==n (>1), we can set the initialMincount to minCount/nShards, rounded up.
+ // For example, we know that if minCount=10 and we have 3 shards, then at least one shard must have a count of 4 for the term
+ // For the minCount>1 case, we can generate too short of a list (miss terms at the end of the list) unless limit==-1
+ // For example: each shard could produce a list of top 10, but some of those could fail to make it into the combined list (i.e.
+ // we needed to go beyond the top 10 to generate the top 10 combined). Overrequesting can help a little here, but not as
+ // much as when sorting by count.
+ if (dff.minCount <= 1) {
+ dff.initialMincount = dff.minCount;
+ } else {
+ dff.initialMincount = (int)Math.ceil((double)dff.minCount / rb.slices.length);
+ // dff.initialMincount = 1;
+ }
+ }
+
+ if (dff.initialMincount != 0) {
+ sreq.params.set(paramStart + FacetParams.FACET_MINCOUNT, dff.initialMincount);
}
// Currently this is for testing only and allows overriding of the
@@ -286,8 +312,95 @@ public class FacetComponent extends Sear
dff.add(shardNum, (NamedList)facet_fields.get(dff.getKey()), dff.initialLimit);
}
}
- }
+ // Distributed facet_dates
+ //
+ // The implementation below uses the first encountered shard's
+ // facet_dates as the basis for subsequent shards' data to be merged.
+ // (the "NOW" param should ensure consistency)
+ @SuppressWarnings("unchecked")
+ SimpleOrderedMap<SimpleOrderedMap<Object>> facet_dates =
+ (SimpleOrderedMap<SimpleOrderedMap<Object>>)
+ facet_counts.get("facet_dates");
+
+ if (facet_dates != null) {
+
+ // go through each facet_date
+ for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_dates) {
+ final String field = entry.getKey();
+ if (fi.dateFacets.get(field) == null) {
+ // first time we've seen this field, no merging
+ fi.dateFacets.add(field, entry.getValue());
+
+ } else {
+ // not the first time, merge current field
+
+ SimpleOrderedMap<Object> shardFieldValues
+ = entry.getValue();
+ SimpleOrderedMap<Object> existFieldValues
+ = fi.dateFacets.get(field);
+
+ for (Map.Entry<String,Object> existPair : existFieldValues) {
+ final String key = existPair.getKey();
+ if (key.equals("gap") ||
+ key.equals("end") ||
+ key.equals("start")) {
+ // we can skip these, must all be the same across shards
+ continue;
+ }
+ // can be null if inconsistencies in shards responses
+ Integer newValue = (Integer) shardFieldValues.get(key);
+ if (null != newValue) {
+ Integer oldValue = ((Integer) existPair.getValue());
+ existPair.setValue(oldValue + newValue);
+ }
+ }
+ }
+ }
+ }
+
+ // Distributed facet_ranges
+ //
+ // The implementation below uses the first encountered shard's
+ // facet_ranges as the basis for subsequent shards' data to be merged.
+ @SuppressWarnings("unchecked")
+ SimpleOrderedMap<SimpleOrderedMap<Object>> facet_ranges =
+ (SimpleOrderedMap<SimpleOrderedMap<Object>>)
+ facet_counts.get("facet_ranges");
+
+ if (facet_ranges != null) {
+
+ // go through each facet_range
+ for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_ranges) {
+ final String field = entry.getKey();
+ if (fi.rangeFacets.get(field) == null) {
+ // first time we've seen this field, no merging
+ fi.rangeFacets.add(field, entry.getValue());
+
+ } else {
+ // not the first time, merge current field counts
+
+ @SuppressWarnings("unchecked")
+ NamedList<Integer> shardFieldValues
+ = (NamedList<Integer>) entry.getValue().get("counts");
+
+ @SuppressWarnings("unchecked")
+ NamedList<Integer> existFieldValues
+ = (NamedList<Integer>) fi.rangeFacets.get(field).get("counts");
+
+ for (Map.Entry<String,Integer> existPair : existFieldValues) {
+ final String key = existPair.getKey();
+ // can be null if inconsistencies in shards responses
+ Integer newValue = shardFieldValues.get(key);
+ if (null != newValue) {
+ Integer oldValue = existPair.getValue();
+ existPair.setValue(oldValue + newValue);
+ }
+ }
+ }
+ }
+ }
+ }
//
// This code currently assumes that there will be only a single
@@ -296,15 +409,18 @@ public class FacetComponent extends Sear
//
for (DistribFieldFacet dff : fi.facets.values()) {
- if (dff.limit <= 0) continue; // no need to check these facets for refinement
- if (dff.minCount <= 1 && dff.sort.equals(FacetParams.FACET_SORT_INDEX)) continue;
+ // no need to check these facets for refinement
+ if (dff.initialLimit <= 0 && dff.initialMincount == 0) continue;
- @SuppressWarnings("unchecked") // generic array's are anoying
+ // only other case where index-sort doesn't need refinement is if minCount==0
+ if (dff.minCount == 0 && dff.sort.equals(FacetParams.FACET_SORT_INDEX)) continue;
+
+ @SuppressWarnings("unchecked") // generic array's are annoying
List<String>[] tmp = (List<String>[]) new List[rb.shards.length];
dff._toRefine = tmp;
ShardFacetCount[] counts = dff.getCountSorted();
- int ntop = Math.min(counts.length, dff.offset + dff.limit);
+ int ntop = Math.min(counts.length, dff.limit >= 0 ? dff.offset + dff.limit : Integer.MAX_VALUE);
long smallestCount = counts.length == 0 ? 0 : counts[ntop-1].count;
for (int i=0; i<counts.length; i++) {
@@ -313,8 +429,11 @@ public class FacetComponent extends Sear
if (i<ntop) {
// automatically flag the top values for refinement
+ // this should always be true for facet.sort=index
needRefinement = true;
} else {
+ // this logic should only be invoked for facet.sort=index (for now)
+
// calculate the maximum value that this term may have
// and if it is >= smallestCount, then flag for refinement
long maxCount = sfc.count;
@@ -422,13 +541,32 @@ public class FacetComponent extends Sear
counts = dff.getLexSorted();
}
- int end = dff.limit < 0 ? counts.length : Math.min(dff.offset + dff.limit, counts.length);
- for (int i=dff.offset; i<end; i++) {
- if (counts[i].count < dff.minCount) {
- if (countSorted) break; // if sorted by count, we can break out of loop early
- else continue;
+ if (countSorted) {
+ int end = dff.limit < 0 ? counts.length : Math.min(dff.offset + dff.limit, counts.length);
+ for (int i=dff.offset; i<end; i++) {
+ if (counts[i].count < dff.minCount) {
+ break;
+ }
+ fieldCounts.add(counts[i].name, num(counts[i].count));
+ }
+ } else {
+ int off = dff.offset;
+ int lim = dff.limit >= 0 ? dff.limit : Integer.MAX_VALUE;
+
+ // index order...
+ for (int i=0; i<counts.length; i++) {
+ long count = counts[i].count;
+ if (count < dff.minCount) continue;
+ if (off > 0) {
+ off--;
+ continue;
+ }
+ if (lim <= 0) {
+ break;
+ }
+ lim--;
+ fieldCounts.add(counts[i].name, num(count));
}
- fieldCounts.add(counts[i].name, num(counts[i].count));
}
if (dff.missing) {
@@ -436,9 +574,8 @@ public class FacetComponent extends Sear
}
}
- // TODO: facet dates & numbers
- facet_counts.add("facet_dates", new SimpleOrderedMap());
- facet_counts.add("facet_ranges", new SimpleOrderedMap());
+ facet_counts.add("facet_dates", fi.dateFacets);
+ facet_counts.add("facet_ranges", fi.rangeFacets);
rb.rsp.add("facet_counts", facet_counts);
@@ -490,8 +627,14 @@ public class FacetComponent extends Sear
* <b>This API is experimental and subject to change</b>
*/
public static class FacetInfo {
+
public LinkedHashMap<String,QueryFacet> queryFacets;
public LinkedHashMap<String,DistribFieldFacet> facets;
+ public SimpleOrderedMap<SimpleOrderedMap<Object>> dateFacets
+ = new SimpleOrderedMap<SimpleOrderedMap<Object>>();
+ public SimpleOrderedMap<SimpleOrderedMap<Object>> rangeFacets
+ = new SimpleOrderedMap<SimpleOrderedMap<Object>>();
+
public List<String> exceptionList;
void parse(SolrParams params, ResponseBuilder rb) {
@@ -631,7 +774,8 @@ public class FacetComponent extends Sear
public HashMap<String,ShardFacetCount> counts = new HashMap<String,ShardFacetCount>(128);
public int termNum;
- public int initialLimit; // how many terms requested in first phase
+ public int initialLimit; // how many terms requested in first phase
+ public int initialMincount; // mincount param sent to each shard
public boolean needRefinements;
public ShardFacetCount[] countSorted;
@@ -671,11 +815,10 @@ public class FacetComponent extends Sear
}
}
- // the largest possible missing term is 0 if we received less
- // than the number requested (provided mincount==0 like it should be for
- // a shard request)
+ // the largest possible missing term is initialMincount if we received less
+ // than the number requested.
if (numRequested<0 || numRequested != 0 && numReceived < numRequested) {
- last = 0;
+ last = initialMincount;
}
missingMaxPossible += last;
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/handler/component/ResponseBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/handler/component/ResponseBuilder.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/handler/component/ResponseBuilder.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/handler/component/ResponseBuilder.java Sat May 14 13:51:35 2011
@@ -163,6 +163,25 @@ public class ResponseBuilder
debugInfo.add( name, val );
}
+ public void addDebug(Object val, String... path) {
+ if( debugInfo == null ) {
+ debugInfo = new SimpleOrderedMap<Object>();
+ }
+
+ NamedList<Object> target = debugInfo;
+ for (int i=0; i<path.length-1; i++) {
+ String elem = path[i];
+ NamedList<Object> newTarget = (NamedList<Object>)debugInfo.get(elem);
+ if (newTarget == null) {
+ newTarget = new SimpleOrderedMap<Object>();
+ target.add(elem, newTarget);
+ }
+ target = newTarget;
+ }
+
+ target.add(path[path.length-1], val);
+ }
+
//-------------------------------------------------------------------------
//-------------------------------------------------------------------------
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java Sat May 14 13:51:35 2011
@@ -162,7 +162,7 @@ public class SpellCheckComponent extends
} else {
throw new SolrException(SolrException.ErrorCode.NOT_FOUND,
- "Specified dictionary does not exist.");
+ "Specified dictionary does not exist: " + getDictionaryName(params));
}
}
}
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java Sat May 14 13:51:35 2011
@@ -208,7 +208,7 @@ public class TermVectorComponent extends
if (keyField != null) {
Document document = reader.document(docId, fieldSelector);
- Fieldable uniqId = document.getField(uniqFieldName);
+ Fieldable uniqId = document.getFieldable(uniqFieldName);
String uniqVal = null;
if (uniqId != null) {
uniqVal = keyField.getType().storedToReadable(uniqId);
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java Sat May 14 13:51:35 2011
@@ -401,13 +401,24 @@ public class DefaultSolrHighlighter exte
private void doHighlightingByHighlighter( Query query, SolrQueryRequest req, NamedList docSummaries,
int docId, Document doc, String fieldName ) throws IOException {
+ final SolrIndexSearcher searcher = req.getSearcher();
+ final IndexSchema schema = searcher.getSchema();
+
+ // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) -
+ // so we disable them until fixed (see LUCENE-3080)!
+ // BEGIN: Hack
+ final SchemaField schemaField = schema.getFieldOrNull(fieldName);
+ if (schemaField != null && (
+ (schemaField.getType() instanceof org.apache.solr.schema.TrieField) ||
+ (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField)
+ )) return;
+ // END: Hack
+
SolrParams params = req.getParams();
String[] docTexts = doc.getValues(fieldName);
// according to Document javadoc, doc.getValues() never returns null. check empty instead of null
if (docTexts.length == 0) return;
- SolrIndexSearcher searcher = req.getSearcher();
- IndexSchema schema = searcher.getSchema();
TokenStream tstream = null;
int numFragments = getMaxSnippets(fieldName, params);
boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);
@@ -435,12 +446,20 @@ public class DefaultSolrHighlighter exte
// fall back to analyzer
tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
}
-
+
+ int maxCharsToAnalyze = params.getFieldInt(fieldName,
+ HighlightParams.MAX_CHARS,
+ Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
+
Highlighter highlighter;
if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
// TODO: this is not always necessary - eventually we would like to avoid this wrap
// when it is not needed.
- tstream = new CachingTokenFilter(tstream);
+ if (maxCharsToAnalyze < 0) {
+ tstream = new CachingTokenFilter(tstream);
+ } else {
+ tstream = new CachingTokenFilter(new OffsetLimitTokenFilter(tstream, maxCharsToAnalyze));
+ }
// get highlighter
highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream);
@@ -453,9 +472,6 @@ public class DefaultSolrHighlighter exte
highlighter = getHighlighter(query, fieldName, req);
}
- int maxCharsToAnalyze = params.getFieldInt(fieldName,
- HighlightParams.MAX_CHARS,
- Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
if (maxCharsToAnalyze < 0) {
highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
} else {
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/request/SimpleFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/request/SimpleFacets.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/request/SimpleFacets.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/request/SimpleFacets.java Sat May 14 13:51:35 2011
@@ -21,6 +21,7 @@ import org.apache.lucene.index.*;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.packed.Direct16;
import org.apache.lucene.util.packed.Direct32;
import org.apache.lucene.util.packed.Direct8;
@@ -682,14 +683,15 @@ public class SimpleFacets {
if (deState==null) {
deState = new SolrIndexSearcher.DocsEnumState();
+ deState.fieldName = StringHelper.intern(field);
deState.deletedDocs = MultiFields.getDeletedDocs(r);
deState.termsEnum = termsEnum;
- deState.reuse = docsEnum;
+ deState.docsEnum = docsEnum;
}
- c = searcher.numDocs(new TermQuery(t), docs, deState);
+ c = searcher.numDocs(docs, deState);
- docsEnum = deState.reuse;
+ docsEnum = deState.docsEnum;
} else {
// iterate over TermDocs to calculate the intersection
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/request/SolrRequestInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/request/SolrRequestInfo.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/request/SolrRequestInfo.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/request/SolrRequestInfo.java Sat May 14 13:51:35 2011
@@ -17,11 +17,15 @@
package org.apache.solr.request;
+import org.apache.solr.common.SolrException;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.response.SolrQueryResponse;
+import java.io.Closeable;
import java.util.Date;
+import java.util.LinkedList;
+import java.util.List;
public class SolrRequestInfo {
@@ -31,6 +35,8 @@ public class SolrRequestInfo {
protected SolrQueryResponse rsp;
protected Date now;
protected ResponseBuilder rb;
+ protected List<Closeable> closeHooks;
+
public static SolrRequestInfo getRequestInfo() {
return threadLocal.get();
@@ -48,7 +54,20 @@ public class SolrRequestInfo {
}
public static void clearRequestInfo() {
- threadLocal.remove();
+ try {
+ SolrRequestInfo info = threadLocal.get();
+ if (info != null && info.closeHooks != null) {
+ for (Closeable hook : info.closeHooks) {
+ try {
+ hook.close();
+ } catch (Throwable throwable) {
+ SolrException.log(SolrCore.log, "Exception during close hook", throwable);
+ }
+ }
+ }
+ } finally {
+ threadLocal.remove();
+ }
}
public SolrRequestInfo(SolrQueryRequest req, SolrQueryResponse rsp) {
@@ -88,4 +107,14 @@ public class SolrRequestInfo {
public void setResponseBuilder(ResponseBuilder rb) {
this.rb = rb;
}
+
+ public void addCloseHook(Closeable hook) {
+ // is this better here, or on SolrQueryRequest?
+ synchronized (this) {
+ if (closeHooks == null) {
+ closeHooks = new LinkedList<Closeable>();
+ }
+ closeHooks.add(hook);
+ }
+ }
}
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/request/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/request/UnInvertedField.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/request/UnInvertedField.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/request/UnInvertedField.java Sat May 14 13:51:35 2011
@@ -18,16 +18,12 @@
package org.apache.solr.request;
import org.apache.lucene.search.FieldCache;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
-import org.apache.lucene.util.PagedBytes;
+import org.apache.lucene.util.StringHelper;
import org.apache.noggit.CharArr;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.util.NamedList;
@@ -44,15 +40,11 @@ import org.apache.solr.handler.component
import org.apache.solr.handler.component.FieldFacetStats;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.Bits;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
-import java.util.Comparator;
import java.util.concurrent.atomic.AtomicLong;
@@ -86,7 +78,7 @@ import java.util.concurrent.atomic.Atomi
* much like Lucene's own internal term index).
*
*/
-public class UnInvertedField {
+public class UnInvertedField extends DocTermOrds {
private static int TNUM_OFFSET=2;
static class TopTerm {
@@ -100,362 +92,113 @@ public class UnInvertedField {
}
}
- String field;
- int numTermsInField;
- int termsInverted; // number of unique terms that were un-inverted
- long termInstances; // total number of references to term numbers
- final TermIndex ti;
long memsz;
- int total_time; // total time to uninvert the field
- int phase1_time; // time for phase1 of the uninvert process
final AtomicLong use = new AtomicLong(); // number of uses
- int[] index;
- byte[][] tnums = new byte[256][];
- int[] maxTermCounts;
+ int[] maxTermCounts = new int[1024];
+
final Map<Integer,TopTerm> bigTerms = new LinkedHashMap<Integer,TopTerm>();
+ private SolrIndexSearcher.DocsEnumState deState;
+ private final SolrIndexSearcher searcher;
+
+ @Override
+ protected void visitTerm(TermsEnum te, int termNum) throws IOException {
+
+ if (termNum >= maxTermCounts.length) {
+ // resize by doubling - for very large number of unique terms, expanding
+ // by 4K and resultant GC will dominate uninvert times. Resize at end if material
+ int[] newMaxTermCounts = new int[maxTermCounts.length*2];
+ System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum);
+ maxTermCounts = newMaxTermCounts;
+ }
+
+ final BytesRef term = te.term();
+
+ if (te.docFreq() > maxTermDocFreq) {
+ TopTerm topTerm = new TopTerm();
+ topTerm.term = new BytesRef(term);
+ topTerm.termNum = termNum;
+ bigTerms.put(topTerm.termNum, topTerm);
+
+ if (deState == null) {
+ deState = new SolrIndexSearcher.DocsEnumState();
+ deState.fieldName = StringHelper.intern(field);
+ // deState.termsEnum = te.tenum;
+ deState.termsEnum = te; // TODO: check for MultiTermsEnum in SolrIndexSearcher could now fail?
+ deState.docsEnum = docsEnum;
+ deState.minSetSizeCached = maxTermDocFreq;
+ }
+ docsEnum = deState.docsEnum;
+ DocSet set = searcher.getDocSet(deState);
+ maxTermCounts[termNum] = set.size();
+ }
+ }
+
+ @Override
+ protected void setActualDocFreq(int termNum, int docFreq) {
+ maxTermCounts[termNum] = docFreq;
+ }
public long memSize() {
// can cache the mem size since it shouldn't change
if (memsz!=0) return memsz;
- long sz = 8*8 + 32; // local fields
+ long sz = super.ramUsedInBytes();
+ sz += 8*8 + 32; // local fields
sz += bigTerms.size() * 64;
for (TopTerm tt : bigTerms.values()) {
sz += tt.memSize();
}
- if (index != null) sz += index.length * 4;
- if (tnums!=null) {
- for (byte[] arr : tnums)
- if (arr != null) sz += arr.length;
- }
if (maxTermCounts != null)
sz += maxTermCounts.length * 4;
- sz += ti.memSize();
+ if (indexedTermsArray != null) {
+ // assume 8 byte references?
+ sz += 8+8+8+8+(indexedTermsArray.length<<3)+sizeOfIndexedStrings;
+ }
memsz = sz;
return sz;
}
-
- /** Number of bytes to represent an unsigned int as a vint. */
- static int vIntSize(int x) {
- if ((x & (0xffffffff << (7*1))) == 0 ) {
- return 1;
- }
- if ((x & (0xffffffff << (7*2))) == 0 ) {
- return 2;
- }
- if ((x & (0xffffffff << (7*3))) == 0 ) {
- return 3;
- }
- if ((x & (0xffffffff << (7*4))) == 0 ) {
- return 4;
- }
- return 5;
- }
-
-
- // todo: if we know the size of the vInt already, we could do
- // a single switch on the size
- static int writeInt(int x, byte[] arr, int pos) {
- int a;
- a = (x >>> (7*4));
- if (a != 0) {
- arr[pos++] = (byte)(a | 0x80);
- }
- a = (x >>> (7*3));
- if (a != 0) {
- arr[pos++] = (byte)(a | 0x80);
- }
- a = (x >>> (7*2));
- if (a != 0) {
- arr[pos++] = (byte)(a | 0x80);
- }
- a = (x >>> (7*1));
- if (a != 0) {
- arr[pos++] = (byte)(a | 0x80);
- }
- arr[pos++] = (byte)(x & 0x7f);
- return pos;
- }
-
-
-
public UnInvertedField(String field, SolrIndexSearcher searcher) throws IOException {
- this.field = field;
- this.ti = new TermIndex(field,
- TrieField.getMainValuePrefix(searcher.getSchema().getFieldType(field)));
- uninvert(searcher);
- }
-
-
- private void uninvert(SolrIndexSearcher searcher) throws IOException {
- long startTime = System.currentTimeMillis();
-
- IndexReader reader = searcher.getIndexReader();
- int maxDoc = reader.maxDoc();
-
- int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number
- this.index = index;
- final int[] lastTerm = new int[maxDoc]; // last term we saw for this document
- final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)
- maxTermCounts = new int[1024];
-
- NumberedTermsEnum te = ti.getEnumerator(reader);
-
- // threshold, over which we use set intersections instead of counting
- // to (1) save memory, and (2) speed up faceting.
- // Add 2 for testing purposes so that there will always be some terms under
- // the threshold even when the index is very small.
- int threshold = maxDoc / 20 + 2;
- // threshold = 2000000000; //////////////////////////////// USE FOR TESTING
-
- // we need a minimum of 9 bytes, but round up to 12 since the space would
- // be wasted with most allocators anyway.
- byte[] tempArr = new byte[12];
-
- //
- // enumerate all terms, and build an intermediate form of the un-inverted field.
- //
- // During this intermediate form, every document has a (potential) byte[]
- // and the int[maxDoc()] array either contains the termNumber list directly
- // or the *end* offset of the termNumber list in it's byte array (for faster
- // appending and faster creation of the final form).
- //
- // idea... if things are too large while building, we could do a range of docs
- // at a time (but it would be a fair amount slower to build)
- // could also do ranges in parallel to take advantage of multiple CPUs
-
- // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
- // values. This requires going over the field first to find the most
- // frequent terms ahead of time.
-
- SolrIndexSearcher.DocsEnumState deState = null;
-
- for (;;) {
- BytesRef t = te.term();
- if (t==null) break;
-
- int termNum = te.getTermNumber();
-
- if (termNum >= maxTermCounts.length) {
- // resize by doubling - for very large number of unique terms, expanding
- // by 4K and resultant GC will dominate uninvert times. Resize at end if material
- int[] newMaxTermCounts = new int[maxTermCounts.length*2];
- System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum);
- maxTermCounts = newMaxTermCounts;
- }
-
- int df = te.docFreq();
- if (df >= threshold) {
- TopTerm topTerm = new TopTerm();
- topTerm.term = new BytesRef(t);
- topTerm.termNum = termNum;
- bigTerms.put(topTerm.termNum, topTerm);
-
- if (deState == null) {
- deState = new SolrIndexSearcher.DocsEnumState();
- deState.termsEnum = te.tenum;
- deState.reuse = te.docsEnum;
- }
- DocSet set = searcher.getDocSet(new TermQuery(new Term(ti.field, topTerm.term)), deState);
- te.docsEnum = deState.reuse;
-
- maxTermCounts[termNum] = set.size();
-
- te.next();
- continue;
- }
-
- termsInverted++;
-
- DocsEnum docsEnum = te.getDocsEnum();
-
- DocsEnum.BulkReadResult bulkResult = docsEnum.getBulkResult();
-
- for(;;) {
- int n = docsEnum.read();
- if (n <= 0) break;
-
- maxTermCounts[termNum] += n;
-
- for (int i=0; i<n; i++) {
- termInstances++;
- int doc = bulkResult.docs.ints[i];
- // add 2 to the term number to make room for special reserved values:
- // 0 (end term) and 1 (index into byte array follows)
- int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
- lastTerm[doc] = termNum;
- int val = index[doc];
-
- if ((val & 0xff)==1) {
- // index into byte array (actually the end of
- // the doc-specific byte[] when building)
- int pos = val >>> 8;
- int ilen = vIntSize(delta);
- byte[] arr = bytes[doc];
- int newend = pos+ilen;
- if (newend > arr.length) {
- // We avoid a doubling strategy to lower memory usage.
- // this faceting method isn't for docs with many terms.
- // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
- // TODO: figure out what array lengths we can round up to w/o actually using more memory
- // (how much space does a byte[] take up? Is data preceded by a 32 bit length only?
- // It should be safe to round up to the nearest 32 bits in any case.
- int newLen = (newend + 3) & 0xfffffffc; // 4 byte alignment
- byte[] newarr = new byte[newLen];
- System.arraycopy(arr, 0, newarr, 0, pos);
- arr = newarr;
- bytes[doc] = newarr;
- }
- pos = writeInt(delta, arr, pos);
- index[doc] = (pos<<8) | 1; // update pointer to end index in byte[]
- } else {
- // OK, this int has data in it... find the end (a zero starting byte - not
- // part of another number, hence not following a byte with the high bit set).
- int ipos;
- if (val==0) {
- ipos=0;
- } else if ((val & 0x0000ff80)==0) {
- ipos=1;
- } else if ((val & 0x00ff8000)==0) {
- ipos=2;
- } else if ((val & 0xff800000)==0) {
- ipos=3;
- } else {
- ipos=4;
- }
-
- int endPos = writeInt(delta, tempArr, ipos);
- if (endPos <= 4) {
- // value will fit in the integer... move bytes back
- for (int j=ipos; j<endPos; j++) {
- val |= (tempArr[j] & 0xff) << (j<<3);
- }
- index[doc] = val;
- } else {
- // value won't fit... move integer into byte[]
- for (int j=0; j<ipos; j++) {
- tempArr[j] = (byte)val;
- val >>>=8;
- }
- // point at the end index in the byte[]
- index[doc] = (endPos<<8) | 1;
- bytes[doc] = tempArr;
- tempArr = new byte[12];
- }
-
- }
-
+ super(field,
+ // threshold, over which we use set intersections instead of counting
+ // to (1) save memory, and (2) speed up faceting.
+ // Add 2 for testing purposes so that there will always be some terms under
+ // the threshold even when the index is very
+ // small.
+ searcher.maxDoc()/20 + 2,
+ DEFAULT_INDEX_INTERVAL_BITS);
+ //System.out.println("maxTermDocFreq=" + maxTermDocFreq + " maxDoc=" + searcher.maxDoc());
+
+ final String prefix = TrieField.getMainValuePrefix(searcher.getSchema().getFieldType(field));
+ this.searcher = searcher;
+ try {
+ uninvert(searcher.getIndexReader(), prefix == null ? null : new BytesRef(prefix));
+ } catch (IllegalStateException ise) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ise.getMessage());
+ }
+ if (tnums != null) {
+ for(byte[] target : tnums) {
+ if (target != null && target.length > (1<<24)*.9) {
+ SolrCore.log.warn("Approaching too many values for UnInvertedField faceting on field '"+field+"' : bucket size=" + target.length);
}
-
}
-
- te.next();
}
- numTermsInField = te.getTermNumber();
- te.close();
-
// free space if outrageously wasteful (tradeoff memory/cpu)
-
if ((maxTermCounts.length - numTermsInField) > 1024) { // too much waste!
int[] newMaxTermCounts = new int[numTermsInField];
System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, numTermsInField);
maxTermCounts = newMaxTermCounts;
- }
-
- long midPoint = System.currentTimeMillis();
-
- if (termInstances == 0) {
- // we didn't invert anything
- // lower memory consumption.
- index = this.index = null;
- tnums = null;
- } else {
-
- //
- // transform intermediate form into the final form, building a single byte[]
- // at a time, and releasing the intermediate byte[]s as we go to avoid
- // increasing the memory footprint.
- //
- for (int pass = 0; pass<256; pass++) {
- byte[] target = tnums[pass];
- int pos=0; // end in target;
- if (target != null) {
- pos = target.length;
- } else {
- target = new byte[4096];
- }
-
- // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
- // where pp is the pass (which array we are building), and xx is all values.
- // each pass shares the same byte[] for termNumber lists.
- for (int docbase = pass<<16; docbase<maxDoc; docbase+=(1<<24)) {
- int lim = Math.min(docbase + (1<<16), maxDoc);
- for (int doc=docbase; doc<lim; doc++) {
- int val = index[doc];
- if ((val&0xff) == 1) {
- int len = val >>> 8;
- index[doc] = (pos<<8)|1; // change index to point to start of array
- if ((pos & 0xff000000) != 0) {
- // we only have 24 bits for the array index
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Too many values for UnInvertedField faceting on field "+field);
- }
- byte[] arr = bytes[doc];
- bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
- if (target.length <= pos + len) {
- int newlen = target.length;
- /*** we don't have to worry about the array getting too large
- * since the "pos" param will overflow first (only 24 bits available)
- if ((newlen<<1) <= 0) {
- // overflow...
- newlen = Integer.MAX_VALUE;
- if (newlen <= pos + len) {
- throw new SolrException(400,"Too many terms to uninvert field!");
- }
- } else {
- while (newlen <= pos + len) newlen<<=1; // doubling strategy
- }
- ****/
- while (newlen <= pos + len) newlen<<=1; // doubling strategy
- byte[] newtarget = new byte[newlen];
- System.arraycopy(target, 0, newtarget, 0, pos);
- target = newtarget;
- }
- System.arraycopy(arr, 0, target, pos, len);
- pos += len + 1; // skip single byte at end and leave it 0 for terminator
- }
- }
- }
-
- // shrink array
- if (pos < target.length) {
- byte[] newtarget = new byte[pos];
- System.arraycopy(target, 0, newtarget, 0, pos);
- target = newtarget;
- if (target.length > (1<<24)*.9) {
- SolrCore.log.warn("Approaching too many values for UnInvertedField faceting on field '"+field+"' : bucket size=" + target.length);
- }
- }
-
- tnums[pass] = target;
-
- if ((pass << 16) > maxDoc)
- break;
- }
}
- long endTime = System.currentTimeMillis();
-
- total_time = (int)(endTime-startTime);
- phase1_time = (int)(midPoint-startTime);
-
SolrCore.log.info("UnInverted multi-valued field " + toString());
+ //System.out.println("CREATED: " + toString() + " ti.index=" + ti.index);
}
-
-
+ public int getNumTerms() {
+ return numTermsInField;
+ }
public NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet baseDocs, int offset, int limit, Integer mincount, boolean missing, String sort, String prefix) throws IOException {
use.incrementAndGet();
@@ -468,6 +211,7 @@ public class UnInvertedField {
int baseSize = docs.size();
int maxDoc = searcher.maxDoc();
+ //System.out.println("GET COUNTS field=" + field + " baseSize=" + baseSize + " minCount=" + mincount + " maxDoc=" + maxDoc + " numTermsInField=" + numTermsInField);
if (baseSize >= mincount) {
final int[] index = this.index;
@@ -481,14 +225,20 @@ public class UnInvertedField {
int startTerm = 0;
int endTerm = numTermsInField; // one past the end
- NumberedTermsEnum te = ti.getEnumerator(searcher.getIndexReader());
+ TermsEnum te = getOrdTermsEnum(searcher.getIndexReader());
if (prefix != null && prefix.length() > 0) {
BytesRef prefixBr = new BytesRef(prefix);
- te.skipTo(prefixBr);
- startTerm = te.getTermNumber();
+ if (te.seek(prefixBr, true) == TermsEnum.SeekStatus.END) {
+ startTerm = numTermsInField;
+ } else {
+ startTerm = (int) te.ord();
+ }
prefixBr.append(ByteUtils.bigTerm);
- te.skipTo(prefixBr);
- endTerm = te.getTermNumber();
+ if (te.seek(prefixBr, true) == TermsEnum.SeekStatus.END) {
+ endTerm = numTermsInField;
+ } else {
+ endTerm = (int) te.ord();
+ }
}
/***********
@@ -514,13 +264,18 @@ public class UnInvertedField {
docs = new BitDocSet(bs, maxDoc - baseSize);
// simply negating will mean that we have deleted docs in the set.
// that should be OK, as their entries in our table should be empty.
+ //System.out.println(" NEG");
}
// For the biggest terms, do straight set intersections
for (TopTerm tt : bigTerms.values()) {
+ //System.out.println(" do big termNum=" + tt.termNum + " term=" + tt.term.utf8ToString());
// TODO: counts could be deferred if sorted==false
if (tt.termNum >= startTerm && tt.termNum < endTerm) {
- counts[tt.termNum] = searcher.numDocs(new TermQuery(new Term(ti.field, tt.term)), docs);
+ counts[tt.termNum] = searcher.numDocs(new TermQuery(new Term(field, tt.term)), docs);
+ //System.out.println(" count=" + counts[tt.termNum]);
+ } else {
+ //System.out.println("SKIP term=" + tt.termNum);
}
}
@@ -537,9 +292,11 @@ public class UnInvertedField {
DocIterator iter = docs.iterator();
while (iter.hasNext()) {
int doc = iter.nextDoc();
+ //System.out.println("iter doc=" + doc);
int code = index[doc];
if ((code & 0xff)==1) {
+ //System.out.println(" ptr");
int pos = code>>>8;
int whichArray = (doc >>> 16) & 0xff;
byte[] arr = tnums[whichArray];
@@ -553,9 +310,11 @@ public class UnInvertedField {
}
if (delta == 0) break;
tnum += delta - TNUM_OFFSET;
+ //System.out.println(" tnum=" + tnum);
counts[tnum]++;
}
} else {
+ //System.out.println(" inlined");
int tnum = 0;
int delta = 0;
for (;;) {
@@ -563,6 +322,7 @@ public class UnInvertedField {
if ((code & 0x80)==0) {
if (delta==0) break;
tnum += delta - TNUM_OFFSET;
+ //System.out.println(" tnum=" + tnum);
counts[tnum]++;
delta = 0;
}
@@ -583,6 +343,7 @@ public class UnInvertedField {
LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize,1000), maxsize, Long.MIN_VALUE);
int min=mincount-1; // the smallest value in the top 'N' values
+ //System.out.println("START=" + startTerm + " END=" + endTerm);
for (int i=startTerm; i<endTerm; i++) {
int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
if (c>min) {
@@ -641,11 +402,14 @@ public class UnInvertedField {
}
});
- // convert the term numbers to term values and set as the label
+ // convert the term numbers to term values and set
+ // as the label
+ //System.out.println("sortStart=" + sortedIdxStart + " end=" + sortedIdxEnd);
for (int i=sortedIdxStart; i<sortedIdxEnd; i++) {
int idx = indirect[i];
int tnum = (int)sorted[idx];
String label = getReadableValue(getTermValue(te, tnum), ft, spare);
+ //System.out.println(" label=" + label);
res.setName(idx - sortedIdxStart, label);
}
@@ -668,8 +432,6 @@ public class UnInvertedField {
res.add(label, c);
}
}
-
- te.close();
}
@@ -678,6 +440,8 @@ public class UnInvertedField {
res.add(null, SimpleFacets.getFieldMissingCount(searcher, baseDocs, field));
}
+ //System.out.println(" res=" + res);
+
return res;
}
@@ -731,8 +495,7 @@ public class UnInvertedField {
final int[] index = this.index;
final int[] counts = new int[numTermsInField];//keep track of the number of times we see each word in the field for all the documents in the docset
- NumberedTermsEnum te = ti.getEnumerator(searcher.getIndexReader());
-
+ TermsEnum te = getOrdTermsEnum(searcher.getIndexReader());
boolean doNegative = false;
if (finfo.length == 0) {
@@ -755,7 +518,7 @@ public class UnInvertedField {
for (TopTerm tt : bigTerms.values()) {
// TODO: counts could be deferred if sorted==false
if (tt.termNum >= 0 && tt.termNum < numTermsInField) {
- final Term t = new Term(ti.field, tt.term);
+ final Term t = new Term(field, tt.term);
if (finfo.length == 0) {
counts[tt.termNum] = searcher.numDocs(new TermQuery(t), docs);
} else {
@@ -836,7 +599,6 @@ public class UnInvertedField {
f.accumulateTermNum(i, value);
}
}
- te.close();
int c = missing.size();
allstats.addMissing(c);
@@ -870,23 +632,26 @@ public class UnInvertedField {
}
/** may return a reused BytesRef */
- BytesRef getTermValue(NumberedTermsEnum te, int termNum) throws IOException {
+ BytesRef getTermValue(TermsEnum te, int termNum) throws IOException {
+ //System.out.println("getTermValue termNum=" + termNum + " this=" + this + " numTerms=" + numTermsInField);
if (bigTerms.size() > 0) {
// see if the term is one of our big terms.
TopTerm tt = bigTerms.get(termNum);
if (tt != null) {
+ //System.out.println(" return big " + tt.term);
return tt.term;
}
}
- return te.skipTo(termNum);
+ return lookupTerm(te, termNum);
}
@Override
public String toString() {
+ final long indexSize = indexedTermsArray == null ? 0 : (8+8+8+8+(indexedTermsArray.length<<3)+sizeOfIndexedStrings); // assume 8 byte references?
return "{field=" + field
+ ",memSize="+memSize()
- + ",tindexSize="+ti.memSize()
+ + ",tindexSize="+indexSize
+ ",time="+total_time
+ ",phase1="+phase1_time
+ ",nTerms="+numTermsInField
@@ -896,7 +661,6 @@ public class UnInvertedField {
+ "}";
}
-
//////////////////////////////////////////////////////////////////
//////////////////////////// caching /////////////////////////////
//////////////////////////////////////////////////////////////////
@@ -920,287 +684,3 @@ public class UnInvertedField {
return uif;
}
}
-
-
-// How to share TermDocs (int[] score[])???
-// Hot to share TermPositions?
-/***
-class TermEnumListener {
- void doTerm(Term t) {
- }
- void done() {
- }
-}
-***/
-
-
-class NumberedTermsEnum extends TermsEnum {
- protected final IndexReader reader;
- protected final TermIndex tindex;
- protected TermsEnum tenum;
- protected int pos=-1;
- protected BytesRef termText;
- protected DocsEnum docsEnum;
- protected Bits deletedDocs;
-
-
- NumberedTermsEnum(IndexReader reader, TermIndex tindex) throws IOException {
- this.reader = reader;
- this.tindex = tindex;
- }
-
-
- NumberedTermsEnum(IndexReader reader, TermIndex tindex, BytesRef termValue, int pos) throws IOException {
- this.reader = reader;
- this.tindex = tindex;
- this.pos = pos;
- Terms terms = MultiFields.getTerms(reader, tindex.field);
- deletedDocs = MultiFields.getDeletedDocs(reader);
- if (terms != null) {
- tenum = terms.iterator();
- tenum.seek(termValue);
- setTerm();
- }
- }
-
- @Override
- public Comparator<BytesRef> getComparator() throws IOException {
- return tenum.getComparator();
- }
-
- public DocsEnum getDocsEnum() throws IOException {
- docsEnum = tenum.docs(deletedDocs, docsEnum);
- return docsEnum;
- }
-
- protected BytesRef setTerm() throws IOException {
- termText = tenum.term();
- if (tindex.prefix != null && !termText.startsWith(tindex.prefix)) {
- termText = null;
- }
- return termText;
- }
-
- @Override
- public BytesRef next() throws IOException {
- pos++;
- if (tenum.next() == null) {
- termText = null;
- return null;
- }
- return setTerm(); // this is extra work if we know we are in bounds...
- }
-
- @Override
- public BytesRef term() {
- return termText;
- }
-
- @Override
- public int docFreq() throws IOException {
- return tenum.docFreq();
- }
-
- @Override
- public long totalTermFreq() throws IOException {
- return tenum.totalTermFreq();
- }
-
- public BytesRef skipTo(BytesRef target) throws IOException {
-
- // already here
- if (termText != null && termText.equals(target)) return termText;
-
- if (tenum == null) {
- return null;
- }
-
- int startIdx = Arrays.binarySearch(tindex.index,target);
-
- if (startIdx >= 0) {
- // we hit the term exactly... lucky us!
- TermsEnum.SeekStatus seekStatus = tenum.seek(target);
- assert seekStatus == TermsEnum.SeekStatus.FOUND;
- pos = startIdx << tindex.intervalBits;
- return setTerm();
- }
-
- // we didn't hit the term exactly
- startIdx=-startIdx-1;
-
- if (startIdx == 0) {
- // our target occurs *before* the first term
- TermsEnum.SeekStatus seekStatus = tenum.seek(target);
- assert seekStatus == TermsEnum.SeekStatus.NOT_FOUND;
- pos = 0;
- return setTerm();
- }
-
- // back up to the start of the block
- startIdx--;
-
- if ((pos >> tindex.intervalBits) == startIdx && termText != null && termText.compareTo(target)<=0) {
- // we are already in the right block and the current term is before the term we want,
- // so we don't need to seek.
- } else {
- // seek to the right block
- TermsEnum.SeekStatus seekStatus = tenum.seek(tindex.index[startIdx]);
- assert seekStatus == TermsEnum.SeekStatus.FOUND;
- pos = startIdx << tindex.intervalBits;
- setTerm(); // should be non-null since it's in the index
- }
-
- while (termText != null && termText.compareTo(target) < 0) {
- next();
- }
-
- return termText;
- }
-
- public BytesRef skipTo(int termNumber) throws IOException {
- int delta = termNumber - pos;
- if (delta < 0 || delta > tindex.interval || tenum==null) {
- int idx = termNumber >>> tindex.intervalBits;
- BytesRef base = tindex.index[idx];
- pos = idx << tindex.intervalBits;
- delta = termNumber - pos;
- TermsEnum.SeekStatus seekStatus = tenum.seek(base);
- assert seekStatus == TermsEnum.SeekStatus.FOUND;
- }
- while (--delta >= 0) {
- BytesRef br = tenum.next();
- if (br == null) {
- termText = null;
- return null;
- }
- ++pos;
- }
- return setTerm();
- }
-
- protected void close() throws IOException {
- // no-op, needed so the anon subclass that does indexing
- // can build its index
- }
-
- /** The current term number, starting at 0.
- * Only valid if the previous call to next() or skipTo() returned true.
- */
- public int getTermNumber() {
- return pos;
- }
-
- @Override
- public long ord() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public SeekStatus seek(long ord) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public DocsEnum docs(Bits skipDocs, DocsEnum reuse) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public SeekStatus seek(BytesRef target, boolean useCache) {
- throw new UnsupportedOperationException();
- }
-}
-
-
-/**
- * Class to save memory by only storing every nth term (for random access), while
- * numbering the terms, allowing them to be retrieved later by number.
- * This is only valid when used with the IndexReader it was created with.
- * The IndexReader is not actually stored to facilitate caching by using it as a key in
- * a weak hash map.
- */
-class TermIndex {
- final static int intervalBits = 7; // decrease to a low number like 2 for testing
- final static int intervalMask = 0xffffffff >>> (32-intervalBits);
- final static int interval = 1 << intervalBits;
-
- final String field;
- final BytesRef prefix;
- BytesRef[] index;
- int nTerms;
- long sizeOfStrings;
-
- TermIndex(String field) {
- this(field, null);
- }
-
- TermIndex(String field, String prefix) {
- this.field = field;
- this.prefix = prefix == null ? null : new BytesRef(prefix);
- }
-
- NumberedTermsEnum getEnumerator(IndexReader reader, int termNumber) throws IOException {
- NumberedTermsEnum te = new NumberedTermsEnum(reader, this);
- te.skipTo(termNumber);
- return te;
- }
-
- /* The first time an enumerator is requested, it should be used
- with next() to fully traverse all of the terms so the index
- will be built.
- */
- NumberedTermsEnum getEnumerator(IndexReader reader) throws IOException {
- if (index==null) return new NumberedTermsEnum(reader,this, prefix==null?new BytesRef():prefix, 0) {
- ArrayList<BytesRef> lst;
- PagedBytes bytes;
-
- @Override
- protected BytesRef setTerm() throws IOException {
- BytesRef br = super.setTerm();
- if (br != null && (pos & intervalMask)==0) {
- sizeOfStrings += br.length;
- if (lst==null) {
- lst = new ArrayList<BytesRef>();
- bytes = new PagedBytes(15);
- }
- BytesRef out = new BytesRef();
- bytes.copy(br, out);
- lst.add(out);
- }
- return br;
- }
-
- @Override
- public BytesRef skipTo(int termNumber) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void close() throws IOException {
- nTerms=pos;
- super.close();
- index = lst!=null ? lst.toArray(new BytesRef[lst.size()]) : new BytesRef[0];
- }
- };
- else return new NumberedTermsEnum(reader,this,new BytesRef(),0);
- }
-
-
- /**
- * Returns the approximate amount of memory taken by this TermIndex.
- * This is only an approximation and doesn't take into account java object overhead.
- *
- * @return
- * the approximate memory consumption in bytes
- */
- public long memSize() {
- // assume 8 byte references?
- return 8+8+8+8+(index.length<<3)+sizeOfStrings;
- }
-}
-
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/response/JSONResponseWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/response/JSONResponseWriter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/response/JSONResponseWriter.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/response/JSONResponseWriter.java Sat May 14 13:51:35 2011
@@ -316,7 +316,8 @@ class JSONWriter extends TextResponseWri
if( idx > 0 ) {
writeArraySeparator();
}
-
+
+ indent();
writeMapOpener(doc.size());
incLevel();
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/response/PageTool.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/response/PageTool.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/response/PageTool.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/response/PageTool.java Sat May 14 13:51:35 2011
@@ -19,8 +19,10 @@ package org.apache.solr.response;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.search.DocList;
import org.apache.solr.search.DocSlice;
import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.SolrException;
public class PageTool {
private long start;
@@ -42,10 +44,16 @@ public class PageTool {
DocSlice doc_slice = (DocSlice) docs;
results_found = doc_slice.matches();
start = doc_slice.offset();
- } else {
+ } else if(docs instanceof ResultContext) {
+ DocList dl = ((ResultContext) docs).docs;
+ results_found = dl.matches();
+ start = dl.offset();
+ } else if(docs instanceof SolrDocumentList) {
SolrDocumentList doc_list = (SolrDocumentList) docs;
results_found = doc_list.getNumFound();
start = doc_list.getStart();
+ } else {
+ throw new SolrException(SolrException.ErrorCode.UNKNOWN, "Unknown response type "+docs+". Expected one of DocSlice, ResultContext or SolrDocumentList");
}
}
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/response/transform/ValueSourceAugmenter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/response/transform/ValueSourceAugmenter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/response/transform/ValueSourceAugmenter.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/response/transform/ValueSourceAugmenter.java Sat May 14 13:51:35 2011
@@ -16,10 +16,19 @@
*/
package org.apache.solr.response.transform;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.ReaderUtil;
import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.core.SolrCore;
import org.apache.solr.search.QParser;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.function.DocValues;
import org.apache.solr.search.function.ValueSource;
+import java.io.IOException;
+import java.util.Map;
+
/**
* Add values from a ValueSource (function query etc)
*
@@ -32,13 +41,15 @@ public class ValueSourceAugmenter extend
{
public final String name;
public final QParser qparser;
- public final ValueSource values;
+ public final ValueSource valueSource;
+
+
- public ValueSourceAugmenter( String name, QParser qparser, ValueSource values )
+ public ValueSourceAugmenter( String name, QParser qparser, ValueSource valueSource )
{
this.name = name;
this.qparser = qparser;
- this.values = values;
+ this.valueSource = valueSource;
}
@Override
@@ -49,14 +60,42 @@ public class ValueSourceAugmenter extend
@Override
public void setContext( TransformContext context ) {
- // maybe we do something here?
+ IndexReader reader = qparser.getReq().getSearcher().getIndexReader();
+ readerContexts = reader.getTopReaderContext().leaves();
+ docValuesArr = new DocValues[readerContexts.length];
+
+ searcher = qparser.getReq().getSearcher();
+ this.fcontext = valueSource.newContext(searcher);
}
+
+ Map fcontext;
+ SolrIndexSearcher searcher;
+ IndexReader.AtomicReaderContext[] readerContexts;
+ DocValues docValuesArr[];
+
+
@Override
public void transform(SolrDocument doc, int docid) {
- // TODO, should know what the real type is -- not always string
- // how do we get to docvalues?
- Object v = "now what..."; //values.g.strVal( docid );
- doc.setField( name, v );
+ // This is only good for random-access functions
+
+ try {
+
+ // TODO: calculate this stuff just once across diff functions
+ int idx = ReaderUtil.subIndex(docid, readerContexts);
+ IndexReader.AtomicReaderContext rcontext = readerContexts[idx];
+ DocValues values = docValuesArr[idx];
+ if (values == null) {
+ docValuesArr[idx] = values = valueSource.getValues(fcontext, rcontext);
+ }
+
+ int localId = docid - rcontext.docBase;
+ Object val = values.objectVal(localId);
+ if (val != null) {
+ doc.setField( name, val );
+ }
+ } catch (IOException e) {
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "exception at docid " + docid + " for valuesource " + valueSource, e, false);
+ }
}
}
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/DateField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/DateField.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/DateField.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/DateField.java Sat May 14 13:51:35 2011
@@ -487,6 +487,17 @@ class DateFieldSource extends FieldCache
}
@Override
+ public Object objectVal(int doc) {
+ int ord=termsIndex.getOrd(doc);
+ if (ord == 0) {
+ return null;
+ } else {
+ BytesRef br = termsIndex.lookup(ord, new BytesRef());
+ return ft.toObject(null, br);
+ }
+ }
+
+ @Override
public String toString(int doc) {
return description() + '=' + intVal(doc);
}
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/FieldProperties.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/FieldProperties.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/FieldProperties.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/FieldProperties.java Sat May 14 13:51:35 2011
@@ -22,29 +22,31 @@ import java.util.HashMap;
/**
* @version $Id$
+ *
+ * @lucene.internal
*/
-abstract class FieldProperties {
+public abstract class FieldProperties {
// use a bitfield instead of many different boolean variables since
// many of the variables are independent or semi-independent.
// bit values for boolean field properties.
- final static int INDEXED = 0x00000001;
- final static int TOKENIZED = 0x00000002;
- final static int STORED = 0x00000004;
- final static int BINARY = 0x00000008;
- final static int OMIT_NORMS = 0x00000010;
- final static int OMIT_TF_POSITIONS = 0x00000020;
- final static int STORE_TERMVECTORS = 0x00000040;
- final static int STORE_TERMPOSITIONS = 0x00000080;
- final static int STORE_TERMOFFSETS = 0x00000100;
+ protected final static int INDEXED = 0x00000001;
+ protected final static int TOKENIZED = 0x00000002;
+ protected final static int STORED = 0x00000004;
+ protected final static int BINARY = 0x00000008;
+ protected final static int OMIT_NORMS = 0x00000010;
+ protected final static int OMIT_TF_POSITIONS = 0x00000020;
+ protected final static int STORE_TERMVECTORS = 0x00000040;
+ protected final static int STORE_TERMPOSITIONS = 0x00000080;
+ protected final static int STORE_TERMOFFSETS = 0x00000100;
- final static int MULTIVALUED = 0x00000200;
- final static int SORT_MISSING_FIRST = 0x00000400;
- final static int SORT_MISSING_LAST = 0x00000800;
+ protected final static int MULTIVALUED = 0x00000200;
+ protected final static int SORT_MISSING_FIRST = 0x00000400;
+ protected final static int SORT_MISSING_LAST = 0x00000800;
- final static int REQUIRED = 0x00001000;
+ protected final static int REQUIRED = 0x00001000;
static final String[] propertyNames = {
"indexed", "tokenized", "stored",
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/FieldType.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/FieldType.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/FieldType.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/FieldType.java Sat May 14 13:51:35 2011
@@ -84,6 +84,11 @@ public abstract class FieldType extends
public boolean isMultiValued() {
return (properties & MULTIVALUED) != 0;
}
+
+ /** Check if a property is set */
+ protected boolean hasProperty( int p ) {
+ return (properties & p) != 0;
+ }
/**
* A "polyField" is a FieldType that can produce more than one Fieldable instance for a single value, via the {@link #createFields(org.apache.solr.schema.SchemaField, Object, float)} method. This is useful
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/IndexSchema.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/IndexSchema.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/IndexSchema.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/IndexSchema.java Sat May 14 13:51:35 2011
@@ -797,19 +797,23 @@ public final class IndexSchema {
NamedNodeMap attrs = node.getAttributes();
String analyzerName = DOMUtil.getAttr(attrs,"class");
if (analyzerName != null) {
- // No need to be core-aware as Analyzers are not in the core-aware list
- final Class<? extends Analyzer> clazz = loader.findClass(analyzerName).asSubclass(Analyzer.class);
try {
+ // No need to be core-aware as Analyzers are not in the core-aware list
+ final Class<? extends Analyzer> clazz = loader.findClass
+ (analyzerName).asSubclass(Analyzer.class);
+
try {
- // first try to use a ctor with version parameter (needed for many new Analyzers that have no default one anymore)
+ // first try to use a ctor with version parameter
+ // (needed for many new Analyzers that have no default one anymore)
Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class);
final String matchVersionStr = DOMUtil.getAttr(attrs, LUCENE_MATCH_VERSION_PARAM);
final Version luceneMatchVersion = (matchVersionStr == null) ?
solrConfig.luceneMatchVersion : Config.parseLuceneVersionString(matchVersionStr);
if (luceneMatchVersion == null) {
- throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
- "Configuration Error: Analyzer '" + clazz.getName() +
- "' needs a 'luceneMatchVersion' parameter");
+ throw new SolrException
+ ( SolrException.ErrorCode.SERVER_ERROR,
+ "Configuration Error: Analyzer '" + clazz.getName() +
+ "' needs a 'luceneMatchVersion' parameter");
}
return cnstr.newInstance(luceneMatchVersion);
} catch (NoSuchMethodException nsme) {
@@ -817,8 +821,9 @@ public final class IndexSchema {
return clazz.newInstance();
}
} catch (Exception e) {
+ log.error("Cannot load analyzer: "+analyzerName, e);
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
- "Cannot load analyzer: "+analyzerName );
+ "Cannot load analyzer: "+analyzerName, e );
}
}
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/RandomSortField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/RandomSortField.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/RandomSortField.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/RandomSortField.java Sat May 14 13:51:35 2011
@@ -28,6 +28,7 @@ import org.apache.lucene.util.ReaderUtil
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.search.function.DocValues;
+import org.apache.solr.search.function.IntDocValues;
import org.apache.solr.search.function.ValueSource;
/**
@@ -157,37 +158,12 @@ public class RandomSortField extends Fie
@Override
public DocValues getValues(Map context, final AtomicReaderContext readerContext) throws IOException {
- return new DocValues() {
+ return new IntDocValues(this) {
private final int seed = getSeed(field, readerContext);
@Override
- public float floatVal(int doc) {
- return (float)hash(doc+seed);
- }
-
- @Override
public int intVal(int doc) {
return hash(doc+seed);
}
-
- @Override
- public long longVal(int doc) {
- return (long)hash(doc+seed);
- }
-
- @Override
- public double doubleVal(int doc) {
- return (double)hash(doc+seed);
- }
-
- @Override
- public String strVal(int doc) {
- return Integer.toString(hash(doc+seed));
- }
-
- @Override
- public String toString(int doc) {
- return description() + '=' + intVal(doc);
- }
};
}
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SchemaField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SchemaField.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SchemaField.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SchemaField.java Sat May 14 13:51:35 2011
@@ -19,7 +19,6 @@ package org.apache.solr.schema;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.SortField;
import org.apache.solr.search.QParser;
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SortableDoubleField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SortableDoubleField.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SortableDoubleField.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SortableDoubleField.java Sat May 14 13:51:35 2011
@@ -149,6 +149,12 @@ class SortableDoubleFieldSource extends
}
@Override
+ public Object objectVal(int doc) {
+ int ord=termsIndex.getOrd(doc);
+ return ord==0 ? null : NumberUtils.SortableStr2double(termsIndex.lookup(ord, spare));
+ }
+
+ @Override
public String toString(int doc) {
return description() + '=' + doubleVal(doc);
}
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SortableFloatField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SortableFloatField.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SortableFloatField.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SortableFloatField.java Sat May 14 13:51:35 2011
@@ -154,6 +154,12 @@ class SortableFloatFieldSource extends F
}
@Override
+ public Object objectVal(int doc) {
+ int ord=termsIndex.getOrd(doc);
+ return ord==0 ? null : NumberUtils.SortableStr2float(termsIndex.lookup(ord, spare));
+ }
+
+ @Override
public ValueFiller getValueFiller() {
return new ValueFiller() {
private final MutableValueFloat mval = new MutableValueFloat();
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SortableIntField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SortableIntField.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SortableIntField.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SortableIntField.java Sat May 14 13:51:35 2011
@@ -156,6 +156,12 @@ class SortableIntFieldSource extends Fie
}
@Override
+ public Object objectVal(int doc) {
+ int ord=termsIndex.getOrd(doc);
+ return ord==0 ? null : NumberUtils.SortableStr2int(termsIndex.lookup(ord, spare));
+ }
+
+ @Override
public ValueFiller getValueFiller() {
return new ValueFiller() {
private final MutableValueInt mval = new MutableValueInt();
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SortableLongField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SortableLongField.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SortableLongField.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/SortableLongField.java Sat May 14 13:51:35 2011
@@ -150,6 +150,12 @@ class SortableLongFieldSource extends Fi
}
@Override
+ public Object objectVal(int doc) {
+ int ord=termsIndex.getOrd(doc);
+ return ord==0 ? null : NumberUtils.SortableStr2long(termsIndex.lookup(ord, spare));
+ }
+
+ @Override
public String toString(int doc) {
return description() + '=' + longVal(doc);
}
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/StrFieldSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/StrFieldSource.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/StrFieldSource.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/StrFieldSource.java Sat May 14 13:51:35 2011
@@ -19,9 +19,11 @@ package org.apache.solr.schema;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.util.BytesRef;
+import org.apache.noggit.CharArr;
import org.apache.solr.search.function.DocValues;
import org.apache.solr.search.function.FieldCacheSource;
import org.apache.solr.search.function.StringIndexDocValues;
+import org.apache.solr.util.ByteUtils;
import java.io.IOException;
import java.util.Map;
@@ -40,33 +42,13 @@ public class StrFieldSource extends Fiel
@Override
public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
return new StringIndexDocValues(this, readerContext, field) {
+
@Override
protected String toTerm(String readableValue) {
return readableValue;
}
@Override
- public float floatVal(int doc) {
- return (float)intVal(doc);
- }
-
- @Override
- public int intVal(int doc) {
- int ord=termsIndex.getOrd(doc);
- return ord;
- }
-
- @Override
- public long longVal(int doc) {
- return (long)intVal(doc);
- }
-
- @Override
- public double doubleVal(int doc) {
- return (double)intVal(doc);
- }
-
- @Override
public int ordVal(int doc) {
return termsIndex.getOrd(doc);
}
@@ -77,13 +59,8 @@ public class StrFieldSource extends Fiel
}
@Override
- public String strVal(int doc) {
- int ord=termsIndex.getOrd(doc);
- if (ord == 0) {
- return null;
- } else {
- return termsIndex.lookup(ord, new BytesRef()).utf8ToString();
- }
+ public Object objectVal(int doc) {
+ return strVal(doc);
}
@Override
Modified: lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/TrieDateField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/TrieDateField.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/TrieDateField.java (original)
+++ lucene/dev/branches/flexscoring/solr/src/java/org/apache/solr/schema/TrieDateField.java Sat May 14 13:51:35 2011
@@ -18,210 +18,125 @@
package org.apache.solr.schema;
import org.apache.noggit.CharArr;
-import org.apache.solr.common.SolrException;
-import org.apache.solr.analysis.CharFilterFactory;
-import org.apache.solr.analysis.TokenFilterFactory;
-import org.apache.solr.analysis.TokenizerChain;
-import org.apache.solr.analysis.TrieTokenizerFactory;
-import org.apache.solr.search.function.*;
+import org.apache.solr.search.function.ValueSource;
import org.apache.solr.search.QParser;
import org.apache.solr.response.TextResponseWriter;
import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.document.Field;
import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.NumericRangeQuery;
-import org.apache.lucene.search.cache.CachedArrayCreator;
-import org.apache.lucene.search.cache.LongValuesCreator;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.NumericUtils;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.NumericTokenStream;
import java.util.Map;
import java.util.Date;
import java.io.IOException;
public class TrieDateField extends DateField {
- protected int precisionStepArg = TrieField.DEFAULT_PRECISION_STEP; // the one passed in or defaulted
- protected int precisionStep = precisionStepArg; // normalized
+
+ final TrieField wrappedField = new TrieField() {{
+ type = TrieTypes.DATE;
+ }};
@Override
protected void init(IndexSchema schema, Map<String, String> args) {
- String p = args.remove("precisionStep");
- if (p != null) {
- precisionStepArg = Integer.parseInt(p);
- }
- // normalize the precisionStep
- precisionStep = precisionStepArg;
- if (precisionStep<=0 || precisionStep>=64) precisionStep=Integer.MAX_VALUE;
-
- CharFilterFactory[] filterFactories = new CharFilterFactory[0];
- TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0];
- analyzer = new TokenizerChain(filterFactories, new TrieTokenizerFactory(TrieField.TrieTypes.DATE, precisionStep), tokenFilterFactories);
- // for query time we only need one token, so we use the biggest possible precisionStep:
- queryAnalyzer = new TokenizerChain(filterFactories, new TrieTokenizerFactory(TrieField.TrieTypes.DATE, Integer.MAX_VALUE), tokenFilterFactories);
+ wrappedField.init(schema, args);
+ analyzer = wrappedField.analyzer;
+ queryAnalyzer = wrappedField.queryAnalyzer;
}
@Override
public Date toObject(Fieldable f) {
- byte[] arr = f.getBinaryValue();
- if (arr==null) throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,TrieField.badFieldString(f));
- return new Date(TrieFieldHelper.toLong(arr));
+ return (Date) wrappedField.toObject(f);
}
@Override
public Object toObject(SchemaField sf, BytesRef term) {
- return new Date(NumericUtils.prefixCodedToLong(term));
+ return wrappedField.toObject(sf, term);
}
@Override
public SortField getSortField(SchemaField field, boolean top) {
- field.checkSortability();
-
- int flags = CachedArrayCreator.CACHE_VALUES_AND_BITS;
- boolean sortMissingLast = field.sortMissingLast();
- boolean sortMissingFirst = field.sortMissingFirst();
-
- Object missingValue = null;
- if( sortMissingLast ) {
- missingValue = top ? Long.MIN_VALUE : Long.MAX_VALUE;
- } else if( sortMissingFirst ) {
- missingValue = top ? Long.MAX_VALUE : Long.MIN_VALUE;
- }
- return new SortField(new LongValuesCreator(field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER, flags), top).setMissingValue(missingValue);
+ return wrappedField.getSortField(field, top);
}
@Override
public ValueSource getValueSource(SchemaField field, QParser parser) {
- field.checkFieldCacheSource(parser);
- return new TrieDateFieldSource( new LongValuesCreator( field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER, CachedArrayCreator.CACHE_VALUES_AND_BITS ));
+ return wrappedField.getValueSource(field, parser);
+ }
+
+ /**
+ * @return the precisionStep used to index values into the field
+ */
+ public int getPrecisionStep() {
+ return wrappedField.getPrecisionStep();
}
+
@Override
public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
- byte[] arr = f.getBinaryValue();
- if (arr==null) {
- writer.writeStr(name, TrieField.badFieldString(f),true);
- return;
- }
-
- writer.writeDate(name,new Date(TrieFieldHelper.toLong(arr)));
+ wrappedField.write(writer, name, f);
}
@Override
public boolean isTokenized() {
- return true;
+ return wrappedField.isTokenized();
}
- /**
- * @return the precisionStep used to index values into the field
- */
- public int getPrecisionStep() {
- return precisionStepArg;
+ @Override
+ public boolean multiValuedFieldCache() {
+ return wrappedField.multiValuedFieldCache();
}
-
-
@Override
public String storedToReadable(Fieldable f) {
- return toExternal(f);
+ return wrappedField.storedToReadable(f);
}
@Override
public String readableToIndexed(String val) {
- // TODO: Numeric should never be handled as String, that may break in future lucene versions! Change to use BytesRef for term texts!
- BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
- NumericUtils.longToPrefixCoded(super.parseMath(null, val).getTime(), 0, bytes);
- return bytes.utf8ToString();
+ return wrappedField.readableToIndexed(val);
}
@Override
public String toInternal(String val) {
- return readableToIndexed(val);
+ return wrappedField.toInternal(val);
}
@Override
public String toExternal(Fieldable f) {
- byte[] arr = f.getBinaryValue();
- if (arr==null) return TrieField.badFieldString(f);
- return super.toExternal(new Date(TrieFieldHelper.toLong(arr)));
+ return wrappedField.toExternal(f);
}
@Override
public String indexedToReadable(String _indexedForm) {
- final BytesRef indexedForm = new BytesRef(_indexedForm);
- return super.toExternal( new Date(NumericUtils.prefixCodedToLong(indexedForm)) );
+ return wrappedField.indexedToReadable(_indexedForm);
}
@Override
public void indexedToReadable(BytesRef input, CharArr out) {
- String ext = super.toExternal( new Date(NumericUtils.prefixCodedToLong(input)) );
- out.write(ext);
+ wrappedField.indexedToReadable(input, out);
}
@Override
public String storedToIndexed(Fieldable f) {
- // TODO: optimize to remove redundant string conversion
- return readableToIndexed(storedToReadable(f));
+ return wrappedField.storedToIndexed(f);
}
@Override
public Fieldable createField(SchemaField field, Object value, float boost) {
- boolean indexed = field.indexed();
- boolean stored = field.stored();
-
- if (!indexed && !stored) {
- if (log.isTraceEnabled())
- log.trace("Ignoring unindexed/unstored field: " + field);
- return null;
- }
-
- int ps = precisionStep;
-
- byte[] arr=null;
- TokenStream ts=null;
-
- long time = (value instanceof Date)
- ? ((Date)value).getTime()
- : super.parseMath(null, value.toString()).getTime();
-
- if (stored) arr = TrieFieldHelper.toArr(time);
- if (indexed) ts = new NumericTokenStream(ps).setLongValue(time);
-
- Field f;
- if (stored) {
- f = new Field(field.getName(), arr);
- if (indexed) f.setTokenStream(ts);
- } else {
- f = new Field(field.getName(), ts);
- }
-
- // term vectors aren't supported
-
- f.setOmitNorms(field.omitNorms());
- f.setOmitTermFreqAndPositions(field.omitTf());
- f.setBoost(boost);
- return f;
+ return wrappedField.createField(field, value, boost);
}
@Override
public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
- return getRangeQuery(parser, field,
- min==null ? null : super.parseMath(null,min),
- max==null ? null : super.parseMath(null,max),
- minInclusive, maxInclusive);
+ return wrappedField.getRangeQuery(parser, field, min, max, minInclusive, maxInclusive);
}
@Override
public Query getRangeQuery(QParser parser, SchemaField sf, Date min, Date max, boolean minInclusive, boolean maxInclusive) {
- int ps = precisionStep;
- Query query = NumericRangeQuery.newLongRange(sf.getName(), ps,
+ return NumericRangeQuery.newLongRange(sf.getName(), wrappedField.precisionStep,
min == null ? null : min.getTime(),
max == null ? null : max.getTime(),
minInclusive, maxInclusive);
-
- return query;
}
}