You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2008/11/25 05:02:10 UTC
svn commit: r720403 - in /lucene/solr/trunk: ./
src/java/org/apache/solr/common/params/ src/java/org/apache/solr/request/
src/test/org/apache/solr/request/ src/test/test-files/solr/conf/
Author: yonik
Date: Mon Nov 24 20:02:09 2008
New Revision: 720403
URL: http://svn.apache.org/viewvc?rev=720403&view=rev
Log:
SOLR-475: multi-valued faceting via un-inverted field
Added:
lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java (with props)
lucene/solr/trunk/src/test/org/apache/solr/request/TestFaceting.java (with props)
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/src/java/org/apache/solr/common/params/FacetParams.java
lucene/solr/trunk/src/java/org/apache/solr/request/SimpleFacets.java
lucene/solr/trunk/src/test/org/apache/solr/request/SimpleFacetsTest.java
lucene/solr/trunk/src/test/test-files/solr/conf/schema11.xml
Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=720403&r1=720402&r2=720403&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Mon Nov 24 20:02:09 2008
@@ -96,6 +96,11 @@
2. SOLR-808: Write string keys in Maps as extern strings in the javabin format. (Noble Paul via shalin)
+ 3. SOLR-475: New faceting method with better performance and smaller memory usage for
+ multi-valued fields with many unique values but relatively few values per document.
+ Controllable via the facet.method parameter - "fc" is the new default method and "enum"
+ is the original method. (yonik)
+
Bug Fixes
----------------------
Modified: lucene/solr/trunk/src/java/org/apache/solr/common/params/FacetParams.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/common/params/FacetParams.java?rev=720403&r1=720402&r2=720403&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/common/params/FacetParams.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/common/params/FacetParams.java Mon Nov 24 20:02:09 2008
@@ -29,6 +29,20 @@
* Should facet counts be calculated?
*/
public static final String FACET = "facet";
+
+ /** What method should be used to do the faceting */
+ public static final String FACET_METHOD = FACET + ".method";
+
+ /** Value for FACET_METHOD param to indicate that Solr should enumerate over terms
+ * in a field to calculate the facet counts.
+ */
+ public static final String FACET_METHOD_enum = "enum";
+
+ /** Value for FACET_METHOD param to indicate that Solr should enumerate over documents
+ * and count up terms by consulting an uninverted representation of the field values
+ * (such as the FieldCache used for sorting).
+ */
+ public static final String FACET_METHOD_fc = "fc";
/**
* Any lucene formatted queries the user would like to use for
Modified: lucene/solr/trunk/src/java/org/apache/solr/request/SimpleFacets.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/request/SimpleFacets.java?rev=720403&r1=720402&r2=720403&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/request/SimpleFacets.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/request/SimpleFacets.java Mon Nov 24 20:02:09 2008
@@ -148,16 +148,32 @@
boolean sort = params.getFieldBool(field, FacetParams.FACET_SORT, limit>0);
String prefix = params.getFieldParam(field,FacetParams.FACET_PREFIX);
+
NamedList counts;
SchemaField sf = searcher.getSchema().getField(field);
FieldType ft = sf.getType();
- if (sf.multiValued() || ft.isTokenized() || ft instanceof BoolField) {
+
+ // determine what type of faceting method to use
+ String method = params.getFieldParam(field, FacetParams.FACET_METHOD);
+ boolean enumMethod = FacetParams.FACET_METHOD_enum.equals(method);
+ if (method == null && ft instanceof BoolField) {
// Always use filters for booleans... we know the number of values is very small.
+ enumMethod = true;
+ }
+ boolean multiToken = sf.multiValued() || ft.isTokenized();
+
+ // unless the enum method is explicitly specified, use a counting method.
+ if (enumMethod) {
counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount,missing,sort,prefix);
} else {
- // TODO: future logic could use filters instead of the fieldcache if
- // the number of terms in the field is small enough.
- counts = getFieldCacheCounts(searcher, docs, field, offset,limit, mincount, missing, sort, prefix);
+ if (multiToken) {
+ UnInvertedField uif = UnInvertedField.getUnInvertedField(field, searcher);
+ counts = uif.getCounts(searcher, docs, offset, limit, mincount,missing,sort,prefix);
+ } else {
+ // TODO: future logic could use filters instead of the fieldcache if
+ // the number of terms in the field is small enough.
+ counts = getFieldCacheCounts(searcher, docs, field, offset,limit, mincount, missing, sort, prefix);
+ }
}
return counts;
Added: lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java?rev=720403&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java Mon Nov 24 20:02:09 2008
@@ -0,0 +1,908 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.request;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.search.TermQuery;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.SimpleFacets;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.search.BitDocSet;
+import org.apache.solr.search.DocIterator;
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.BoundedTreeSet;
+import org.apache.solr.util.OpenBitSet;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.WeakHashMap;
+
+/**
+ *
+ * Final form of the un-inverted field:
+ * Each document points to a list of term numbers that are contained in that document.
+ *
+ * Term numbers are in sorted order, and are encoded as variable-length deltas from the
+ * previous term number. Real term numbers start at 2 since 0 and 1 are reserved. A
+ * term number of 0 signals the end of the termNumber list.
+ *
+ * There is a single int[maxDoc()] which either contains a pointer into a byte[] for
+ * the termNumber lists, or directly contains the termNumber list if it fits in the 4
+ * bytes of an integer. If the first byte in the integer is 1, the next 3 bytes
+ * are a pointer into a byte[] where the termNumber list starts.
+ *
+ * There are actually 256 byte arrays, to compensate for the fact that the pointers
+ * into the byte arrays are only 3 bytes long. The correct byte array for a document
+ * is a function of its id.
+ *
+ * To save space and speed up faceting, any term that matches enough documents will
+ * not be un-inverted... it will be skipped while building the un-inverted field structure,
+ * and will use a set intersection method during faceting.
+ *
+ * To further save memory, the terms (the actual string values) are not all stored in
+ * memory, but a TermIndex is used to convert term numbers to term values only
+ * for the terms needed after faceting has completed. Only every 128th term value
+ * is stored, along with its corresponding term number, and this is used as an
+ * index to find the closest term and iterate until the desired number is hit (very
+ * much like Lucene's own internal term index).
+ *
+ */
+class UnInvertedField {
+ // Term numbers 0 and 1 are reserved (0 = end-of-list terminator,
+ // 1 = "pointer into byte[] follows" flag), so real term numbers start at 2.
+ private static int TNUM_OFFSET=2;
+
+ // A term whose docFreq met the uninvert threshold; it is skipped while
+ // building the term-number lists and counted via set intersection instead.
+ static class TopTerm {
+ Term term;
+ int termNum;
+
+ long memSize() {
+ return 8 + // obj header
+ 8 + 8 +(term.text().length()<<1) + //term
+ 4; // int
+ }
+ }
+
+ String field;
+ int numTermsInField;
+ int termsInverted; // number of unique terms that were un-inverted
+ long termInstances; // total number of references to term numbers
+ final TermIndex ti;
+
+ int[] index; // per doc: inline term-number list, or (offset<<8)|1 pointing into tnums
+ byte[][] tnums = new byte[256][]; // term-number lists; array chosen by (doc>>16)&0xff
+ int[] maxTermCounts; // per term: total count across all documents
+ final Map<Integer,TopTerm> bigTerms = new LinkedHashMap<Integer,TopTerm>();
+
+
+ /** Approximate memory usage of this structure in bytes.
+  * Rough field-size arithmetic; most JVM object overhead is ignored. */
+ public long memSize() {
+ long sz = 6*8 + 12; // local fields
+ sz += bigTerms.size() * 64;
+ for (TopTerm tt : bigTerms.values()) {
+ sz += tt.memSize();
+ }
+ if (index != null) sz += index.length * 4;
+ if (tnums!=null) {
+ for (byte[] arr : tnums)
+ if (arr != null) sz += arr.length;
+ }
+ if (maxTermCounts != null)
+ sz += maxTermCounts.length * 4;
+ sz += ti.memSize();
+ return sz;
+ }
+
+
+ /** Number of bytes to represent an unsigned int as a vint. */
+ static int vIntSize(int x) {
+ if ((x & (0xffffffff << (7*1))) == 0 ) {
+ return 1;
+ }
+ if ((x & (0xffffffff << (7*2))) == 0 ) {
+ return 2;
+ }
+ if ((x & (0xffffffff << (7*3))) == 0 ) {
+ return 3;
+ }
+ if ((x & (0xffffffff << (7*4))) == 0 ) {
+ return 4;
+ }
+ return 5;
+ }
+
+
+ // Writes x as a vInt (high-order groups first, high bit set on all but the
+ // last byte) into arr starting at pos; returns the new end position.
+ // todo: if we know the size of the vInt already, we could do
+ // a single switch on the size
+ static int writeInt(int x, byte[] arr, int pos) {
+ int a;
+ a = (x >>> (7*4));
+ if (a != 0) {
+ arr[pos++] = (byte)(a | 0x80);
+ }
+ a = (x >>> (7*3));
+ if (a != 0) {
+ arr[pos++] = (byte)(a | 0x80);
+ }
+ a = (x >>> (7*2));
+ if (a != 0) {
+ arr[pos++] = (byte)(a | 0x80);
+ }
+ a = (x >>> (7*1));
+ if (a != 0) {
+ arr[pos++] = (byte)(a | 0x80);
+ }
+ arr[pos++] = (byte)(x & 0x7f);
+ return pos;
+ }
+
+
+
+ /** Builds the un-inverted representation of {@code field} immediately. */
+ public UnInvertedField(String field, SolrIndexSearcher searcher) throws IOException {
+ this.field = field;
+ this.ti = new TermIndex(field);
+ uninvert(searcher);
+ }
+
+
+ // Single pass over all terms of the field, building the per-document
+ // term-number lists (phase 1: intermediate per-doc byte[]s; phase 2:
+ // consolidation into the shared tnums arrays).
+ private void uninvert(SolrIndexSearcher searcher) throws IOException {
+ long startTime = System.currentTimeMillis();
+
+ IndexReader reader = searcher.getReader();
+ int maxDoc = reader.maxDoc();
+
+ int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number
+ this.index = index;
+ final int[] lastTerm = new int[maxDoc]; // last term we saw for this document
+ final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)
+ maxTermCounts = new int[1024];
+
+ NumberedTermEnum te = ti.getEnumerator(reader);
+
+ // threshold, over which we use set intersections instead of counting
+ // to (1) save memory, and (2) speed up faceting.
+ // Add 2 for testing purposes so that there will always be some terms under
+ // the threshold even when the index is very small.
+ int threshold = maxDoc / 20 + 2;
+ // threshold = 2000000000; //////////////////////////////// USE FOR TESTING
+ int[] docs = new int[1000];
+ int[] freqs = new int[1000];
+
+ // we need a minimum of 9 bytes, but round up to 12 since the space would
+ // be wasted with most allocators anyway.
+ byte[] tempArr = new byte[12];
+
+ //
+ // enumerate all terms, and build an intermediate form of the un-inverted field.
+ //
+ // During this intermediate form, every document has a (potential) byte[]
+ // and the int[maxDoc()] array either contains the termNumber list directly
+ // or the *end* offset of the termNumber list in its byte array (for faster
+ // appending and faster creation of the final form).
+ //
+ // idea... if things are too large while building, we could do a range of docs
+ // at a time (but it would be a fair amount slower to build)
+ // could also do ranges in parallel to take advantage of multiple CPUs
+
+ // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
+ // values. This requires going over the field first to find the most
+ // frequent terms ahead of time.
+
+ for (;;) {
+ Term t = te.term();
+ if (t==null) break;
+
+ int termNum = te.getTermNumber();
+
+ if (termNum >= maxTermCounts.length) {
+ // resize, but conserve memory by not doubling
+ // resize at end??? we waste a maximum of 16K (average of 8K)
+ int[] newMaxTermCounts = new int[maxTermCounts.length+4096];
+ System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum);
+ maxTermCounts = newMaxTermCounts;
+ }
+
+ int df = te.docFreq();
+ if (df >= threshold) {
+ // big term: record it for set-intersection counting and skip uninverting
+ TopTerm topTerm = new TopTerm();
+ topTerm.term = t;
+ topTerm.termNum = termNum;
+ bigTerms.put(topTerm.termNum, topTerm);
+
+ DocSet set = searcher.getDocSet(new TermQuery(topTerm.term));
+ maxTermCounts[termNum] = set.size();
+
+ te.next();
+ continue;
+ }
+
+ termsInverted++;
+
+ TermDocs td = te.getTermDocs();
+ td.seek(te);
+ for(;;) {
+ int n = td.read(docs,freqs);
+ if (n <= 0) break;
+
+ maxTermCounts[termNum] += n;
+
+ for (int i=0; i<n; i++) {
+ termInstances++;
+ int doc = docs[i];
+ // add 2 to the term number to make room for special reserved values:
+ // 0 (end term) and 1 (index into byte array follows)
+ int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
+ lastTerm[doc] = termNum;
+ int val = index[doc];
+
+ if ((val & 0xff)==1) {
+ // index into byte array (actually the end of
+ // the doc-specific byte[] when building)
+ int pos = val >>> 8;
+ int ilen = vIntSize(delta);
+ byte[] arr = bytes[doc];
+ int newend = pos+ilen;
+ if (newend > arr.length) {
+ // We avoid a doubling strategy to lower memory usage.
+ // this faceting method isn't for docs with many terms.
+ // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
+ // TODO: figure out what array lengths we can round up to w/o actually using more memory
+ // (how much space does a byte[] take up? Is data preceded by a 32 bit length only?
+ // It should be safe to round up to the nearest 32 bits in any case.
+ int newLen = (newend + 3) & 0xfffffffc; // 4 byte alignment
+ byte[] newarr = new byte[newLen];
+ System.arraycopy(arr, 0, newarr, 0, pos);
+ arr = newarr;
+ bytes[doc] = newarr;
+ }
+ pos = writeInt(delta, arr, pos);
+ index[doc] = (pos<<8) | 1; // update pointer to end index in byte[]
+ } else {
+ // OK, this int has data in it... find the end (a zero starting byte - not
+ // part of another number, hence not following a byte with the high bit set).
+ int ipos;
+ if (val==0) {
+ ipos=0;
+ } else if ((val & 0x0000ff80)==0) {
+ ipos=1;
+ } else if ((val & 0x00ff8000)==0) {
+ ipos=2;
+ } else if ((val & 0xff800000)==0) {
+ ipos=3;
+ } else {
+ ipos=4;
+ }
+
+ // try to append the new delta in-place after the existing bytes
+ int endPos = writeInt(delta, tempArr, ipos);
+ if (endPos <= 4) {
+ // value will fit in the integer... move bytes back
+ for (int j=ipos; j<endPos; j++) {
+ val |= (tempArr[j] & 0xff) << (j<<3);
+ }
+ index[doc] = val;
+ } else {
+ // value won't fit... move integer into byte[]
+ for (int j=0; j<ipos; j++) {
+ tempArr[j] = (byte)val;
+ val >>>=8;
+ }
+ // point at the end index in the byte[]
+ index[doc] = (endPos<<8) | 1;
+ bytes[doc] = tempArr;
+ tempArr = new byte[12];
+ }
+
+ }
+
+ }
+
+ }
+
+ te.next();
+ }
+
+ numTermsInField = te.getTermNumber();
+ te.close();
+
+ long midPoint = System.currentTimeMillis();
+
+ if (termInstances == 0) {
+ // we didn't invert anything
+ // lower memory consumption.
+ index = this.index = null;
+ tnums = null;
+ } else {
+
+ //
+ // transform intermediate form into the final form, building a single byte[]
+ // at a time, and releasing the intermediate byte[]s as we go to avoid
+ // increasing the memory footprint.
+ //
+ for (int pass = 0; pass<256; pass++) {
+ byte[] target = tnums[pass];
+ int pos=0; // end in target;
+ if (target != null) {
+ pos = target.length;
+ } else {
+ target = new byte[4096];
+ }
+
+ // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
+ // where pp is the pass (which array we are building), and xx is all values.
+ // each pass shares the same byte[] for termNumber lists.
+ for (int docbase = pass<<16; docbase<maxDoc; docbase+=(1<<24)) {
+ int lim = Math.min(docbase + (1<<16), maxDoc);
+ for (int doc=docbase; doc<lim; doc++) {
+ int val = index[doc];
+ if ((val&0xff) == 1) {
+ int len = val >>> 8;
+ // NOTE(review): index[doc] is repointed before the 24-bit overflow
+ // check below throws; structure is discarded on failure anyway.
+ index[doc] = (pos<<8)|1; // change index to point to start of array
+ if ((pos & 0xff000000) != 0) {
+ // we only have 24 bits for the array index
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Too many values for UnInvertedField faceting on field "+field);
+ }
+ byte[] arr = bytes[doc];
+ bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
+ if (target.length <= pos + len) {
+ int newlen = target.length;
+ /*** we don't have to worry about the array getting too large
+ * since the "pos" param will overflow first (only 24 bits available)
+ if ((newlen<<1) <= 0) {
+ // overflow...
+ newlen = Integer.MAX_VALUE;
+ if (newlen <= pos + len) {
+ throw new SolrException(400,"Too many terms to uninvert field!");
+ }
+ } else {
+ while (newlen <= pos + len) newlen<<=1; // doubling strategy
+ }
+ ****/
+ while (newlen <= pos + len) newlen<<=1; // doubling strategy
+ byte[] newtarget = new byte[newlen];
+ System.arraycopy(target, 0, newtarget, 0, pos);
+ target = newtarget;
+ }
+ System.arraycopy(arr, 0, target, pos, len);
+ pos += len + 1; // skip single byte at end and leave it 0 for terminator
+ }
+ }
+ }
+
+ // shrink array
+ if (pos < target.length) {
+ byte[] newtarget = new byte[pos];
+ System.arraycopy(target, 0, newtarget, 0, pos);
+ target = newtarget;
+ }
+
+ tnums[pass] = target;
+
+ if ((pass << 16) > maxDoc)
+ break;
+ }
+ }
+
+ long endTime = System.currentTimeMillis();
+
+ SolrCore.log.info("UnInverted multi-valued field " + field + ", memSize=" + memSize()
+ + ", time="+(endTime-startTime)+", phase1="+(midPoint-startTime)
+ + ", nTerms=" + numTermsInField + ", bigTerms=" + bigTerms.size()
+ + ", termInstances=" + termInstances
+ );
+ }
+
+
+
+
+ /** Computes facet counts for {@code baseDocs} over this field.
+  * Big terms are counted by set intersection; everything else by decoding
+  * the per-document delta-encoded term-number lists. */
+ public NamedList getCounts(SolrIndexSearcher searcher, DocSet baseDocs, int offset, int limit, int mincount, boolean missing, boolean sort, String prefix) throws IOException {
+ FieldType ft = searcher.getSchema().getFieldType(field);
+
+ NamedList res = new NamedList(); // order is important
+
+ DocSet docs = baseDocs;
+ int baseSize = docs.size();
+ int maxDoc = searcher.maxDoc();
+
+ if (baseSize >= mincount) {
+
+ final int[] index = this.index;
+ final int[] counts = new int[numTermsInField];
+
+ //
+ // If there is prefix, find its start and end term numbers
+ //
+ int startTerm = 0;
+ int endTerm = numTermsInField; // one past the end
+
+ NumberedTermEnum te = ti.getEnumerator(searcher.getReader());
+ if (prefix != null && prefix.length() > 0) {
+ te.skipTo(prefix);
+ startTerm = te.getTermNumber();
+ te.skipTo(prefix + "\uffff\uffff\uffff\uffff");
+ endTerm = te.getTermNumber();
+ }
+
+ /***********
+ // Alternative 2: get the docSet of the prefix (could take a while) and
+ // then do the intersection with the baseDocSet first.
+ if (prefix != null && prefix.length() > 0) {
+ docs = searcher.getDocSet(new ConstantScorePrefixQuery(new Term(field, ft.toInternal(prefix))), docs);
+ // The issue with this method are problems of returning 0 counts for terms w/o
+ // the prefix. We can't just filter out those terms later because it may
+ // mean that we didn't collect enough terms in the queue (in the sorted case).
+ }
+ ***********/
+
+ // when more than half the index matches, counting the complement set
+ // is cheaper: count docs NOT in the set, then subtract from the totals.
+ boolean doNegative = baseSize > maxDoc >> 1 && termInstances > 0
+ && startTerm==0 && endTerm==numTermsInField
+ && docs instanceof BitDocSet;
+
+ if (doNegative) {
+ OpenBitSet bs = (OpenBitSet)((BitDocSet)docs).getBits().clone();
+ bs.flip(0, maxDoc);
+ // TODO: when iterator across negative elements is available, use that
+ // instead of creating a new bitset and inverting.
+ docs = new BitDocSet(bs, maxDoc - baseSize);
+ // simply negating will mean that we have deleted docs in the set.
+ // that should be OK, as their entries in our table should be empty.
+ }
+
+ // For the biggest terms, do straight set intersections
+ for (TopTerm tt : bigTerms.values()) {
+ // TODO: counts could be deferred if sorted==false
+ if (tt.termNum >= startTerm && tt.termNum < endTerm) {
+ counts[tt.termNum] = searcher.numDocs(new TermQuery(tt.term), docs);
+ }
+ }
+
+ // TODO: we could short-circuit counting altogether for sorted faceting
+ // where we already have enough terms from the bigTerms
+
+ // TODO: we could shrink the size of the collection array, and
+ // additionally break when the termNumber got above endTerm, but
+ // it would require two extra conditionals in the inner loop (although
+ // they would be predictable for the non-prefix case).
+ // Perhaps a different copy of the code would be warranted.
+
+ // decode each doc's delta-encoded term-number list and bump the counts
+ if (termInstances > 0) {
+ DocIterator iter = docs.iterator();
+ while (iter.hasNext()) {
+ int doc = iter.nextDoc();
+ int code = index[doc];
+
+ if ((code & 0xff)==1) {
+ // list lives in one of the shared byte arrays
+ int pos = code>>>8;
+ int whichArray = (doc >>> 16) & 0xff;
+ byte[] arr = tnums[whichArray];
+ int tnum = 0;
+ for(;;) {
+ int delta = 0;
+ for(;;) {
+ byte b = arr[pos++];
+ delta = (delta << 7) | (b & 0x7f);
+ if ((b & 0x80) == 0) break;
+ }
+ if (delta == 0) break;
+ tnum += delta - TNUM_OFFSET;
+ counts[tnum]++;
+ }
+ } else {
+ // list is packed directly into the int (low byte first)
+ int tnum = 0;
+ int delta = 0;
+ for (;;) {
+ delta = (delta << 7) | (code & 0x7f);
+ if ((code & 0x80)==0) {
+ if (delta==0) break;
+ tnum += delta - TNUM_OFFSET;
+ counts[tnum]++;
+ delta = 0;
+ }
+ code >>>= 8;
+ }
+ }
+ }
+ }
+
+ int off=offset;
+ int lim=limit>=0 ? limit : Integer.MAX_VALUE;
+
+ if (sort) {
+ int maxsize = limit>0 ? offset+limit : Integer.MAX_VALUE-1;
+ maxsize = Math.min(maxsize, numTermsInField);
+ final BoundedTreeSet<Long> queue = new BoundedTreeSet<Long>(maxsize);
+ int min=mincount-1; // the smallest value in the top 'N' values
+ for (int i=startTerm; i<endTerm; i++) {
+ int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
+ if (c>min) {
+ // NOTE: we use c>min rather than c>=min as an optimization because we are going in
+ // index order, so we already know that the keys are ordered. This can be very
+ // important if a lot of the counts are repeated (like zero counts would be).
+
+ // minimize object creation and speed comparison by creating a long that
+ // encompasses both count and term number.
+ // Since smaller values are kept in the TreeSet, make higher counts smaller.
+ //
+ // for equal counts, lower term numbers
+ // should come first and hence be "greater"
+
+ //long pair = (((long)c)<<32) | (0x7fffffff-i) ; // use if priority queue
+ long pair = (((long)-c)<<32) | i;
+ queue.add(new Long(pair));
+ if (queue.size()>=maxsize) min=-(int)(queue.last().longValue() >>> 32);
+ }
+ }
+ // now select the right page from the results
+ for (Long p : queue) {
+ if (--off>=0) continue;
+ if (--lim<0) break;
+ int c = -(int)(p.longValue() >>> 32);
+ //int tnum = 0x7fffffff - (int)p.longValue(); // use if priority queue
+ int tnum = (int)p.longValue();
+ String label = ft.indexedToReadable(getTermText(te, tnum));
+ res.add(label, c);
+ }
+ } else {
+ // add results in index order
+ int i=startTerm;
+ if (mincount<=0) {
+ // if mincount<=0, then we won't discard any terms and we know exactly
+ // where to start.
+ i=startTerm+off;
+ off=0;
+ }
+
+ for (; i<endTerm; i++) {
+ int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
+ // NOTE(review): the empty c==0 block below is dead code (zero counts
+ // fall through to the mincount test anyway); candidate for removal.
+ if (c==0) {
+
+ }
+ if (c<mincount || --off>=0) continue;
+ if (--lim<0) break;
+
+ String label = ft.indexedToReadable(getTermText(te, i));
+ res.add(label, c);
+ }
+ }
+
+ te.close();
+ }
+
+
+ if (missing) {
+ // TODO: a faster solution for this?
+ res.add(null, SimpleFacets.getFieldMissingCount(searcher, baseDocs, field));
+ }
+
+ return res;
+ }
+
+
+ // Resolves a term number back to its term text: big terms are looked up in
+ // the bigTerms map; everything else seeks via the numbered enumerator.
+ String getTermText(NumberedTermEnum te, int termNum) throws IOException {
+ if (bigTerms.size() > 0) {
+ // see if the term is one of our big terms.
+ TopTerm tt = bigTerms.get(termNum);
+ if (tt != null) {
+ return tt.term.text();
+ }
+ }
+
+ te.skipTo(termNum);
+ return te.term().text();
+ }
+
+
+ //////////////////////////////////////////////////////////////////
+ //////////////////////////// caching /////////////////////////////
+ //////////////////////////////////////////////////////////////////
+ // Marker object installed in the cache while a value is being computed,
+ // so concurrent requests for the same key wait instead of recomputing.
+ static final class CreationPlaceholder {
+ Object value;
+ }
+
+ public static UnInvertedField getUnInvertedField(String field, SolrIndexSearcher searcher) throws IOException {
+ return (UnInvertedField)multiValuedFieldCache.get(searcher, field);
+ }
+
+ static Cache multiValuedFieldCache = new Cache() {
+ protected Object createValue(SolrIndexSearcher searcher, Object key) throws IOException {
+ return new UnInvertedField((String)key, searcher);
+ }
+ };
+
+ /** Internal cache. (from lucene FieldCache) */
+ abstract static class Cache {
+ // weak keys: entries disappear when the searcher is no longer referenced
+ private final Map readerCache = new WeakHashMap();
+
+ protected abstract Object createValue(SolrIndexSearcher searcher, Object key) throws IOException;
+
+ public Object get(SolrIndexSearcher searcher, Object key) throws IOException {
+ Map innerCache;
+ Object value;
+ // first lock: locate (or install) the per-searcher map and, if the key
+ // is absent, a placeholder so only one thread computes the value
+ synchronized (readerCache) {
+ innerCache = (Map) readerCache.get(searcher);
+ if (innerCache == null) {
+ innerCache = new HashMap();
+ readerCache.put(searcher, innerCache);
+ value = null;
+ } else {
+ value = innerCache.get(key);
+ }
+ if (value == null) {
+ value = new CreationPlaceholder();
+ innerCache.put(key, value);
+ }
+ }
+ if (value instanceof CreationPlaceholder) {
+ // second lock (on the placeholder): compute once; other threads
+ // asking for the same key block here until the value is ready
+ synchronized (value) {
+ CreationPlaceholder progress = (CreationPlaceholder) value;
+ if (progress.value == null) {
+ progress.value = createValue(searcher, key);
+ synchronized (readerCache) {
+ innerCache.put(key, progress.value);
+ }
+ }
+ return progress.value;
+ }
+ }
+
+ return value;
+ }
+ }
+}
+
+
+// How to share TermDocs (int[] score[])???
+// How to share TermPositions?
+/***
+class TermEnumListener {
+ void doTerm(Term t) {
+ }
+ void done() {
+ }
+}
+***/
+
+
+/** A TermEnum that tracks the ordinal (term number) of the current term and
+ * supports seeking either by term text or by term number via a TermIndex. */
+class NumberedTermEnum extends TermEnum {
+ protected final IndexReader reader;
+ protected final TermIndex tindex;
+ protected TermEnum tenum; // the underlying enumerator, repositioned on seeks
+ protected int pos=-1; // ordinal of the current term; -1 = not positioned
+ protected Term t; // current term, or null when exhausted / off-field
+ protected TermDocs termDocs; // lazily created, reused across terms
+
+
+ NumberedTermEnum(IndexReader reader, TermIndex tindex) throws IOException {
+ this.reader = reader;
+ this.tindex = tindex;
+ }
+
+
+ NumberedTermEnum(IndexReader reader, TermIndex tindex, String termValue, int pos) throws IOException {
+ this.reader = reader;
+ this.tindex = tindex;
+ this.pos = pos;
+ tenum = reader.terms(tindex.createTerm(termValue));
+ setTerm();
+ }
+
+ /** Returns a TermDocs positioned at the current term (instance is reused). */
+ public TermDocs getTermDocs() throws IOException {
+ if (termDocs==null) termDocs = reader.termDocs(t);
+ else termDocs.seek(t);
+ return termDocs;
+ }
+
+ // Caches the underlying enumerator's term; nulls it out (returning false)
+ // once enumeration leaves our field.
+ protected boolean setTerm() {
+ t = tenum.term();
+ if (t==null || t.field() != tindex.fterm.field()) { // intern'd compare
+ t = null;
+ return false;
+ }
+ return true;
+ }
+
+
+ public boolean next() throws IOException {
+ pos++;
+ boolean b = tenum.next();
+ if (!b) {
+ t = null;
+ return false;
+ }
+ return setTerm(); // this is extra work if we know we are in bounds...
+ }
+
+ public Term term() {
+ return t;
+ }
+
+ public int docFreq() {
+ return tenum.docFreq();
+ }
+
+ public void close() throws IOException {
+ tenum.close();
+ }
+
+ public boolean skipTo(String target) throws IOException {
+ return skipTo(tindex.fterm.createTerm(target));
+ }
+
+ /** Positions this enum on the first term >= target, keeping pos in sync
+  * by seeking to the nearest indexed term and scanning forward. */
+ public boolean skipTo(Term target) throws IOException {
+ // already here
+ if (t != null && t.equals(target)) return true;
+
+ int startIdx = Arrays.binarySearch(tindex.index,target.text());
+
+ if (startIdx >= 0) {
+ // we hit the term exactly... lucky us!
+ tenum = reader.terms(target);
+ pos = startIdx << tindex.intervalBits;
+ return setTerm();
+ }
+
+ // we didn't hit the term exactly
+ startIdx=-startIdx-1;
+
+ if (startIdx == 0) {
+ // our target occurs *before* the first term
+ tenum = reader.terms(target);
+ pos = 0;
+ return setTerm();
+ }
+
+ // back up to the start of the block
+ startIdx--;
+
+ if ((pos >> tindex.intervalBits) == startIdx && t != null && t.text().compareTo(target.text())<=0) {
+ // we are already in the right block and the current term is before the term we want,
+ // so we don't need to seek.
+ } else {
+ // seek to the right block
+ tenum = reader.terms(target.createTerm(tindex.index[startIdx]));
+ pos = startIdx << tindex.intervalBits;
+ setTerm(); // should be true since it's in the index
+ }
+
+
+ // linear scan within the block until we reach (or pass) the target
+ while (t != null && t.text().compareTo(target.text()) < 0) {
+ next();
+ }
+
+ return t != null;
+ }
+
+
+ /** Positions this enum on the term with the given ordinal: re-seeks to the
+  * enclosing indexed block if needed, then steps forward delta terms. */
+ public boolean skipTo(int termNumber) throws IOException {
+ int delta = termNumber - pos;
+ if (delta < 0 || delta > tindex.interval || tenum==null) {
+ int idx = termNumber >>> tindex.intervalBits;
+ String base = tindex.index[idx];
+ pos = idx << tindex.intervalBits;
+ delta = termNumber - pos;
+ tenum = reader.terms(tindex.createTerm(base));
+ }
+ while (--delta >= 0) {
+ boolean b = tenum.next();
+ if (b==false) {
+ t = null;
+ return false;
+ }
+ ++pos;
+ }
+ return setTerm();
+ }
+
+ /** The current term number, starting at 0.
+ * Only valid if the previous call to next() or skipTo() returned true.
+ */
+ public int getTermNumber() {
+ return pos;
+ }
+}
+
+
+/**
+ * Class to save memory by only storing every nth term (for random access), while
+ * numbering the terms, allowing them to be retrieved later by number.
+ * This is only valid when used with the IndexReader it was created with.
+ * The IndexReader is not actually stored to facilitate caching by using it as a key in
+ * a weak hash map.
+ */
+class TermIndex {
+ final static int intervalBits = 7; // decrease to a low number like 2 for testing
+ final static int intervalMask = 0xffffffff >>> (32-intervalBits);
+ final static int interval = 1 << intervalBits; // every interval-th term is stored
+
+ final Term fterm; // prototype to be used in term construction w/o String.intern overhead
+ String[] index; // every interval-th term's text; null until first full traversal
+ int nTerms; // total number of terms in the field (set on close of first traversal)
+ long sizeOfStrings; // bytes used by the stored term texts (2 bytes per char)
+
+ TermIndex(String field) {
+ this.fterm = new Term(field, "");
+ }
+
+ Term createTerm(String termVal) {
+ return fterm.createTerm(termVal);
+ }
+
+ /** Returns an enumerator positioned at the given term number. */
+ NumberedTermEnum getEnumerator(IndexReader reader, int termNumber) throws IOException {
+ NumberedTermEnum te = new NumberedTermEnum(reader, this);
+ te.skipTo(termNumber);
+ return te;
+ }
+
+ /* The first time an enumerator is requested, it should be used
+ with next() to fully traverse all of the terms so the index
+ will be built.
+ */
+ NumberedTermEnum getEnumerator(IndexReader reader) throws IOException {
+ // first call: return an index-building enumerator whose close()
+ // materializes the sampled-term index from the traversal
+ if (index==null) return new NumberedTermEnum(reader,this,"",0) {
+ ArrayList<String> lst;
+
+ protected boolean setTerm() {
+ boolean b = super.setTerm();
+ // record every interval-th term's text for the index
+ if (b && (pos & intervalMask)==0) {
+ String text = term().text();
+ sizeOfStrings += text.length() << 1;
+ if (lst==null) {
+ lst = new ArrayList<String>();
+ }
+ lst.add(text);
+ }
+ return b;
+ }
+
+ public boolean skipTo(Term target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ public boolean skipTo(int termNumber) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ public void close() throws IOException {
+ nTerms=pos;
+ super.close();
+ index = lst!=null ? lst.toArray(new String[lst.size()]) : new String[0];
+ }
+ };
+ else return new NumberedTermEnum(reader,this,"",0);
+ }
+
+
+ /**
+ * Returns the approximate amount of memory taken by this DocSet.
+ * This is only an approximation and doesn't take into account java object overhead.
+ *
+ * @return
+ * the approximate memory consumption in bytes
+ */
+ public long memSize() {
+ // assume 8 byte references?
+ return 8+8+8+8+(index.length<<3)+sizeOfStrings;
+ }
+}
+
Propchange: lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Modified: lucene/solr/trunk/src/test/org/apache/solr/request/SimpleFacetsTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/request/SimpleFacetsTest.java?rev=720403&r1=720402&r2=720403&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/request/SimpleFacetsTest.java (original)
+++ lucene/solr/trunk/src/test/org/apache/solr/request/SimpleFacetsTest.java Mon Nov 24 20:02:09 2008
@@ -303,9 +303,10 @@
}
public void testFacetMultiValued() {
- doFacets("t_s");
- doFacets("t_s", "facet.enum.cache.minDf", "2");
- doFacets("t_s", "facet.enum.cache.minDf", "100");
+ doFacets("t_s", "facet.method","enum");
+ doFacets("t_s", "facet.method", "enum", "facet.enum.cache.minDf", "2");
+ doFacets("t_s", "facet.method", "enum", "facet.enum.cache.minDf", "100");
+ doFacets("t_s", "facet.method", "fc");
}
public void testFacetSingleValued() {
@@ -476,9 +477,10 @@
public void testFacetPrefixMultiValued() {
- doFacetPrefix("t_s");
- doFacetPrefix("t_s", "facet.enum.cache.minDf", "3");
- doFacetPrefix("t_s", "facet.enum.cache.minDf", "100");
+ doFacetPrefix("t_s", "facet.method","enum");
+ doFacetPrefix("t_s", "facet.method", "enum", "facet.enum.cache.minDf", "3");
+ doFacetPrefix("t_s", "facet.method", "enum", "facet.enum.cache.minDf", "100");
+ doFacetPrefix("t_s", "facet.method", "fc");
}
public void testFacetPrefixSingleValued() {
Added: lucene/solr/trunk/src/test/org/apache/solr/request/TestFaceting.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/request/TestFaceting.java?rev=720403&view=auto
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/request/TestFaceting.java (added)
+++ lucene/solr/trunk/src/test/org/apache/solr/request/TestFaceting.java Mon Nov 24 20:02:09 2008
@@ -0,0 +1,242 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.request;
+
+import org.apache.lucene.index.Term;
+import org.apache.solr.util.AbstractSolrTestCase;
+
+import java.util.Random;
+
+/**
+ * @version $Id$
+ */
+public class TestFaceting extends AbstractSolrTestCase {
+
+ public String getSchemaFile() { return "schema11.xml"; }
+ public String getSolrConfigFile() { return "solrconfig.xml"; }
+
+ public void setUp() throws Exception {
+ super.setUp();
+ }
+ public void tearDown() throws Exception {
+ close();
+ super.tearDown();
+ }
+
+ String t(int tnum) {
+ return String.format("%08d", tnum);
+ }
+
+ void createIndex(int nTerms) {
+ assertU(delQ("*:*"));
+ for (int i=0; i<nTerms; i++) {
+ assertU(adoc("id", Float.toString(i), proto.field(), t(i) ));
+ }
+ assertU(optimize()); // squeeze out any possible deleted docs
+ }
+
+ Term proto = new Term("field_s","");
+ SolrQueryRequest req; // used to get a searcher
+ void close() {
+ if (req!=null) req.close();
+ req = null;
+ }
+
+ void doTermEnum(int size) throws Exception {
+ close();
+ createIndex(size);
+ req = lrf.makeRequest("q","*:*");
+
+ TermIndex ti = new TermIndex(proto.field());
+ NumberedTermEnum te = ti.getEnumerator(req.getSearcher().getReader());
+
+ // iterate through first
+ while(te.term() != null) te.next();
+ assertEquals(size, te.getTermNumber());
+ te.close();
+
+ te = ti.getEnumerator(req.getSearcher().getReader());
+
+ Random r = new Random(size);
+ // test seeking by term string
+ for (int i=0; i<size*2+10; i++) {
+ int rnum = r.nextInt(size+2);
+ String s = t(rnum);
+ boolean b = te.skipTo(proto.createTerm(s));
+ assertEquals(b, rnum < size);
+ if (rnum < size) {
+ assertEquals(rnum, te.pos);
+ assertEquals(s, te.term().text());
+ } else {
+ assertEquals(null, te.term());
+ assertEquals(size, te.getTermNumber());
+ }
+ }
+
+ // test seeking before term
+ assertEquals(size>0, te.skipTo(proto.createTerm("000")));
+ assertEquals(0, te.getTermNumber());
+ if (size>0) {
+ assertEquals(t(0), te.term().text());
+ } else {
+ assertEquals(null, te.term());
+ }
+
+ if (size>0) {
+ // test seeking by term number
+ for (int i=0; i<size*2+10; i++) {
+ int rnum = r.nextInt(size);
+ String s = t(rnum);
+ boolean b = te.skipTo(rnum);
+ assertEquals(true, b);
+ assertEquals(rnum, te.pos);
+ assertEquals(s, te.term().text());
+ }
+ }
+ }
+
+ public void testTermEnum() throws Exception {
+ doTermEnum(0);
+ doTermEnum(1);
+ doTermEnum(TermIndex.interval - 1); // test boundaries around the block size
+ doTermEnum(TermIndex.interval);
+ doTermEnum(TermIndex.interval + 1);
+ doTermEnum(TermIndex.interval * 2 + 2);
+ // doTermEnum(TermIndex.interval * 3 + 3);
+ }
+
+ public void testFacets() throws Exception {
+ StringBuilder sb = new StringBuilder();
+
+ // go over 4096 to test some of the buffer resizing
+ for (int i=0; i<5000; i++) {
+ sb.append(t(i));
+ sb.append(' ');
+ }
+
+ assertU(adoc("id", "1", "many_ws", sb.toString()));
+ assertU(commit());
+
+ assertQ("check many tokens",
+ req("q", "id:1","indent","true"
+ ,"facet", "true", "facet.method","fc"
+ ,"facet.field", "many_ws"
+ ,"facet.limit", "-1"
+ )
+ ,"*[count(//lst[@name='many_ws']/int)=5000]"
+ ,"//lst[@name='many_ws']/int[@name='" + t(0) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(1) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(2) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(3) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(4) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(5) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(4092) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(4093) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(4094) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(4095) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(4096) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(4097) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(4098) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(4090) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(4999) + "'][.='1']"
+ );
+
+ // test gaps that take more than one byte
+ sb = new StringBuilder();
+ sb.append(t(0)).append(' ');
+ sb.append(t(150)).append(' ');
+ sb.append(t(301)).append(' ');
+ sb.append(t(453)).append(' ');
+ sb.append(t(606)).append(' ');
+ sb.append(t(1000)).append(' ');
+ sb.append(t(2010)).append(' ');
+ sb.append(t(3050)).append(' ');
+ sb.append(t(4999)).append(' ');
+ assertU(adoc("id", "2", "many_ws", sb.toString()));
+ assertQ("check many tokens",
+ req("q", "id:1","indent","true"
+ ,"facet", "true", "facet.method","fc"
+ ,"facet.field", "many_ws"
+ ,"facet.limit", "-1"
+ )
+ ,"*[count(//lst[@name='many_ws']/int)=5000]"
+ ,"//lst[@name='many_ws']/int[@name='" + t(0) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(150) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(301) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(453) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(606) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(1000) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(2010) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(3050) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(4999) + "'][.='1']"
+ );
+ }
+
+ public void testRegularBig() throws Exception {
+ StringBuilder sb = new StringBuilder();
+
+ // build a small whitespace-separated term list (per-doc variety comes from the i1-offset terms below)
+ int nTerms=7;
+ for (int i=0; i<nTerms; i++) {
+ sb.append(t(i));
+ sb.append(' ');
+ }
+ String many_ws = sb.toString();
+
+ int i1=1000000;
+
+ // int iter=65536+10;
+ int iter=1000;
+
+ for (int i=0; i<iter; i++) {
+ // assertU(adoc("id", t(i), "many_ws", many_ws + t(i1+i) + " " + t(i1*2+i)));
+ assertU(adoc("id", t(i), "many_ws", t(i1+i) + " " + t(i1*2+i)));
+ }
+ assertU(commit());
+
+ for (int i=0; i<iter; i+=iter/10) {
+ assertQ("check many tokens",
+ req("q", "id:"+t(i),"indent","true"
+ ,"facet", "true", "facet.method","fc"
+ ,"facet.field", "many_ws"
+ ,"facet.limit", "-1"
+ ,"facet.mincount", "1"
+ )
+ ,"*[count(//lst[@name='many_ws']/int)=" + 2 + "]"
+ ,"//lst[@name='many_ws']/int[@name='" + t(i1+i) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(i1*2+i) + "'][.='1']"
+ );
+ }
+
+ int i=iter-1;
+ assertQ("check many tokens",
+ req("q", "id:"+t(i),"indent","true"
+ ,"facet", "true", "facet.method","fc"
+ ,"facet.field", "many_ws"
+ ,"facet.limit", "-1"
+ ,"facet.mincount", "1"
+
+ )
+ ,"*[count(//lst[@name='many_ws']/int)=" + 2 + "]"
+ ,"//lst[@name='many_ws']/int[@name='" + t(i1+i) + "'][.='1']"
+ ,"//lst[@name='many_ws']/int[@name='" + t(i1*2+i) + "'][.='1']"
+ );
+ }
+
+
+}
\ No newline at end of file
Propchange: lucene/solr/trunk/src/test/org/apache/solr/request/TestFaceting.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/test/org/apache/solr/request/TestFaceting.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: lucene/solr/trunk/src/test/org/apache/solr/request/TestFaceting.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Modified: lucene/solr/trunk/src/test/test-files/solr/conf/schema11.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/schema11.xml?rev=720403&r1=720402&r2=720403&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/schema11.xml (original)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/schema11.xml Mon Nov 24 20:02:09 2008
@@ -285,6 +285,7 @@
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
+ <dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/>
<dynamicField name="*_extf" type="file"/>