You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ry...@apache.org on 2011/11/14 20:58:05 UTC
svn commit: r1201855 - in /lucene/dev/trunk/solr/core/src:
java/org/apache/solr/handler/component/ java/org/apache/solr/request/
test/org/apache/solr/handler/component/
Author: ryan
Date: Mon Nov 14 19:58:04 2011
New Revision: 1201855
URL: http://svn.apache.org/viewvc?rev=1201855&view=rev
Log:
SOLR-1023: StatsComponent can now support dates (and other non-numeric fields)
Added:
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java (with props)
Modified:
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsValues.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/UnInvertedField.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java?rev=1201855&r1=1201854&r2=1201855&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java Mon Nov 14 19:58:04 2011
@@ -19,6 +19,7 @@ package org.apache.solr.handler.componen
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.SchemaField;
import java.util.ArrayList;
import java.util.HashMap;
@@ -38,7 +39,8 @@ import java.util.Map;
public class FieldFacetStats {
public final String name;
final FieldCache.DocTermsIndex si;
- final FieldType ft;
+ final SchemaField facet_sf;
+ final SchemaField field_sf;
final int startTermIndex;
final int endTermIndex;
@@ -52,10 +54,11 @@ public class FieldFacetStats {
private final BytesRef tempBR = new BytesRef();
- public FieldFacetStats(String name, FieldCache.DocTermsIndex si, FieldType ft, int numStatsTerms) {
+ public FieldFacetStats(String name, FieldCache.DocTermsIndex si, SchemaField field_sf, SchemaField facet_sf, int numStatsTerms) {
this.name = name;
this.si = si;
- this.ft = ft;
+ this.field_sf = field_sf;
+ this.facet_sf = facet_sf;
this.numStatsTerms = numStatsTerms;
startTermIndex = 1;
@@ -82,22 +85,22 @@ public class FieldFacetStats {
}
}
- public boolean facet(int docID, Double v) {
+ public boolean facet(int docID, BytesRef v) {
int term = si.getOrd(docID);
int arrIdx = term - startTermIndex;
if (arrIdx >= 0 && arrIdx < nTerms) {
final BytesRef br = si.lookup(term, tempBR);
- String key = ft.indexedToReadable(br == null ? null : br.utf8ToString());
+ String key = (br == null)?null:facet_sf.getType().indexedToReadable(br.utf8ToString());
StatsValues stats = facetStatsValues.get(key);
if (stats == null) {
- stats = new StatsValues();
+ stats = StatsValuesFactory.createStatsValues(field_sf);
facetStatsValues.put(key, stats);
}
- if (v != null) {
+ if (v != null && v.length>0) {
stats.accumulate(v);
} else {
- stats.missing++;
+ stats.missing();
return false;
}
return true;
@@ -129,14 +132,14 @@ public class FieldFacetStats {
//function to accumulate counts for statsTermNum to specified value
- public boolean accumulateTermNum(int statsTermNum, Double value) {
+ public boolean accumulateTermNum(int statsTermNum, BytesRef value) {
if (value == null) return false;
for (Map.Entry<String, Integer> stringIntegerEntry : facetStatsTerms.get(statsTermNum).entrySet()) {
Map.Entry pairs = (Map.Entry) stringIntegerEntry;
String key = (String) pairs.getKey();
StatsValues facetStats = facetStatsValues.get(key);
if (facetStats == null) {
- facetStats = new StatsValues();
+ facetStats = StatsValuesFactory.createStatsValues(field_sf);
facetStatsValues.put(key, facetStats);
}
Integer count = (Integer) pairs.getValue();
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java?rev=1201855&r1=1201854&r2=1201855&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java Mon Nov 14 19:58:04 2011
@@ -18,12 +18,14 @@
package org.apache.solr.handler.component;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.CharsRef;
+import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.StatsParams;
import org.apache.solr.common.params.ShardParams;
@@ -177,7 +179,8 @@ class StatsInfo {
String[] statsFs = params.getParams(StatsParams.STATS_FIELD);
if (statsFs != null) {
for (String field : statsFs) {
- statsFields.put(field,new StatsValues());
+ SchemaField sf = rb.req.getSchema().getField(field);
+ statsFields.put(field, StatsValuesFactory.createStatsValues(sf));
}
}
}
@@ -244,58 +247,60 @@ class SimpleStats {
}
public NamedList<?> getFieldCacheStats(String fieldName, String[] facet ) {
- FieldType ft = searcher.getSchema().getFieldType(fieldName);
-
- FieldCache.DocTermsIndex si = null;
+ SchemaField sf = searcher.getSchema().getField(fieldName);
+
+ FieldCache.DocTermsIndex si;
try {
si = FieldCache.DEFAULT.getTermsIndex(searcher.getIndexReader(), fieldName);
}
catch (IOException e) {
throw new RuntimeException( "failed to open field cache for: "+fieldName, e );
}
- FieldFacetStats all = new FieldFacetStats( "all", si, ft, 0 );
- StatsValues allstats = new StatsValues();
- if ( all.nTerms <= 0 || docs.size() <= 0 ) return allstats.getStatsValues();
-
- // don't worry about faceting if the no documents match...
- int i=0;
- final FieldFacetStats[] finfo = new FieldFacetStats[facet.length];
- for( String f : facet ) {
- ft = searcher.getSchema().getFieldType(f);
+ StatsValues allstats = StatsValuesFactory.createStatsValues(sf);
+ final int nTerms = si.numOrd();
+ if ( nTerms <= 0 || docs.size() <= 0 ) return allstats.getStatsValues();
+
+ // don't worry about faceting if no documents match...
+ List<FieldFacetStats> facetStats = new ArrayList<FieldFacetStats>();
+ FieldCache.DocTermsIndex facetTermsIndex;
+ for( String facetField : facet ) {
+ SchemaField fsf = searcher.getSchema().getField(facetField);
+ FieldType facetFieldType = fsf.getType();
+
+ if (facetFieldType.isTokenized() || facetFieldType.isMultiValued()) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ "Stats can only facet on single-valued fields, not: " + facetField
+ + "[" + facetFieldType + "]");
+ }
try {
- si = FieldCache.DEFAULT.getTermsIndex(searcher.getIndexReader(), f);
- }
+ facetTermsIndex = FieldCache.DEFAULT.getTermsIndex(searcher.getIndexReader(), facetField);
+ }
catch (IOException e) {
- throw new RuntimeException( "failed to open field cache for: "+f, e );
+ throw new RuntimeException( "failed to open field cache for: "
+ + facetField, e );
}
- finfo[i++] = new FieldFacetStats( f, si, ft, 0 );
+ facetStats.add(new FieldFacetStats(facetField, facetTermsIndex, sf, fsf, nTerms));
}
- final CharsRef spare = new CharsRef();
+
final BytesRef tempBR = new BytesRef();
DocIterator iter = docs.iterator();
while (iter.hasNext()) {
int docID = iter.nextDoc();
- BytesRef raw = all.getTermText(docID, tempBR);
- Double v = null;
- if( raw != null ) {
- v = Double.parseDouble(all.ft.indexedToReadable(raw, spare).toString());
- allstats.accumulate(v);
- }
- else {
- allstats.missing++;
+ BytesRef raw = si.lookup(si.getOrd(docID), tempBR);
+ if( raw.length > 0 ) {
+ allstats.accumulate(raw);
+ } else {
+ allstats.missing();
}
-
- // now check the facets
- for( FieldFacetStats f : finfo ) {
- f.facet(docID, v);
+
+ // now update the facets
+ for (FieldFacetStats f : facetStats) {
+ f.facet(docID, raw);
}
}
-
- if( finfo.length > 0 ) {
- allstats.facets = new HashMap<String, Map<String,StatsValues>>();
- for( FieldFacetStats f : finfo ) {
- allstats.facets.put( f.name, f.facetStatsValues );
- }
+
+ for (FieldFacetStats f : facetStats) {
+ allstats.addFacet(f.name, f.facetStatsValues);
}
return allstats.getStatsValues();
}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsValues.java?rev=1201855&r1=1201854&r2=1201855&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsValues.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsValues.java Mon Nov 14 19:58:04 2011
@@ -19,141 +19,62 @@
package org.apache.solr.handler.component;
-import java.util.HashMap;
import java.util.Map;
+import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.SimpleOrderedMap;
-
-/** 2/11/2009 - Moved out of StatsComponent to allow open access to UnInvertedField
- * StatsValues is a utility to accumulate statistics on a set of values
- *
- * </p>
- * @see org.apache.solr.handler.component.StatsComponent
- *
-*/
-
-public class StatsValues {
- private static final String FACETS = "facets";
- double min;
- double max;
- double sum;
- double sumOfSquares;
- long count;
- long missing;
-
- // facetField facetValue
- public Map<String, Map<String,StatsValues>> facets;
-
- public StatsValues() {
- reset();
- }
-
- public void accumulate(NamedList stv){
- min = Math.min(min, (Double)stv.get("min"));
- max = Math.max(max, (Double)stv.get("max"));
- sum += (Double)stv.get("sum");
- count += (Long)stv.get("count");
- missing += (Long)stv.get("missing");
- sumOfSquares += (Double)stv.get("sumOfSquares");
-
- NamedList f = (NamedList)stv.get( FACETS );
- if( f != null ) {
- if( facets == null ) {
- facets = new HashMap<String, Map<String,StatsValues>>();
- }
-
- for( int i=0; i< f.size(); i++ ) {
- String field = f.getName(i);
- NamedList vals = (NamedList)f.getVal( i );
- Map<String,StatsValues> addTo = facets.get( field );
- if( addTo == null ) {
- addTo = new HashMap<String,StatsValues>();
- facets.put( field, addTo );
- }
- for( int j=0; j< vals.size(); j++ ) {
- String val = vals.getName(j);
- StatsValues vvals = addTo.get( val );
- if( vvals == null ) {
- vvals = new StatsValues();
- addTo.put( val, vvals );
- }
- vvals.accumulate( (NamedList)vals.getVal( j ) );
- }
- }
- }
- }
-
- public void accumulate(double v){
- sumOfSquares += (v*v); // for std deviation
- min = Math.min(min, v);
- max = Math.max(max, v);
- sum += v;
- count++;
- }
-
- public void accumulate(double v, int c){
- sumOfSquares += (v*v*c); // for std deviation
- min = Math.min(min, v);
- max = Math.max(max, v);
- sum += v*c;
- count+= c;
- }
-
- public void addMissing(int c){
- missing += c;
- }
-
- public double getAverage(){
- return sum / count;
- }
-
- public double getStandardDeviation()
- {
- if( count <= 1.0D )
- return 0.0D;
-
- return Math.sqrt( ( ( count * sumOfSquares ) - ( sum * sum ) )
- / ( count * ( count - 1.0D ) ) );
- }
-
- public long getCount()
- {
- return count;
- }
-
- public void reset(){
- min = Double.MAX_VALUE;
- max = -1.0*Double.MAX_VALUE;
- sum = count = missing = 0;
- sumOfSquares = 0;
- facets = null;
- }
-
- public NamedList<?> getStatsValues(){
- NamedList<Object> res = new SimpleOrderedMap<Object>();
- res.add("min", min);
- res.add("max", max);
- res.add("sum", sum);
- res.add("count", count);
- res.add("missing", missing);
- res.add("sumOfSquares", sumOfSquares );
- res.add("mean", getAverage());
- res.add( "stddev", getStandardDeviation() );
-
- // add the facet stats
- if( facets != null && facets.size() > 0 ) {
- NamedList<NamedList<?>> nl = new SimpleOrderedMap<NamedList<?>>();
- for( Map.Entry<String, Map<String,StatsValues>> entry : facets.entrySet() ) {
- NamedList<NamedList<?>> nl2 = new SimpleOrderedMap<NamedList<?>>();
- nl.add( entry.getKey(), nl2 );
- for( Map.Entry<String, StatsValues> e2 : entry.getValue().entrySet() ) {
- nl2.add( e2.getKey(), e2.getValue().getStatsValues() );
- }
- }
- res.add( FACETS, nl );
- }
- return res;
- }
+/**
+ * StatsValues defines the interface for the collection of statistical values about fields and facets.
+ * Implementations are obtained through StatsValuesFactory, which picks one based on the field's type.
+ */
+public interface StatsValues {
+
+ /**
+ * Accumulate the values based on those in the given NamedList. The list is expected to have the
+ * layout produced by {@link #getStatsValues()}, which allows previously computed statistics
+ * to be merged into this instance.
+ *
+ * @param stv NamedList whose values will be used to accumulate the current values
+ */
+ void accumulate(NamedList stv);
+
+ /**
+ * Accumulate the values based on the given value, counting it once
+ *
+ * @param value Value to use to accumulate the current values, given as the term bytes of the field value
+ */
+ void accumulate(BytesRef value);
+
+ /**
+ * Accumulate the values based on the given value, counting it as many times as specified
+ *
+ * @param value Value to use to accumulate the current values, given as the term bytes of the field value
+ * @param count number of times to accumulate this value
+ */
+ void accumulate(BytesRef value, int count);
+
+ /**
+ * Updates the statistics when a single document is missing a value
+ */
+ void missing();
+
+ /**
+ * Updates the statistics when multiple documents are missing a value
+ *
+ * @param count number of times to count a missing value
+ */
+ void addMissing(int count);
+
+ /**
+ * Adds the facet statistics for the facet with the given name
+ *
+ * @param facetName Name of the facet
+ * @param facetValues Facet statistics on a per facet value basis, keyed by facet value
+ */
+ void addFacet(String facetName, Map<String, StatsValues> facetValues);
+
+ /**
+ * Translates the values into a NamedList representation
+ *
+ * @return NamedList representation of the current values
+ */
+ NamedList<?> getStatsValues();
}
Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java?rev=1201855&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java Mon Nov 14 19:58:04 2011
@@ -0,0 +1,437 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.handler.component;
+
+import java.util.Date;
+import java.util.Map;
+import java.util.HashMap;
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.schema.*;
+
+/**
+ * Factory for {@link org.apache.solr.handler.component.StatsValues} instances. The concrete
+ * implementation is selected from the type of the field the statistics are collected for.
+ */
+public class StatsValuesFactory {
+
+  /**
+   * Builds the StatsValues implementation that matches the type of the given field.
+   *
+   * @param sf SchemaField for the field whose statistics will be created by the resulting StatsValues
+   * @return numeric, date or string StatsValues, depending on the type of the field
+   * @throws SolrException with a BAD_REQUEST code when the field's type is not one of the supported types
+   */
+  public static StatsValues createStatsValues(SchemaField sf) {
+    final FieldType type = sf.getType();
+
+    // The numeric check must come first: it is evaluated before the date check, as in the original chain
+    if (isNumericType(type)) {
+      return new NumericStatsValues(sf);
+    }
+    if (type instanceof DateField) {
+      return new DateStatsValues(sf);
+    }
+    if (type instanceof StrField) {
+      return new StringStatsValues(sf);
+    }
+    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field type " + type + " is not currently supported");
+  }
+
+  /**
+   * Tells whether values of the given type are handled by NumericStatsValues.
+   *
+   * @param type FieldType to inspect
+   * @return true for the plain, trie and sortable numeric field types
+   */
+  private static boolean isNumericType(FieldType type) {
+    return type instanceof DoubleField
+        || type instanceof IntField
+        || type instanceof LongField
+        || type instanceof ShortField
+        || type instanceof FloatField
+        || type instanceof ByteField
+        || type instanceof TrieField
+        || type instanceof SortableDoubleField
+        || type instanceof SortableIntField
+        || type instanceof SortableLongField
+        || type instanceof SortableFloatField;
+  }
+}
+
+/**
+ * Abstract implementation of {@link org.apache.solr.handler.component.StatsValues} that provides the default behavior
+ * for most StatsValues implementations.
+ *
+ * There are very few requirements placed on what statistics concrete implementations should collect, with the only required
+ * statistics being the minimum and maximum values.
+ *
+ * @param <T> Java type the field values are converted to before they are accumulated
+ */
+abstract class AbstractStatsValues<T> implements StatsValues {
+  private static final String FACETS = "facets";
+  final protected SchemaField sf;
+  final protected FieldType ft;
+  protected T max;
+  protected T min;
+  protected long missing;
+  protected long count;
+
+  // facetField   facetValue
+  protected Map<String, Map<String, StatsValues>> facets = new HashMap<String, Map<String, StatsValues>>();
+
+  /**
+   * @param sf SchemaField of the field the statistics are collected for; its type is used to decode values
+   */
+  protected AbstractStatsValues(SchemaField sf) {
+    this.sf = sf;
+    this.ft = sf.getType();
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @SuppressWarnings("unchecked") // "min" and "max" are written as T by getStatsValues()
+  public void accumulate(NamedList stv) {
+    count += (Long) stv.get("count");
+    missing += (Long) stv.get("missing");
+
+    updateMinMax((T) stv.get("min"), (T) stv.get("max"));
+    updateTypeSpecificStats(stv);
+
+    NamedList f = (NamedList) stv.get(FACETS);
+    if (f == null) {
+      return;
+    }
+
+    // merge the per facet field / per facet value statistics
+    for (int i = 0; i < f.size(); i++) {
+      String field = f.getName(i);
+      NamedList vals = (NamedList) f.getVal(i);
+      Map<String, StatsValues> addTo = facets.get(field);
+      if (addTo == null) {
+        addTo = new HashMap<String, StatsValues>();
+        facets.put(field, addTo);
+      }
+      for (int j = 0; j < vals.size(); j++) {
+        String val = vals.getName(j);
+        StatsValues vvals = addTo.get(val);
+        if (vvals == null) {
+          // facet statistics describe the same (stats) field, so they use the same SchemaField
+          vvals = StatsValuesFactory.createStatsValues(sf);
+          addTo.put(val, vvals);
+        }
+        vvals.accumulate((NamedList) vals.getVal(j));
+      }
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @SuppressWarnings("unchecked") // subclasses pair T with the object type produced by their field type
+  public void accumulate(BytesRef value) {
+    count++;
+    T typedValue = (T) ft.toObject(sf, value);
+    updateMinMax(typedValue, typedValue);
+    updateTypeSpecificStats(typedValue);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @SuppressWarnings("unchecked") // subclasses pair T with the object type produced by their field type
+  public void accumulate(BytesRef value, int count) {
+    this.count += count;
+    T typedValue = (T) ft.toObject(sf, value);
+    updateMinMax(typedValue, typedValue);
+    updateTypeSpecificStats(typedValue, count);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  public void missing() {
+    missing++;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  public void addMissing(int count) {
+    missing += count;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  public void addFacet(String facetName, Map<String, StatsValues> facetValues) {
+    facets.put(facetName, facetValues);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  public NamedList<?> getStatsValues() {
+    NamedList<Object> res = new SimpleOrderedMap<Object>();
+
+    res.add("min", min);
+    res.add("max", max);
+    res.add("count", count);
+    res.add("missing", missing);
+    addTypeSpecificStats(res);
+
+    // add the facet stats: a single "facets" entry holding every facet field, and only
+    // when there is at least one facet field (adding it per field would duplicate the entry)
+    if (!facets.isEmpty()) {
+      NamedList<NamedList<?>> nl = new SimpleOrderedMap<NamedList<?>>();
+      for (Map.Entry<String, Map<String, StatsValues>> entry : facets.entrySet()) {
+        NamedList<NamedList<?>> nl2 = new SimpleOrderedMap<NamedList<?>>();
+        nl.add(entry.getKey(), nl2);
+        for (Map.Entry<String, StatsValues> e2 : entry.getValue().entrySet()) {
+          nl2.add(e2.getKey(), e2.getValue().getStatsValues());
+        }
+      }
+      res.add(FACETS, nl);
+    }
+    return res;
+  }
+
+  /**
+   * Updates the minimum and maximum statistics based on the given values
+   *
+   * @param min Value that the current minimum should be updated against
+   * @param max Value that the current maximum should be updated against
+   */
+  protected abstract void updateMinMax(T min, T max);
+
+  /**
+   * Updates the type specific statistics based on the given value
+   *
+   * @param value Value the statistics should be updated against
+   */
+  protected abstract void updateTypeSpecificStats(T value);
+
+  /**
+   * Updates the type specific statistics based on the given value
+   *
+   * @param value Value the statistics should be updated against
+   * @param count Number of times the value is being accumulated
+   */
+  protected abstract void updateTypeSpecificStats(T value, int count);
+
+  /**
+   * Updates the type specific statistics based on the values in the given list
+   *
+   * @param stv List containing values the current statistics should be updated against
+   */
+  protected abstract void updateTypeSpecificStats(NamedList stv);
+
+  /**
+   * Add any type specific statistics to the given NamedList
+   *
+   * @param res NamedList to add the type specific statistics to
+   */
+  protected abstract void addTypeSpecificStats(NamedList<Object> res);
+}
+
+/**
+ * Implementation of StatsValues for numeric fields. Values of any {@link Number} type are
+ * accumulated as doubles.
+ */
+class NumericStatsValues extends AbstractStatsValues<Number> {
+
+  double sum;
+  double sumOfSquares;
+
+  /**
+   * @param sf SchemaField of the numeric field the statistics are collected for
+   */
+  public NumericStatsValues(SchemaField sf) {
+    super(sf);
+    // neutral starting points: any accumulated value replaces them
+    max = Double.NEGATIVE_INFINITY;
+    min = Double.POSITIVE_INFINITY;
+  }
+
+  /**
+   * Merges the "sum" and "sumOfSquares" entries of the given list into the running totals.
+   *
+   * @param stv List containing values the current statistics should be updated against
+   */
+  public void updateTypeSpecificStats(NamedList stv) {
+    final Number otherSum = (Number) stv.get("sum");
+    sum += otherSum.doubleValue();
+    final Number otherSumOfSquares = (Number) stv.get("sumOfSquares");
+    sumOfSquares += otherSumOfSquares.doubleValue();
+  }
+
+  /**
+   * Adds a single occurrence of the given value to the running totals.
+   *
+   * @param v Value the statistics should be updated against
+   */
+  public void updateTypeSpecificStats(Number v) {
+    final double d = v.doubleValue();
+    sumOfSquares += (d * d); // needed for the standard deviation
+    sum += d;
+  }
+
+  /**
+   * Adds the given number of occurrences of the given value to the running totals.
+   *
+   * @param v Value the statistics should be updated against
+   * @param count Number of times the value is being accumulated
+   */
+  public void updateTypeSpecificStats(Number v, int count) {
+    final double d = v.doubleValue();
+    sumOfSquares += (d * d * count); // needed for the standard deviation
+    sum += d * count;
+  }
+
+  /**
+   * Lowers the minimum and raises the maximum as needed, comparing as doubles.
+   *
+   * @param min Value that the current minimum should be updated against
+   * @param max Value that the current maximum should be updated against
+   */
+  protected void updateMinMax(Number min, Number max) {
+    final double currentMin = this.min.doubleValue();
+    this.min = Math.min(currentMin, min.doubleValue());
+    final double currentMax = this.max.doubleValue();
+    this.max = Math.max(currentMax, max.doubleValue());
+  }
+
+  /**
+   * Adds sum, sumOfSquares, mean and standard deviation statistics to the given NamedList
+   *
+   * @param res NamedList to add the type specific statistics to
+   */
+  protected void addTypeSpecificStats(NamedList<Object> res) {
+    final double mean = sum / count;
+    res.add("sum", sum);
+    res.add("sumOfSquares", sumOfSquares);
+    res.add("mean", mean);
+    res.add("stddev", getStandardDeviation());
+  }
+
+  /**
+   * Calculates the standard deviation from the count, sum and sum of squares.
+   *
+   * @return Standard deviation statistic, or 0 when at most one value has been accumulated
+   */
+  private double getStandardDeviation() {
+    if (count > 1.0D) {
+      final double numerator = (count * sumOfSquares) - (sum * sum);
+      final double denominator = count * (count - 1.0D);
+      return Math.sqrt(numerator / denominator);
+    }
+    return 0.0D;
+  }
+}
+
+/**
+ * Implementation of StatsValues that supports Date values. Besides the minimum and maximum it
+ * tracks the sum of the dates' epoch milliseconds, from which the mean date is derived.
+ */
+class DateStatsValues extends AbstractStatsValues<Date> {
+
+  // sum of the epoch milliseconds of all accumulated dates
+  private long sum;
+
+  /**
+   * @param sf SchemaField of the date field the statistics are collected for
+   */
+  public DateStatsValues(SchemaField sf) {
+    super(sf);
+  }
+
+  /**
+   * Merges the "sum" entry of the given list, a Date wrapping the summed milliseconds, into the running sum.
+   *
+   * @param stv List containing values the current statistics should be updated against
+   */
+  @Override
+  protected void updateTypeSpecificStats(NamedList stv) {
+    sum += ((Date) stv.get("sum")).getTime();
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void updateTypeSpecificStats(Date value) {
+    sum += value.getTime();
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void updateTypeSpecificStats(Date value, int count) {
+    sum += value.getTime() * count;
+  }
+
+  /**
+   * Moves the minimum back and the maximum forward as needed. Null candidates, as reported
+   * by statistics that have not seen any value, are ignored.
+   *
+   * @param min Value that the current minimum should be updated against
+   * @param max Value that the current maximum should be updated against
+   */
+  @Override
+  protected void updateMinMax(Date min, Date max) {
+    if (min != null && (this.min == null || this.min.after(min))) {
+      this.min = min;
+    }
+    // the maximum has to be compared against the candidate maximum, not the candidate minimum
+    if (max != null && (this.max == null || this.max.before(max))) {
+      this.max = max;
+    }
+  }
+
+  /**
+   * Adds sum and mean statistics to the given NamedList. The mean is left out when no value
+   * has been accumulated, since it is undefined (and the long division would fail).
+   *
+   * @param res NamedList to add the type specific statistics to
+   */
+  @Override
+  protected void addTypeSpecificStats(NamedList<Object> res) {
+    res.add("sum", new Date(sum));
+    if (count > 0) {
+      res.add("mean", new Date(sum / count));
+    }
+  }
+}
+
+/**
+ * Implementation of StatsValues that supports String values. Only the common statistics
+ * (minimum, maximum, count and missing) are collected; minimum and maximum follow the
+ * natural ordering of String.
+ */
+class StringStatsValues extends AbstractStatsValues<String> {
+
+  /**
+   * @param sf SchemaField of the string field the statistics are collected for
+   */
+  public StringStatsValues(SchemaField sf) {
+    super(sf);
+  }
+
+  /**
+   * Does nothing, there are no type specific statistics for Strings.
+   */
+  protected void updateTypeSpecificStats(NamedList stv) {
+    // intentionally empty
+  }
+
+  /**
+   * Does nothing, there are no type specific statistics for Strings.
+   */
+  protected void updateTypeSpecificStats(String value) {
+    // intentionally empty
+  }
+
+  /**
+   * Does nothing, there are no type specific statistics for Strings.
+   */
+  protected void updateTypeSpecificStats(String value, int count) {
+    // intentionally empty
+  }
+
+  /**
+   * Keeps the smaller of the current minimum and the given minimum, and the larger of the
+   * current maximum and the given maximum. Null values never replace a known value.
+   *
+   * @param min Value that the current minimum should be updated against
+   * @param max Value that the current maximum should be updated against
+   */
+  protected void updateMinMax(String min, String max) {
+    this.min = min(this.min, min);
+    this.max = max(this.max, max);
+  }
+
+  /**
+   * Adds no type specific statistics
+   */
+  protected void addTypeSpecificStats(NamedList<Object> res) {
+    // nothing to add
+  }
+
+  /**
+   * Determines which of the given Strings is the maximum, as computed by {@link String#compareTo(String)}
+   *
+   * @param str1 First candidate, may be null
+   * @param str2 Second candidate, may be null
+   * @return the non-null candidate when one of them is null; otherwise str1 if it is strictly greater
+   *         than str2, str2 otherwise
+   */
+  private static String max(String str1, String str2) {
+    if (str1 == null || str2 == null) {
+      return (str1 == null) ? str2 : str1;
+    }
+    if (str1.compareTo(str2) <= 0) {
+      return str2;
+    }
+    return str1;
+  }
+
+  /**
+   * Determines which of the given Strings is the minimum, as computed by {@link String#compareTo(String)}
+   *
+   * @param str1 First candidate, may be null
+   * @param str2 Second candidate, may be null
+   * @return the non-null candidate when one of them is null; otherwise str1 if it is strictly less
+   *         than str2, str2 otherwise
+   */
+  private static String min(String str1, String str2) {
+    if (str1 == null || str2 == null) {
+      return (str1 == null) ? str2 : str1;
+    }
+    if (str1.compareTo(str2) >= 0) {
+      return str2;
+    }
+    return str1;
+  }
+}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/UnInvertedField.java?rev=1201855&r1=1201854&r2=1201855&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/UnInvertedField.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/UnInvertedField.java Mon Nov 14 19:58:04 2011
@@ -29,11 +29,13 @@ import org.apache.solr.common.SolrExcept
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TrieField;
import org.apache.solr.search.*;
import org.apache.solr.util.LongPriorityQueue;
import org.apache.solr.util.PrimUtils;
import org.apache.solr.handler.component.StatsValues;
+import org.apache.solr.handler.component.StatsValuesFactory;
import org.apache.solr.handler.component.FieldFacetStats;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.OpenBitSet;
@@ -461,7 +463,10 @@ public class UnInvertedField extends Doc
//functionality between the two and refactor code somewhat
use.incrementAndGet();
- StatsValues allstats = new StatsValues();
+ SchemaField sf = searcher.getSchema().getField(field);
+ // FieldType ft = sf.getType();
+
+ StatsValues allstats = StatsValuesFactory.createStatsValues(sf);
DocSet docs = baseDocs;
@@ -470,8 +475,6 @@ public class UnInvertedField extends Doc
if (baseSize <= 0) return allstats;
- FieldType ft = searcher.getSchema().getFieldType(field);
-
DocSet missing = docs.andNot( searcher.getDocSet(new TermRangeQuery(field, null, null, false, false)) );
int i = 0;
@@ -479,14 +482,14 @@ public class UnInvertedField extends Doc
//Initialize facetstats, if facets have been passed in
FieldCache.DocTermsIndex si;
for (String f : facet) {
- FieldType facet_ft = searcher.getSchema().getFieldType(f);
+ SchemaField facet_sf = searcher.getSchema().getField(f);
try {
si = FieldCache.DEFAULT.getTermsIndex(searcher.getIndexReader(), f);
}
catch (IOException e) {
throw new RuntimeException("failed to open field cache for: " + f, e);
}
- finfo[i] = new FieldFacetStats(f, si, facet_ft, numTermsInField);
+ finfo[i] = new FieldFacetStats(f, si, sf, facet_sf, numTermsInField);
i++;
}
@@ -580,14 +583,12 @@ public class UnInvertedField extends Doc
}
}
}
- final CharsRef charsRef = new CharsRef();
+
// add results in index order
for (i = 0; i < numTermsInField; i++) {
int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
if (c == 0) continue;
- String label = getReadableValue(getTermValue(te, i), ft, charsRef);
- // TODO: we should avoid this re-parse
- Double value = Double.parseDouble(label);
+ BytesRef value = getTermValue(te, i);
allstats.accumulate(value, c);
//as we've parsed the termnum into a value, lets also accumulate fieldfacet statistics
@@ -600,7 +601,6 @@ public class UnInvertedField extends Doc
allstats.addMissing(c);
if (finfo.length > 0) {
- allstats.facets = new HashMap<String, Map<String, StatsValues>>();
for (FieldFacetStats f : finfo) {
Map<String, StatsValues> facetStatsValues = f.facetStatsValues;
FieldType facetType = searcher.getSchema().getFieldType(f.name);
@@ -609,7 +609,7 @@ public class UnInvertedField extends Doc
int missingCount = searcher.numDocs(new TermQuery(new Term(f.name, facetType.toInternal(termLabel))), missing);
entry.getValue().addMissing(missingCount);
}
- allstats.facets.put(f.name, facetStatsValues);
+ allstats.addFacet(f.name, facetStatsValues);
}
}
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java?rev=1201855&r1=1201854&r2=1201855&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java Mon Nov 14 19:58:04 2011
@@ -16,8 +16,23 @@ package org.apache.solr.handler.componen
* limitations under the License.
*/
+import java.util.Date;
+import java.util.Locale;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.TimeZone;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.MapSolrParams;
+import org.apache.solr.common.params.StatsParams;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.util.AbstractSolrTestCase;
+
/**
* Statistics Component Test
*/
@@ -134,6 +149,59 @@ public class StatsComponentTest extends
}
+  /**
+   * Indexes three documents with a string value and one without, then checks the
+   * min, max, count and missing statistics reported for the string field.
+   */
+  public void testFieldStatisticsResultsStringField() throws Exception {
+    SolrCore core = h.getCore();
+
+    // documents 1-3 carry "string1".."string3"; document 4 has no value for the field
+    for (int id = 1; id <= 3; id++) {
+      assertU(adoc("id", String.valueOf(id), "active_s", "string" + id));
+    }
+    assertU(adoc("id", "4"));
+    assertU(commit());
+
+    Map<String, String> params = new HashMap<String, String>();
+    params.put("indent", "true");
+    params.put(StatsParams.STATS_FIELD, "active_s");
+    params.put(StatsParams.STATS, "true");
+    params.put(CommonParams.Q, "*:*");
+    SolrQueryRequest req = new LocalSolrQueryRequest(core, new MapSolrParams(params));
+
+    String[] expected = {
+        "//str[@name='min'][.='string1']",
+        "//str[@name='max'][.='string3']",
+        "//long[@name='count'][.='3']",
+        "//long[@name='missing'][.='1']"
+    };
+    assertQ("test string statistics values", req, expected);
+  }
+
+  /**
+   * Indexes two documents with a date value and one without, then checks the count,
+   * missing, min, max, sum and mean statistics reported for the date field.
+   */
+  public void testFieldStatisticsResultsDateField() throws Exception {
+    SolrCore core = h.getCore();
+
+    // dates are indexed in UTC with a trailing 'Z'
+    DateFormat utcFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US);
+    utcFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+    String earlier = utcFormat.format(new Date(123456789)) + "Z";
+    String later = utcFormat.format(new Date(987654321)) + "Z";
+
+    assertU(adoc("id", "1", "active_dt", earlier));
+    assertU(adoc("id", "2", "active_dt", later));
+    assertU(adoc("id", "3"));
+    assertU(commit());
+
+    Map<String, String> params = new HashMap<String, String>();
+    params.put("indent", "true");
+    params.put(StatsParams.STATS_FIELD, "active_dt");
+    params.put(StatsParams.STATS, "true");
+    params.put(CommonParams.Q, "*:*");
+    SolrQueryRequest req = new LocalSolrQueryRequest(core, new MapSolrParams(params));
+
+    String[] expected = {
+        "//long[@name='count'][.='2']",
+        "//long[@name='missing'][.='1']",
+        "//date[@name='min'][.='1970-01-02T10:17:36Z']",
+        "//date[@name='max'][.='1970-01-12T10:20:54Z']",
+        "//date[@name='sum'][.='1970-01-13T20:38:30Z']",
+        "//date[@name='mean'][.='1970-01-07T10:19:15Z']"
+    };
+    assertQ("test date statistics values", req, expected);
+  }
+
+
public void doTestFieldStatisticsMissingResult(String f) throws Exception {
assertU(adoc("id", "1", f, "-10"));
@@ -160,8 +228,8 @@ public class StatsComponentTest extends
assertU(adoc("id", "3", f, "30", "active_s", "false"));
assertU(adoc("id", "4", f, "40", "active_s", "false"));
assertU(commit());
-
- assertQ("test value for active_s=true", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s")
+
+ assertQ("test value for active_s=true", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s","indent","true")
, "//lst[@name='true']/double[@name='min'][.='10.0']"
, "//lst[@name='true']/double[@name='max'][.='20.0']"
, "//lst[@name='true']/double[@name='sum'][.='30.0']"