You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by ry...@apache.org on 2007/04/29 08:08:47 UTC
svn commit: r533467 -
/lucene/solr/trunk/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
Author: ryan
Date: Sat Apr 28 23:08:47 2007
New Revision: 533467
URL: http://svn.apache.org/viewvc?view=rev&rev=533467
Log:
Removing the not-quite-perfect logic for 'cacheableFaceting' (we should revisit this after some upcoming faceting improvements). Adding a 'histogram' to each field - thanks to Yonik for the suggestion. Moved the 'key' to the bottom of the request.
Modified:
lucene/solr/trunk/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
Modified: lucene/solr/trunk/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java?view=diff&rev=533467&r1=533466&r2=533467
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java Sat Apr 28 23:08:47 2007
@@ -39,9 +39,9 @@
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.PriorityQueue;
-import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrException;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.handler.RequestHandlerUtils;
@@ -51,6 +51,7 @@
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SolrQueryParser;
import org.apache.solr.util.NamedList;
@@ -67,7 +68,7 @@
* </p>
*
* For more documentation see:
- *
+ * http://wiki.apache.org/solr/LukeRequestHandler
*
* @author ryan
* @version $Id$
@@ -136,9 +137,14 @@
fields.add( f );
}
}
- rsp.add( "key", getFieldFlagsKey() );
rsp.add( "fields", getIndexedFieldsInfo( searcher, fields, numTerms ) ) ;
}
+
+ // Add some generally helpful information
+ NamedList<Object> info = new SimpleOrderedMap<Object>();
+ info.add( "key", getFieldFlagsKey() );
+ info.add( "NOTE", "Document Frequency (df) is not updated when a document is marked for deletion. df values include deleted documents." );
+ rsp.add( "info", info );
}
/**
@@ -231,16 +237,8 @@
f.add( "value", (ftype==null)?null:ftype.toExternal( fieldable ) );
f.add( "internal", fieldable.stringValue() ); // may be a binary number
f.add( "boost", fieldable.getBoost() );
-
- // TODO? how can this ever be 0?! it is in the document!
- int freq = reader.docFreq( t );
- if( freq > 0 ) {
- f.add( "docFreq", reader.docFreq( t ) );
- }
- else {
- f.add( "docFreq", "zero! How can that be?" );
- }
-
+ f.add( "docFreq", reader.docFreq( t ) ); // this can be 0 for non-indexed fields
+
// If we have a term vector, return that
if( fieldable.isTermVectorStored() ) {
try {
@@ -271,7 +269,6 @@
Query matchAllDocs = new MatchAllDocsQuery();
SolrQueryParser qp = searcher.getSchema().getSolrQueryParser(null);
- int filterCacheSize = SolrConfig.config.getInt( "query/filterCache/@size", -1 );
IndexReader reader = searcher.getReader();
IndexSchema schema = searcher.getSchema();
@@ -281,7 +278,7 @@
Collection<String> fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
for (String fieldName : fieldNames) {
if( fields != null && !fields.contains( fieldName ) ) {
- continue; // if a field is specified, only return one
+ continue; // if a field is specified, only them
}
SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>();
@@ -294,32 +291,36 @@
Query q = qp.parse( fieldName+":[* TO *]" );
int docCount = searcher.numDocs( q, matchAllDocs );
-// TODO? Is there a way to get the Fieldable infomation for this field?
-// The following approach works fine for stored fields, but does not work for non-stored fields
-// if( docCount > 0 ) {
-// // Find a document with this field
-// DocList ds = searcher.getDocList( q, (Query)null, (Sort)null, 0, 1 );
-// try {
-// Document doc = searcher.doc( ds.iterator().next() );
-// Fieldable fld = doc.getFieldable( fieldName );
-// f.add( "index", getFieldFlags( fld ) );
-// }
-// catch( Exception ex ) {
-// log.warning( "error reading field: "+fieldName );
-// }
-// // Find one document so we can get the fieldable
-// }
+ if( docCount > 0 ) {
+ // Find a document with this field
+ DocList ds = searcher.getDocList( q, (Query)null, (Sort)null, 0, 1 );
+ try {
+ Document doc = searcher.doc( ds.iterator().next() );
+ Fieldable fld = doc.getFieldable( fieldName );
+ if( fld != null ) {
+ f.add( "index", getFieldFlags( fld ) );
+ }
+ else {
+ // it is a non-stored field...
+ f.add( "index", "(unstored field)" );
+ }
+ }
+ catch( Exception ex ) {
+ log.warning( "error reading field: "+fieldName );
+ }
+ // Find one document so we can get the fieldable
+ }
f.add( "docs", docCount );
TopTermQueue topTerms = ttinfo.get( fieldName );
if( topTerms != null ) {
f.add( "distinct", topTerms.distinctTerms );
- // TODO? is this the correct logic?
- f.add( "cacheableFaceting", topTerms.distinctTerms < filterCacheSize );
-
- // Only show them if we specify something
+ // Include top terms
f.add( "topTerms", topTerms.toNamedList( searcher.getSchema() ) );
+
+ // Add a histogram
+ f.add( "histogram", topTerms.histogram.toNamedList() );
}
// Add the field
@@ -384,6 +385,48 @@
///////////////////////////////////////////////////////////////////////////////////////
+ /**
+  * Histogram of term document frequencies, grouped into power-of-two buckets.
+  * A term with document frequency df is counted in the bucket labelled with the
+  * smallest power of two that is &gt;= df (1, 2, 4, 8, ...); for example bucket 8
+  * counts the terms with 5 &lt;= df &lt;= 8.
+  */
+ private static class TermHistogram
+ {
+   /** Largest power of two representable as a positive int; bigger values are clamped to it. */
+   private static final int LARGEST_BUCKET = 1 << 30;
+
+   /** Largest bucket seen so far, or -1 while nothing has been added. */
+   int maxBucket = -1;
+
+   /** Maps a bucket label (a power of two) to the number of terms counted in it. */
+   public Map<Integer,Integer> hist = new HashMap<Integer, Integer>();
+
+   /**
+    * Returns the smallest power of two that is &gt;= num.
+    * Uses integer bit arithmetic rather than log()/pow() so that exact powers of
+    * two are never pushed into the next bucket by floating point rounding.
+    *
+    * @param num the value to bucket (a document frequency)
+    * @return a power of two in [1, 2^30]; values &lt;= 1 map to 1 and values
+    *         above 2^30 are clamped to 2^30
+    */
+   public static int getPowerOfTwoBucket( int num )
+   {
+     if( num <= 1 ) {
+       return 1;
+     }
+     if( num >= LARGEST_BUCKET ) {
+       return LARGEST_BUCKET; // the next power of two would overflow int
+     }
+     return Integer.highestOneBit( num - 1 ) << 1;
+   }
+
+   /**
+    * Counts one term with the given document frequency.
+    *
+    * @param df document frequency of the term
+    */
+   public void add( int df )
+   {
+     int bucket = getPowerOfTwoBucket( df );
+     if( bucket > maxBucket ) {
+       maxBucket = bucket;
+     }
+     Integer old = hist.get( bucket );
+     hist.put( bucket, (old == null) ? 1 : old+1 );
+   }
+
+   /**
+    * Returns the histogram as an ordered list of (bucket label, term count) pairs,
+    * from bucket 1 up to the largest bucket seen. Buckets with no terms are
+    * reported with a count of 0. The list is empty when nothing has been added.
+    */
+   // TODO? should this be a list or a map?
+   public NamedList<Integer> toNamedList()
+   {
+     NamedList<Integer> nl = new NamedList<Integer>();
+     // Start at 1 so that terms with df == 1 are reported; "bucket > 0" stops
+     // the loop once the shift overflows past 2^30.
+     for( int bucket = 1; bucket > 0 && bucket <= maxBucket; bucket <<= 1 ) {
+       Integer val = hist.get( bucket );
+       nl.add( ""+bucket, (val == null) ? 0 : val );
+     }
+     return nl;
+   }
+ }
+
/**
* Private internal class that counts up frequent terms
*/
@@ -400,9 +443,11 @@
public int minFreq = 0;
public int distinctTerms = 0;
+ public TermHistogram histogram;
TopTermQueue(int size) {
initialize(size);
+ histogram = new TermHistogram();
}
@Override
@@ -452,6 +497,7 @@
info.put( field, tiq );
}
tiq.distinctTerms++;
+ tiq.histogram.add( terms.docFreq() ); // add the term to the histogram
// Only save the distinct terms for fields we worry about
if (fields != null && fields.size() > 0) {
@@ -474,6 +520,7 @@
return info;
}
}
+