You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2012/02/14 04:09:02 UTC
svn commit: r1243774 - in /lucene/dev/trunk/solr:
core/src/java/org/apache/solr/handler/admin/ core/src/test/org/apache/solr/
core/src/test/org/apache/solr/handler/admin/ webapp/web/admin/
Author: erick
Date: Tue Feb 14 03:09:02 2012
New Revision: 1243774
URL: http://svn.apache.org/viewvc?rev=1243774&view=rev
Log:
Fix for SOLR-3132
Modified:
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerTest.java
lucene/dev/trunk/solr/webapp/web/admin/schema.jsp
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java?rev=1243774&r1=1243773&r2=1243774&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java Tue Feb 14 03:09:02 2012
@@ -738,7 +738,7 @@ public class CoreAdminHandler extends Re
info.add("uptime", System.currentTimeMillis() - core.getStartTime());
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
try {
- SimpleOrderedMap<Object> indexInfo = LukeRequestHandler.getIndexInfo(searcher.get().getIndexReader(), false);
+ SimpleOrderedMap<Object> indexInfo = LukeRequestHandler.getIndexInfo(searcher.get().getIndexReader());
long size = getIndexSize(core);
indexInfo.add("sizeInBytes", size);
indexInfo.add("size", NumberUtils.readableSize(size));
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java?rev=1243774&r1=1243773&r2=1243774&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java Tue Feb 14 03:09:02 2012
@@ -82,25 +82,25 @@ public class LukeRequestHandler extends
public static final String DOC_ID = "docId";
public static final String ID = "id";
public static final int DEFAULT_COUNT = 10;
-
+
static final int HIST_ARRAY_SIZE = 33;
-
+
private static enum ShowStyle {
ALL,
DOC,
SCHEMA,
INDEX;
-
+
public static ShowStyle get(String v) {
if(v==null) return null;
- if("schema".equals(v)) return SCHEMA;
- if("index".equals(v)) return INDEX;
- if("doc".equals(v)) return DOC;
- if("all".equals(v)) return ALL;
+ if("schema".equalsIgnoreCase(v)) return SCHEMA;
+ if("index".equalsIgnoreCase(v)) return INDEX;
+ if("doc".equalsIgnoreCase(v)) return DOC;
+ if("all".equalsIgnoreCase(v)) return ALL;
throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown Show Style: "+v);
}
};
-
+
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception
@@ -109,29 +109,17 @@ public class LukeRequestHandler extends
SolrIndexSearcher searcher = req.getSearcher();
DirectoryReader reader = searcher.getIndexReader();
SolrParams params = req.getParams();
- int numTerms = params.getInt( NUMTERMS, DEFAULT_COUNT );
ShowStyle style = ShowStyle.get(params.get("show"));
- // Always show the core lucene info
- Map<String, TopTermQueue> topTerms = new TreeMap<String, TopTermQueue>();
-
// If no doc is given, show all fields and top terms
- Set<String> fields = null;
- String fl = params.get(CommonParams.FL);
- if (fl != null) {
- fields = new TreeSet<String>(Arrays.asList(fl.split( "[,\\s]+" )));
- }
- if( ShowStyle.SCHEMA == style ) {
- numTerms = 0; // Abort any statistics gathering.
- }
- rsp.add("index", getIndexInfo(reader, numTerms, topTerms, fields ));
-
+ rsp.add("index", getIndexInfo(reader));
+
if(ShowStyle.INDEX==style) {
- return; // thats all we need
+ return; // that's all we need
}
-
-
+
+
Integer docId = params.getInt( DOC_ID );
if( docId == null && params.get( ID ) != null ) {
// Look for something with a given solr ID
@@ -170,7 +158,7 @@ public class LukeRequestHandler extends
rsp.add( "schema", getSchemaInfo( req.getSchema() ) );
}
else {
- rsp.add( "fields", getIndexedFieldsInfo( searcher, fields, numTerms, topTerms) ) ;
+ rsp.add( "fields", getIndexedFieldsInfo(req) ) ;
}
// Add some generally helpful information
@@ -255,7 +243,8 @@ public class LukeRequestHandler extends
return key;
}
- private static SimpleOrderedMap<Object> getDocumentFieldsInfo( Document doc, int docId, IndexReader reader, IndexSchema schema ) throws IOException
+ private static SimpleOrderedMap<Object> getDocumentFieldsInfo( Document doc, int docId, IndexReader reader,
+ IndexSchema schema ) throws IOException
{
final CharsRef spare = new CharsRef();
SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
@@ -311,13 +300,22 @@ public class LukeRequestHandler extends
}
@SuppressWarnings("unchecked")
- private static SimpleOrderedMap<Object> getIndexedFieldsInfo(
- final SolrIndexSearcher searcher, final Set<String> fields, final int numTerms, Map<String,TopTermQueue> ttinfo)
+ private static SimpleOrderedMap<Object> getIndexedFieldsInfo(SolrQueryRequest req)
throws Exception {
+ SolrIndexSearcher searcher = req.getSearcher();
+ SolrParams params = req.getParams();
+
+ Set<String> fields = null;
+ String fl = params.get(CommonParams.FL);
+ if (fl != null) {
+ fields = new TreeSet<String>(Arrays.asList(fl.split( "[,\\s]+" )));
+ }
+
AtomicReader reader = searcher.getAtomicReader();
IndexSchema schema = searcher.getSchema();
+ // Don't be tempted to put this in the loop below, the whole point here is to alphabetize the fields!
Set<String> fieldNames = new TreeSet<String>();
for(FieldInfo fieldInfo : reader.getFieldInfos()) {
fieldNames.add(fieldInfo.name);
@@ -325,82 +323,90 @@ public class LukeRequestHandler extends
// Walk the term enum and keep a priority queue for each map in our set
SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
- Fields theFields = reader.fields();
for (String fieldName : fieldNames) {
if (fields != null && ! fields.contains(fieldName) && ! fields.contains("*")) {
- continue; // we're not interested in this term
+ continue; //we're not interested in this field Still an issue here
}
- SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>();
+ SimpleOrderedMap<Object> fieldMap = new SimpleOrderedMap<Object>();
SchemaField sfield = schema.getFieldOrNull( fieldName );
FieldType ftype = (sfield==null)?null:sfield.getType();
- f.add( "type", (ftype==null)?null:ftype.getTypeName() );
- f.add( "schema", getFieldFlags( sfield ) );
+ fieldMap.add( "type", (ftype==null)?null:ftype.getTypeName() );
+ fieldMap.add("schema", getFieldFlags(sfield));
if (sfield != null && schema.isDynamicField(sfield.getName()) && schema.getDynamicPattern(sfield.getName()) != null) {
- f.add("dynamicBase", schema.getDynamicPattern(sfield.getName()));
+ fieldMap.add("dynamicBase", schema.getDynamicPattern(sfield.getName()));
}
-
- Terms terms = theFields.terms(fieldName);
+ Terms terms = reader.fields().terms(fieldName);
if (terms == null) { // Not indexed, so we need to report what we can (it made it through the fl param if specified)
- finfo.add( fieldName, f );
+ finfo.add( fieldName, fieldMap );
continue;
}
- TopTermQueue topTerms = ttinfo.get( fieldName );
- // If numTerms==0, the call is just asking for a quick field list
- if( ttinfo != null && sfield != null && sfield.indexed() ) {
- if (numTerms > 0) { // Read the actual field from the index and report that too.
- Document doc = null;
- if (topTerms != null && topTerms.getTopTermInfo() != null) {
- Term term = topTerms.getTopTermInfo().term;
- DocsEnum docsEnum = reader.termDocsEnum(reader.getLiveDocs(),
- term.field(),
- new BytesRef(term.text()),
- false);
- if (docsEnum != null) {
- int docId;
- if ((docId = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
- doc = reader.document(docId);
- }
- }
- }
- if( doc != null ) {
- // Found a document with this field
- try {
- IndexableField fld = doc.getField( fieldName );
- if( fld != null ) {
- f.add( "index", getFieldFlags( fld ) );
- }
- else {
- // it is a non-stored field...
- f.add( "index", "(unstored field)" );
- }
+ if(sfield != null && sfield.indexed() ) {
+ // In the pre-4.0 days, this did a veeeery expensive range query. But we can be much faster now,
+ // so just do this all the time.
+ Document doc = getFirstLiveDoc(reader, fieldName, terms);
+
+
+ if( doc != null ) {
+ // Found a document with this field
+ try {
+ IndexableField fld = doc.getField( fieldName );
+ if( fld != null ) {
+ fieldMap.add("index", getFieldFlags(fld));
}
- catch( Exception ex ) {
- log.warn( "error reading field: "+fieldName );
+ else {
+ // it is a non-stored field...
+ fieldMap.add("index", "(unstored field)");
}
}
- f.add("docs", terms.getDocCount());
+ catch( Exception ex ) {
+ log.warn( "error reading field: "+fieldName );
+ }
}
- if( topTerms != null ) {
- f.add( "distinct", topTerms.distinctTerms );
-
- // Include top terms
- f.add( "topTerms", topTerms.toNamedList( searcher.getSchema() ) );
+ fieldMap.add("docs", terms.getDocCount());
- // Add a histogram
- f.add( "histogram", topTerms.histogram.toNamedList() );
- }
+ }
+ if (fields != null && (fields.contains(fieldName) || fields.contains("*"))) {
+ getDetailedFieldInfo(req, fieldName, fieldMap);
}
// Add the field
- finfo.add( fieldName, f );
+ finfo.add( fieldName, fieldMap );
}
return finfo;
}
+ // Just get a document with the term in it, the first one will do!
+ // Is there a better way to do this? Shouldn't actually be very costly
+ // to do it this way.
+ private static Document getFirstLiveDoc(AtomicReader reader, String fieldName, Terms terms) throws IOException {
+ DocsEnum docsEnum = null;
+ TermsEnum termsEnum = terms.iterator(null);
+ BytesRef text;
+ // Deal with the chance that the first bunch of terms are in deleted documents. Is there a better way?
+ for (int idx = 0; idx < 1000 && docsEnum == null; ++idx) {
+ text = termsEnum.next();
+ if (text == null) { // Ran off the end of the terms enum without finding any live docs with that field in them.
+ return null;
+ }
+ Term term = new Term(fieldName, text);
+ docsEnum = reader.termDocsEnum(reader.getLiveDocs(),
+ term.field(),
+ new BytesRef(term.text()),
+ false);
+ if (docsEnum != null) {
+ int docId;
+ if ((docId = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
+ return reader.document(docId);
+ }
+ }
+ }
+ return null;
+ }
+
/**
* Return info from the index
*/
@@ -525,67 +531,24 @@ public class LukeRequestHandler extends
v.add( f.getName() );
typeusemap.put( ft.getTypeName(), v );
}
- public static SimpleOrderedMap<Object> getIndexInfo(DirectoryReader reader, boolean countTerms) throws IOException {
- return getIndexInfo(reader, countTerms ? 1 : 0, null, null);
+
+ /**
+ * @deprecated use {@link #getIndexInfo(DirectoryReader)} since you now have to explicitly pass the "fl" parameter
+ * and this was always called with "false" anyway from CoreAdminHandler
+ */
+ public static SimpleOrderedMap<Object> getIndexInfo(DirectoryReader reader, boolean detail) throws IOException {
+ return getIndexInfo(reader);
}
- public static SimpleOrderedMap<Object> getIndexInfo( DirectoryReader reader, int numTerms,
- Map<String, TopTermQueue> topTerms,
- Set<String> fieldList) throws IOException {
+ // This method just gets the top-most level of information. This was conflated with getting detailed info
+ // for *all* the fields, called from CoreAdminHandler etc.
+
+ public static SimpleOrderedMap<Object> getIndexInfo(DirectoryReader reader) throws IOException {
Directory dir = reader.directory();
SimpleOrderedMap<Object> indexInfo = new SimpleOrderedMap<Object>();
indexInfo.add("numDocs", reader.numDocs());
indexInfo.add("maxDoc", reader.maxDoc());
- final CharsRef spare = new CharsRef();
- if( numTerms > 0 ) {
- Fields fields = MultiFields.getFields(reader);
- long totalTerms = 0;
- if (fields != null) {
- FieldsEnum fieldsEnum = fields.iterator();
- String field;
- while ((field = fieldsEnum.next()) != null) {
- Terms terms = fieldsEnum.terms();
- if (terms == null) {
- continue;
- }
- totalTerms += terms.getUniqueTermCount();
-
- if (fieldList != null && ! fieldList.contains(field) && ! fieldList.contains("*")) {
- continue;
- }
-
- TermsEnum termsEnum = terms.iterator(null);
- BytesRef text;
- int[] buckets = new int[HIST_ARRAY_SIZE];
- TopTermQueue tiq = topTerms.get(field);
- if (tiq == null) {
- tiq = new TopTermQueue(numTerms + 1); // Allocating slots for the top N terms to collect freqs.
- topTerms.put(field, tiq);
- }
- while ((text = termsEnum.next()) != null) {
- int freq = termsEnum.docFreq(); // This calculation seems odd, but it gives the same results as it used to.
- int slot = 32 - Integer.numberOfLeadingZeros(Math.max(0, freq - 1));
- buckets[slot] = buckets[slot] + 1;
- if (freq > tiq.minFreq) {
- UnicodeUtil.UTF8toUTF16(text, spare);
- String t = spare.toString();
- tiq.distinctTerms = new Long(fieldsEnum.terms().getUniqueTermCount()).intValue();
-
- tiq.add(new TopTermQueue.TermInfo(new Term(field, t), termsEnum.docFreq()));
- if (tiq.size() > numTerms) { // if tiq full
- tiq.pop(); // remove lowest in tiq
- tiq.minFreq = tiq.getTopTermInfo().docFreq;
- }
- }
- }
- tiq.histogram.add(buckets);
- }
- }
- //Clumsy, but I'm tired.
- indexInfo.add("numTerms", (new Long(totalTerms)).intValue());
- }
-
indexInfo.add("version", reader.getVersion()); // TODO? Is this different then: IndexReader.getCurrentVersion( dir )?
indexInfo.add("segmentCount", reader.getSequentialSubReaders().length);
indexInfo.add("current", reader.isCurrent() );
@@ -598,6 +561,57 @@ public class LukeRequestHandler extends
}
return indexInfo;
}
+
+ // Get terribly detailed information about a particular field. This is a very expensive call, use it with caution
+ // especially on large indexes!
+ private static void getDetailedFieldInfo(SolrQueryRequest req, String field, SimpleOrderedMap<Object> fieldMap)
+ throws IOException {
+
+ SolrParams params = req.getParams();
+ int numTerms = params.getInt( NUMTERMS, DEFAULT_COUNT );
+
+ TopTermQueue tiq = new TopTermQueue(numTerms + 1); // Something to collect the top N terms in.
+
+ final CharsRef spare = new CharsRef();
+
+ Fields fields = MultiFields.getFields(req.getSearcher().getIndexReader());
+
+ if (fields == null) { // No indexed fields
+ return;
+ }
+
+ Terms terms = fields.terms(field);
+ if (terms == null) { // No terms in the field.
+ return;
+ }
+ TermsEnum termsEnum = terms.iterator(null);
+ BytesRef text;
+ int[] buckets = new int[HIST_ARRAY_SIZE];
+ while ((text = termsEnum.next()) != null) {
+ int freq = termsEnum.docFreq(); // This calculation seems odd, but it gives the same results as it used to.
+ int slot = 32 - Integer.numberOfLeadingZeros(Math.max(0, freq - 1));
+ buckets[slot] = buckets[slot] + 1;
+ if (freq > tiq.minFreq) {
+ UnicodeUtil.UTF8toUTF16(text, spare);
+ String t = spare.toString();
+ tiq.distinctTerms = new Long(terms.getUniqueTermCount()).intValue();
+
+ tiq.add(new TopTermQueue.TermInfo(new Term(field, t), termsEnum.docFreq()));
+ if (tiq.size() > numTerms) { // if tiq full
+ tiq.pop(); // remove lowest in tiq
+ tiq.minFreq = tiq.getTopTermInfo().docFreq;
+ }
+ }
+ }
+ tiq.histogram.add(buckets);
+ fieldMap.add("distinct", tiq.distinctTerms);
+
+ // Include top terms
+ fieldMap.add("topTerms", tiq.toNamedList(req.getSearcher().getSchema()));
+
+ // Add a histogram
+ fieldMap.add("histogram", tiq.histogram.toNamedList());
+ }
//////////////////////// SolrInfoMBeans methods //////////////////////
@Override
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java?rev=1243774&r1=1243773&r2=1243774&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java Tue Feb 14 03:09:02 2012
@@ -82,7 +82,6 @@ public class MinimalSchemaTest extends S
assertQ("basic luke request failed",
req("qt", "/admin/luke")
,"//int[@name='numDocs'][.='2']"
- ,"//int[@name='numTerms'][.='5']"
);
assertQ("luke show schema failed",
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerTest.java?rev=1243774&r1=1243773&r2=1243774&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerTest.java Tue Feb 14 03:09:02 2012
@@ -108,35 +108,35 @@ public class LukeRequestHandlerTest exte
final int numFlags = EnumSet.allOf(FieldFlag.class).size();
assertQ("Not all flags ("+numFlags+") mentioned in info->key",
- req("qt","/admin/luke"),
- numFlags+"=count(//lst[@name='info']/lst[@name='key']/str)");
+ req("qt","/admin/luke"),
+ numFlags+"=count(//lst[@name='info']/lst[@name='key']/str)");
// code should be the same for all fields, but just in case do several
for (String f : Arrays.asList("solr_t","solr_s","solr_ti",
- "solr_td","solr_pl","solr_dt","solr_b",
- "solr_sS","solr_sI")) {
+ "solr_td","solr_pl","solr_dt","solr_b",
+ "solr_sS","solr_sI")) {
final String xp = getFieldXPathPrefix(f);
assertQ("Not as many schema flags as expected ("+numFlags+") for " + f,
- req("qt","/admin/luke", "fl", f),
- numFlags+"=string-length("+xp+"[@name='schema'])");
+ req("qt","/admin/luke", "fl", f),
+ numFlags+"=string-length("+xp+"[@name='schema'])");
}
// diff loop for checking 'index' flags,
// only valid for fields that are indexed & stored
for (String f : Arrays.asList("solr_t","solr_s","solr_ti",
- "solr_td","solr_pl","solr_dt","solr_b")) {
+ "solr_td","solr_pl","solr_dt","solr_b")) {
final String xp = getFieldXPathPrefix(f);
assertQ("Not as many index flags as expected ("+numFlags+") for " + f,
- req("qt","/admin/luke", "fl", f),
- numFlags+"=string-length("+xp+"[@name='index'])");
+ req("qt","/admin/luke", "fl", f),
+ numFlags+"=string-length("+xp+"[@name='index'])");
- final String hxp = getFieldXPathHistogram(f);
- assertQ("Historgram field should be present for field "+f,
- req("qt", "/admin/luke", "fl", f),
- hxp+"[@name='histogram']");
+ final String hxp = getFieldXPathHistogram(f);
+ assertQ("Historgram field should be present for field "+f,
+ req("qt", "/admin/luke", "fl", f),
+ hxp+"[@name='histogram']");
}
}
@@ -149,7 +149,7 @@ public class LukeRequestHandlerTest exte
@Test
public void testFlParam() {
- SolrQueryRequest req = req("qt", "/admin/luke", "fl", "solr_t solr_s");
+ SolrQueryRequest req = req("qt", "/admin/luke", "fl", "solr_t solr_s", "show", "all");
try {
// First, determine that the two fields ARE there
String response = h.query(req);
Modified: lucene/dev/trunk/solr/webapp/web/admin/schema.jsp
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/webapp/web/admin/schema.jsp?rev=1243774&r1=1243773&r2=1243774&view=diff
==============================================================================
--- lucene/dev/trunk/solr/webapp/web/admin/schema.jsp (original)
+++ lucene/dev/trunk/solr/webapp/web/admin/schema.jsp Tue Feb 14 03:09:02 2012
@@ -114,7 +114,7 @@
//further populates the loaded schema with information gathered
// from the no argument LukeRequestHandler
loadFromLukeHandler: function(func) {
- $.getJSON(solr.pathToLukeHandler+'?wt=json', function(data) {
+ $.getJSON(solr.pathToLukeHandler+'?wt=json&fl=*', function(data) {
$.each(data.fields, function(i, item) {
var field = solr.schemaFields[i];