You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2012/02/14 04:09:02 UTC
svn commit: r1243774 - in /lucene/dev/trunk/solr: core/src/java/org/apache/solr/handler/admin/ core/src/test/org/apache/solr/ core/src/test/org/apache/solr/handler/admin/ webapp/web/admin/

Author: erick
Date: Tue Feb 14 03:09:02 2012
New Revision: 1243774

URL: http://svn.apache.org/viewvc?rev=1243774&view=rev
Log:
Fix for SOLR-3132

Modified:
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerTest.java
    lucene/dev/trunk/solr/webapp/web/admin/schema.jsp

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java?rev=1243774&r1=1243773&r2=1243774&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java Tue Feb 14 03:09:02 2012
@@ -738,7 +738,7 @@ public class CoreAdminHandler extends Re
         info.add("uptime", System.currentTimeMillis() - core.getStartTime());
         RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
         try {
-          SimpleOrderedMap<Object> indexInfo = LukeRequestHandler.getIndexInfo(searcher.get().getIndexReader(), false);
+          SimpleOrderedMap<Object> indexInfo = LukeRequestHandler.getIndexInfo(searcher.get().getIndexReader());
           long size = getIndexSize(core);
           indexInfo.add("sizeInBytes", size);
           indexInfo.add("size", NumberUtils.readableSize(size));

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java?rev=1243774&r1=1243773&r2=1243774&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java Tue Feb 14 03:09:02 2012
@@ -82,25 +82,25 @@ public class LukeRequestHandler extends 
   public static final String DOC_ID = "docId";
   public static final String ID = "id";
   public static final int DEFAULT_COUNT = 10;
-  
+
   static final int HIST_ARRAY_SIZE = 33;
-  
+
   private static enum ShowStyle {
     ALL,
     DOC,
     SCHEMA,
     INDEX;
-    
+
     public static ShowStyle get(String v) {
       if(v==null) return null;
-      if("schema".equals(v)) return SCHEMA;
-      if("index".equals(v))  return INDEX;
-      if("doc".equals(v))    return DOC;
-      if("all".equals(v))    return ALL;
+      if("schema".equalsIgnoreCase(v)) return SCHEMA;
+      if("index".equalsIgnoreCase(v))  return INDEX;
+      if("doc".equalsIgnoreCase(v))    return DOC;
+      if("all".equalsIgnoreCase(v))    return ALL;
       throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown Show Style: "+v);
     }
   };
-  
+
 
   @Override
   public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception
@@ -109,29 +109,17 @@ public class LukeRequestHandler extends 
     SolrIndexSearcher searcher = req.getSearcher();
     DirectoryReader reader = searcher.getIndexReader();
     SolrParams params = req.getParams();
-    int numTerms = params.getInt( NUMTERMS, DEFAULT_COUNT );
     ShowStyle style = ShowStyle.get(params.get("show"));
 
-    // Always show the core lucene info
-    Map<String, TopTermQueue> topTerms = new TreeMap<String, TopTermQueue>();
-
     // If no doc is given, show all fields and top terms
-    Set<String> fields = null;
-    String fl = params.get(CommonParams.FL);
-    if (fl != null) {
-      fields = new TreeSet<String>(Arrays.asList(fl.split( "[,\\s]+" )));
-    }
-    if( ShowStyle.SCHEMA == style ) {
-      numTerms = 0; // Abort any statistics gathering.
-    }
 
-    rsp.add("index", getIndexInfo(reader, numTerms, topTerms, fields ));
-    
+    rsp.add("index", getIndexInfo(reader));
+
     if(ShowStyle.INDEX==style) {
-      return; // thats all we need
+      return; // that's all we need
     }
-        
-    
+
+
     Integer docId = params.getInt( DOC_ID );
     if( docId == null && params.get( ID ) != null ) {
       // Look for something with a given solr ID
@@ -170,7 +158,7 @@ public class LukeRequestHandler extends 
       rsp.add( "schema", getSchemaInfo( req.getSchema() ) );
     }
     else {
-      rsp.add( "fields", getIndexedFieldsInfo( searcher, fields, numTerms, topTerms) ) ;
+      rsp.add( "fields", getIndexedFieldsInfo(req) ) ;
     }
 
     // Add some generally helpful information
@@ -255,7 +243,8 @@ public class LukeRequestHandler extends 
     return key;
   }
 
-  private static SimpleOrderedMap<Object> getDocumentFieldsInfo( Document doc, int docId, IndexReader reader, IndexSchema schema ) throws IOException
+  private static SimpleOrderedMap<Object> getDocumentFieldsInfo( Document doc, int docId, IndexReader reader,
+                                                                 IndexSchema schema ) throws IOException
   {
     final CharsRef spare = new CharsRef();
     SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
@@ -311,13 +300,22 @@ public class LukeRequestHandler extends 
   }
 
   @SuppressWarnings("unchecked")
-  private static SimpleOrderedMap<Object> getIndexedFieldsInfo(
-      final SolrIndexSearcher searcher, final Set<String> fields, final int numTerms, Map<String,TopTermQueue> ttinfo)
+  private static SimpleOrderedMap<Object> getIndexedFieldsInfo(SolrQueryRequest req)
       throws Exception {
 
+    SolrIndexSearcher searcher = req.getSearcher();
+    SolrParams params = req.getParams();
+
+    Set<String> fields = null;
+    String fl = params.get(CommonParams.FL);
+    if (fl != null) {
+      fields = new TreeSet<String>(Arrays.asList(fl.split( "[,\\s]+" )));
+    }
+
     AtomicReader reader = searcher.getAtomicReader();
     IndexSchema schema = searcher.getSchema();
 
+    // Don't be tempted to put this in the loop below, the whole point here is to alphabetize the fields!
     Set<String> fieldNames = new TreeSet<String>();
     for(FieldInfo fieldInfo : reader.getFieldInfos()) {
       fieldNames.add(fieldInfo.name);
@@ -325,82 +323,90 @@ public class LukeRequestHandler extends 
 
     // Walk the term enum and keep a priority queue for each map in our set
     SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
-    Fields theFields = reader.fields();
 
     for (String fieldName : fieldNames) {
       if (fields != null && ! fields.contains(fieldName) && ! fields.contains("*")) {
-        continue; // we're not interested in this term
+        continue; //we're not interested in this field Still an issue here
       }
 
-      SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>();
+      SimpleOrderedMap<Object> fieldMap = new SimpleOrderedMap<Object>();
 
       SchemaField sfield = schema.getFieldOrNull( fieldName );
       FieldType ftype = (sfield==null)?null:sfield.getType();
 
-      f.add( "type", (ftype==null)?null:ftype.getTypeName() );
-      f.add( "schema", getFieldFlags( sfield ) );
+      fieldMap.add( "type", (ftype==null)?null:ftype.getTypeName() );
+      fieldMap.add("schema", getFieldFlags(sfield));
       if (sfield != null && schema.isDynamicField(sfield.getName()) && schema.getDynamicPattern(sfield.getName()) != null) {
-        f.add("dynamicBase", schema.getDynamicPattern(sfield.getName()));
+        fieldMap.add("dynamicBase", schema.getDynamicPattern(sfield.getName()));
       }
-
-      Terms terms = theFields.terms(fieldName);
+      Terms terms = reader.fields().terms(fieldName);
       if (terms == null) { // Not indexed, so we need to report what we can (it made it through the fl param if specified)
-        finfo.add( fieldName, f );
+        finfo.add( fieldName, fieldMap );
         continue;
       }
 
-      TopTermQueue topTerms = ttinfo.get( fieldName );
-      // If numTerms==0, the call is just asking for a quick field list
-      if( ttinfo != null && sfield != null && sfield.indexed() ) {
-        if (numTerms > 0) { // Read the actual field from the index and report that too.
-          Document doc = null;
-          if (topTerms != null && topTerms.getTopTermInfo() != null) {
-            Term term = topTerms.getTopTermInfo().term;
-            DocsEnum docsEnum = reader.termDocsEnum(reader.getLiveDocs(),
-                term.field(),
-                new BytesRef(term.text()),
-                false);
-            if (docsEnum != null) {
-              int docId;
-              if ((docId = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
-                doc = reader.document(docId);
-              }
-            }
-          }
-          if( doc != null ) {
-            // Found a document with this field
-            try {
-              IndexableField fld = doc.getField( fieldName );
-              if( fld != null ) {
-                f.add( "index", getFieldFlags( fld ) );
-              }
-              else {
-                // it is a non-stored field...
-                f.add( "index", "(unstored field)" );
-              }
+      if(sfield != null && sfield.indexed() ) {
+        // In the pre-4.0 days, this did a veeeery expensive range query. But we can be much faster now,
+        // so just do this all the time.
+        Document doc = getFirstLiveDoc(reader, fieldName, terms);
+
+
+        if( doc != null ) {
+          // Found a document with this field
+          try {
+            IndexableField fld = doc.getField( fieldName );
+            if( fld != null ) {
+              fieldMap.add("index", getFieldFlags(fld));
             }
-            catch( Exception ex ) {
-              log.warn( "error reading field: "+fieldName );
+            else {
+              // it is a non-stored field...
+              fieldMap.add("index", "(unstored field)");
             }
           }
-          f.add("docs", terms.getDocCount());
+          catch( Exception ex ) {
+            log.warn( "error reading field: "+fieldName );
+          }
         }
-        if( topTerms != null ) {
-          f.add( "distinct", topTerms.distinctTerms );
-
-          // Include top terms
-          f.add( "topTerms", topTerms.toNamedList( searcher.getSchema() ) );
+        fieldMap.add("docs", terms.getDocCount());
 
-          // Add a histogram
-          f.add( "histogram", topTerms.histogram.toNamedList() );
-        }
+      }
+      if (fields != null && (fields.contains(fieldName) || fields.contains("*"))) {
+        getDetailedFieldInfo(req, fieldName, fieldMap);
       }
       // Add the field
-      finfo.add( fieldName, f );
+      finfo.add( fieldName, fieldMap );
     }
     return finfo;
   }
 
+  // Just get a document with the term in it, the first one will do!
+  // Is there a better way to do this? Shouldn't actually be very costly
+  // to do it this way.
+  private static Document getFirstLiveDoc(AtomicReader reader, String fieldName, Terms terms) throws IOException {
+    DocsEnum docsEnum = null;
+    TermsEnum termsEnum = terms.iterator(null);
+    BytesRef text;
+    // Deal with the chance that the first bunch of terms are in deleted documents. Is there a better way?
+    for (int idx = 0; idx < 1000 && docsEnum == null; ++idx) {
+      text = termsEnum.next();
+      if (text == null) { // Ran off the end of the terms enum without finding any live docs with that field in them.
+        return null;
+      }
+      Term term = new Term(fieldName, text);
+      docsEnum = reader.termDocsEnum(reader.getLiveDocs(),
+          term.field(),
+          new BytesRef(term.text()),
+          false);
+      if (docsEnum != null) {
+        int docId;
+        if ((docId = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
+          return reader.document(docId);
+        }
+      }
+    }
+    return null;
+  }
+
   /**
    * Return info from the index
    */
@@ -525,67 +531,24 @@ public class LukeRequestHandler extends 
     v.add( f.getName() );
     typeusemap.put( ft.getTypeName(), v );
   }
-  public static SimpleOrderedMap<Object> getIndexInfo(DirectoryReader reader, boolean countTerms) throws IOException {
-    return getIndexInfo(reader, countTerms ? 1 : 0, null, null);
+
+  /**
+   * @deprecated use {@link #getIndexInfo(DirectoryReader)} since you now have to explicitly pass the "fl" parameter
+   * and this was always called with "false" anyway from CoreAdminHandler
+   */
+  public static SimpleOrderedMap<Object> getIndexInfo(DirectoryReader reader, boolean detail) throws IOException {
+    return getIndexInfo(reader);
   }
-  public static SimpleOrderedMap<Object> getIndexInfo( DirectoryReader reader, int numTerms,
-                                                       Map<String, TopTermQueue> topTerms,
-                                                       Set<String> fieldList) throws IOException {
+  // This method just gets the top-most level of information. This was conflated with getting detailed info
+  // for *all* the fields, called from CoreAdminHandler etc.
+
+  public static SimpleOrderedMap<Object> getIndexInfo(DirectoryReader reader) throws IOException {
     Directory dir = reader.directory();
     SimpleOrderedMap<Object> indexInfo = new SimpleOrderedMap<Object>();
 
     indexInfo.add("numDocs", reader.numDocs());
     indexInfo.add("maxDoc", reader.maxDoc());
-    final CharsRef spare = new CharsRef();
-    if( numTerms > 0 ) {
-      Fields fields = MultiFields.getFields(reader);
-      long totalTerms = 0;
-      if (fields != null) {
-        FieldsEnum fieldsEnum = fields.iterator();
-        String field;
-        while ((field = fieldsEnum.next()) != null) {
-          Terms terms = fieldsEnum.terms();
-          if (terms == null) {
-            continue;
-          }
-          totalTerms += terms.getUniqueTermCount();
-
-          if (fieldList != null && ! fieldList.contains(field) && ! fieldList.contains("*")) {
-            continue;
-          }
-
-          TermsEnum termsEnum = terms.iterator(null);
-          BytesRef text;
-          int[] buckets = new int[HIST_ARRAY_SIZE];
-          TopTermQueue tiq = topTerms.get(field);
-          if (tiq == null) {
-            tiq = new TopTermQueue(numTerms + 1);   // Allocating slots for the top N terms to collect freqs.
-            topTerms.put(field, tiq);
-          }
-          while ((text = termsEnum.next()) != null) {
-            int freq = termsEnum.docFreq();  // This calculation seems odd, but it gives the same results as it used to.
-            int slot = 32 - Integer.numberOfLeadingZeros(Math.max(0, freq - 1));
-            buckets[slot] = buckets[slot] + 1;
-            if (freq > tiq.minFreq) {
-              UnicodeUtil.UTF8toUTF16(text, spare);
-              String t = spare.toString();
-              tiq.distinctTerms = new Long(fieldsEnum.terms().getUniqueTermCount()).intValue();
-
-              tiq.add(new TopTermQueue.TermInfo(new Term(field, t), termsEnum.docFreq()));
-              if (tiq.size() > numTerms) { // if tiq full
-                tiq.pop(); // remove lowest in tiq
-                tiq.minFreq  = tiq.getTopTermInfo().docFreq;
-              }
-            }
-          }
-          tiq.histogram.add(buckets);
-        }
-      }
-      //Clumsy, but I'm tired.
-      indexInfo.add("numTerms", (new Long(totalTerms)).intValue());
 
-    }
-        
     indexInfo.add("version", reader.getVersion());  // TODO? Is this different then: IndexReader.getCurrentVersion( dir )?
     indexInfo.add("segmentCount", reader.getSequentialSubReaders().length);
     indexInfo.add("current", reader.isCurrent() );
@@ -598,6 +561,57 @@ public class LukeRequestHandler extends 
     }
     return indexInfo;
   }
+
+  // Get terribly detailed information about a particular field. This is a very expensive call, use it with caution
+  // especially on large indexes!
+  private static void getDetailedFieldInfo(SolrQueryRequest req, String field, SimpleOrderedMap<Object> fieldMap)
+      throws IOException {
+
+    SolrParams params = req.getParams();
+    int numTerms = params.getInt( NUMTERMS, DEFAULT_COUNT );
+
+    TopTermQueue tiq = new TopTermQueue(numTerms + 1);  // Something to collect the top N terms in.
+
+    final CharsRef spare = new CharsRef();
+
+    Fields fields = MultiFields.getFields(req.getSearcher().getIndexReader());
+
+    if (fields == null) { // No indexed fields
+      return;
+    }
+
+    Terms terms = fields.terms(field);
+    if (terms == null) {  // No terms in the field.
+      return;
+    }
+    TermsEnum termsEnum = terms.iterator(null);
+    BytesRef text;
+    int[] buckets = new int[HIST_ARRAY_SIZE];
+    while ((text = termsEnum.next()) != null) {
+      int freq = termsEnum.docFreq();  // This calculation seems odd, but it gives the same results as it used to.
+      int slot = 32 - Integer.numberOfLeadingZeros(Math.max(0, freq - 1));
+      buckets[slot] = buckets[slot] + 1;
+      if (freq > tiq.minFreq) {
+        UnicodeUtil.UTF8toUTF16(text, spare);
+        String t = spare.toString();
+        tiq.distinctTerms = new Long(terms.getUniqueTermCount()).intValue();
+
+        tiq.add(new TopTermQueue.TermInfo(new Term(field, t), termsEnum.docFreq()));
+        if (tiq.size() > numTerms) { // if tiq full
+          tiq.pop(); // remove lowest in tiq
+          tiq.minFreq = tiq.getTopTermInfo().docFreq;
+        }
+      }
+    }
+    tiq.histogram.add(buckets);
+    fieldMap.add("distinct", tiq.distinctTerms);
+
+    // Include top terms
+    fieldMap.add("topTerms", tiq.toNamedList(req.getSearcher().getSchema()));
+
+    // Add a histogram
+    fieldMap.add("histogram", tiq.histogram.toNamedList());
+  }
   //////////////////////// SolrInfoMBeans methods //////////////////////
 
   @Override

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java?rev=1243774&r1=1243773&r2=1243774&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java Tue Feb 14 03:09:02 2012
@@ -82,7 +82,6 @@ public class MinimalSchemaTest extends S
     assertQ("basic luke request failed",
             req("qt", "/admin/luke")
             ,"//int[@name='numDocs'][.='2']"
-            ,"//int[@name='numTerms'][.='5']"
             );
 
     assertQ("luke show schema failed",

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerTest.java?rev=1243774&r1=1243773&r2=1243774&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerTest.java Tue Feb 14 03:09:02 2012
@@ -108,35 +108,35 @@ public class LukeRequestHandlerTest exte
     final int numFlags = EnumSet.allOf(FieldFlag.class).size();
 
     assertQ("Not all flags ("+numFlags+") mentioned in info->key",
-            req("qt","/admin/luke"),
-            numFlags+"=count(//lst[@name='info']/lst[@name='key']/str)");
+        req("qt","/admin/luke"),
+        numFlags+"=count(//lst[@name='info']/lst[@name='key']/str)");
 
     // code should be the same for all fields, but just in case do several
     for (String f : Arrays.asList("solr_t","solr_s","solr_ti",
-                                  "solr_td","solr_pl","solr_dt","solr_b",
-                                  "solr_sS","solr_sI")) {
+        "solr_td","solr_pl","solr_dt","solr_b",
+        "solr_sS","solr_sI")) {
 
       final String xp = getFieldXPathPrefix(f);
       assertQ("Not as many schema flags as expected ("+numFlags+") for " + f,
-              req("qt","/admin/luke", "fl", f),
-              numFlags+"=string-length("+xp+"[@name='schema'])");
+          req("qt","/admin/luke", "fl", f),
+          numFlags+"=string-length("+xp+"[@name='schema'])");
 
     }
 
     // diff loop for checking 'index' flags,
     // only valid for fields that are indexed & stored
     for (String f : Arrays.asList("solr_t","solr_s","solr_ti",
-                                  "solr_td","solr_pl","solr_dt","solr_b")) {
+        "solr_td","solr_pl","solr_dt","solr_b")) {
 
       final String xp = getFieldXPathPrefix(f);
       assertQ("Not as many index flags as expected ("+numFlags+") for " + f,
-              req("qt","/admin/luke", "fl", f),
-              numFlags+"=string-length("+xp+"[@name='index'])");
+          req("qt","/admin/luke", "fl", f),
+          numFlags+"=string-length("+xp+"[@name='index'])");
 
-    final String hxp = getFieldXPathHistogram(f);
-    assertQ("Historgram field should be present for field "+f,
-        req("qt", "/admin/luke", "fl", f),
-        hxp+"[@name='histogram']");
+      final String hxp = getFieldXPathHistogram(f);
+      assertQ("Historgram field should be present for field "+f,
+          req("qt", "/admin/luke", "fl", f),
+          hxp+"[@name='histogram']");
     }
   }
 
@@ -149,7 +149,7 @@ public class LukeRequestHandlerTest exte
 
   @Test
   public void testFlParam() {
-    SolrQueryRequest req = req("qt", "/admin/luke", "fl", "solr_t solr_s");
+    SolrQueryRequest req = req("qt", "/admin/luke", "fl", "solr_t solr_s", "show", "all");
     try {
       // First, determine that the two fields ARE there
       String response = h.query(req);

Modified: lucene/dev/trunk/solr/webapp/web/admin/schema.jsp
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/webapp/web/admin/schema.jsp?rev=1243774&r1=1243773&r2=1243774&view=diff
==============================================================================
--- lucene/dev/trunk/solr/webapp/web/admin/schema.jsp (original)
+++ lucene/dev/trunk/solr/webapp/web/admin/schema.jsp Tue Feb 14 03:09:02 2012
@@ -114,7 +114,7 @@
     //further populates the loaded schema with information gathered
     // from the no argument LukeRequestHandler
     loadFromLukeHandler: function(func) {
-      $.getJSON(solr.pathToLukeHandler+'?wt=json', function(data) {
+      $.getJSON(solr.pathToLukeHandler+'?wt=json&fl=*', function(data) {
         $.each(data.fields, function(i, item) {
           var field = solr.schemaFields[i];