You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by cm...@apache.org on 2013/08/11 14:19:39 UTC

svn commit: r1512909 [23/38] - in /lucene/dev/branches/lucene4956: ./ dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/suggest/ dev-tools/idea/solr/contrib/dataimporthandler/ dev-tools/idea/solr/core/src/test/ dev-too...

Modified: lucene/dev/branches/lucene4956/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java Sun Aug 11 12:19:13 2013
@@ -303,6 +303,13 @@ public class SolrContentHandler extends 
     bldrStack.getLast().append(chars, offset, length);
   }
 
+  /**
+   * Treat the same as any other characters
+   */
+  @Override
+  public void ignorableWhitespace(char[] chars, int offset, int length) throws SAXException {
+    characters(chars, offset, length);
+  }
 
   /**
    * Can be used to transform input values based on their {@link org.apache.solr.schema.SchemaField}

Modified: lucene/dev/branches/lucene4956/solr/contrib/extraction/src/test-files/extraction/simple.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/extraction/src/test-files/extraction/simple.html?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/extraction/src/test-files/extraction/simple.html (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/extraction/src/test-files/extraction/simple.html Sun Aug 11 12:19:13 2013
@@ -6,6 +6,7 @@
 <p>
   Here is some text
 </p>
+<p>distinct<br/>words</p>
 <div>Here is some text in a div</div>
 <div>This has a <a href="http://www.apache.org">link</a>.</div>
 </body>

Modified: lucene/dev/branches/lucene4956/solr/contrib/extraction/src/test-files/extraction/solr/collection1/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/extraction/src/test-files/extraction/solr/collection1/conf/solrconfig.xml?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/extraction/src/test-files/extraction/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/extraction/src/test-files/extraction/solr/collection1/conf/solrconfig.xml Sun Aug 11 12:19:13 2013
@@ -20,6 +20,9 @@
 <config>
   <luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
   <jmx />
+  <indexConfig>
+    <useCompoundFile>${useCompoundFile:false}</useCompoundFile>
+  </indexConfig>
 
   <!-- Used to specify an alternate directory to hold all index data.
        It defaults to "index" if not present, and should probably
@@ -27,8 +30,6 @@
   <dataDir>${solr.data.dir:}</dataDir>
   <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
 
-  <!-- <indexConfig> section could go here, but we want the defaults -->
-
   <updateHandler class="solr.DirectUpdateHandler2">
 
     <!-- autocommit pending docs if certain criteria are met 

Modified: lucene/dev/branches/lucene4956/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java Sun Aug 11 12:19:13 2013
@@ -88,6 +88,10 @@ public class ExtractingRequestHandlerTes
     assertU(commit());
     assertQ(req("title:Welcome"), "//*[@numFound='1']");
 
+    assertQ(req("extractedContent:distinctwords"),      "//*[@numFound='0']");
+    assertQ(req("extractedContent:distinct"),           "//*[@numFound='1']");
+    assertQ(req("extractedContent:words"),              "//*[@numFound='2']");
+    assertQ(req("extractedContent:\"distinct words\""), "//*[@numFound='1']");
 
     loadLocal("extraction/simple.html",
       "literal.id","simple2",

Modified: lucene/dev/branches/lucene4956/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangIdParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangIdParams.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangIdParams.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangIdParams.java Sun Aug 11 12:19:13 2013
@@ -31,6 +31,7 @@ public interface LangIdParams {
   String THRESHOLD  = LANGUAGE_ID + ".threshold";            // Detection threshold
   String ENFORCE_SCHEMA =  LANGUAGE_ID + ".enforceSchema";   // Enforces that output fields exist in schema
   String LANG_WHITELIST  = LANGUAGE_ID + ".whitelist";       // Allowed languages
+  String LCMAP =  LANGUAGE_ID + ".lcmap";                    // Maps detected langcode to other value
   String MAP_ENABLE =  LANGUAGE_ID + ".map";                 // Turns on or off the field mapping
   String MAP_FL =  LANGUAGE_ID + ".map.fl";                  // Field list for mapping
   String MAP_OVERWRITE =  LANGUAGE_ID + ".map.overwrite";    // Whether to overwrite existing fields

Modified: lucene/dev/branches/lucene4956/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java Sun Aug 11 12:19:13 2013
@@ -75,6 +75,7 @@ public abstract class LanguageIdentifier
   protected HashSet<String> mapIndividualFieldsSet;
   protected HashSet<String> allMapFieldsSet;
   protected HashMap<String,String> lcMap;
+  protected HashMap<String,String> mapLcMap;
   protected IndexSchema schema;
 
   // Regex patterns
@@ -138,13 +139,26 @@ public abstract class LanguageIdentifier
         allMapFieldsSet.addAll(mapIndividualFieldsSet);
       }
 
-      // Language Code mapping
+      // Normalize detected langcode onto normalized langcode
       lcMap = new HashMap<String,String>();
+      if(params.get(LCMAP) != null) {
+        for(String mapping : params.get(LCMAP).split("[, ]")) {
+          String[] keyVal = mapping.split(":");
+          if(keyVal.length == 2) {
+            lcMap.put(keyVal[0], keyVal[1]);
+          } else {
+            log.error("Unsupported format for langid.lcmap: "+mapping+". Skipping this mapping.");
+          }
+        }
+      }
+
+      // Language Code mapping
+      mapLcMap = new HashMap<String,String>();
       if(params.get(MAP_LCMAP) != null) {
         for(String mapping : params.get(MAP_LCMAP).split("[, ]")) {
           String[] keyVal = mapping.split(":");
           if(keyVal.length == 2) {
-            lcMap.put(keyVal[0], keyVal[1]);
+            mapLcMap.put(keyVal[0], keyVal[1]);
           } else {
             log.error("Unsupported format for langid.map.lcmap: "+mapping+". Skipping this mapping.");
           }
@@ -322,10 +336,11 @@ public abstract class LanguageIdentifier
       langStr = fallbackLang;
     } else {
       DetectedLanguage lang = languages.get(0);
-      if(langWhitelist.isEmpty() || langWhitelist.contains(lang.getLangCode())) {
-        log.debug("Language detected {} with certainty {}", lang.getLangCode(), lang.getCertainty());
+      String normalizedLang = normalizeLangCode(lang.getLangCode());
+      if(langWhitelist.isEmpty() || langWhitelist.contains(normalizedLang)) {
+        log.debug("Language detected {} with certainty {}", normalizedLang, lang.getCertainty());
         if(lang.getCertainty() >= threshold) {
-          langStr = lang.getLangCode();
+          langStr = normalizedLang;
         } else {
           log.debug("Detected language below threshold {}, using fallback {}", threshold, fallbackLang);
           langStr = fallbackLang;
@@ -345,6 +360,20 @@ public abstract class LanguageIdentifier
   }
 
   /**
+   * Looks up language code in map (langid.lcmap) and returns mapped value
+   * @param langCode the language code string returned from detector
+   * @return the normalized/mapped language code
+   */
+  protected String normalizeLangCode(String langCode) {
+    if (lcMap.containsKey(langCode)) {
+      String lc = lcMap.get(langCode);
+      log.debug("Doing langcode normalization mapping from "+langCode+" to "+lc);
+      return lc;
+    }
+    return langCode;
+  }
+
+  /**
    * Returns the name of the field to map the current contents into, so that they are properly analyzed.  For instance
    * if the currentField is "text" and the code is "en", the new field would by default be "text_en".
    * This method also performs custom regex pattern replace if configured. If enforceSchema=true
@@ -355,7 +384,7 @@ public abstract class LanguageIdentifier
    * @return The new schema field name, based on pattern and replace, or null if illegal
    */
   protected String getMappedField(String currentField, String language) {
-    String lc = lcMap.containsKey(language) ? lcMap.get(language) : language;
+    String lc = mapLcMap.containsKey(language) ? mapLcMap.get(language) : language;
     String newFieldName = langPattern.matcher(mapPattern.matcher(currentField).replaceFirst(mapReplaceStr)).replaceFirst(lc);
     if(enforceSchema && schema.getFieldOrNull(newFieldName) == null) {
       log.warn("Unsuccessful field name mapping from {} to {}, field does not exist and enforceSchema=true; skipping mapping.", currentField, newFieldName);

Modified: lucene/dev/branches/lucene4956/solr/contrib/langid/src/test-files/langid/solr/collection1/conf/solrconfig-languageidentifier.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/langid/src/test-files/langid/solr/collection1/conf/solrconfig-languageidentifier.xml?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/langid/src/test-files/langid/solr/collection1/conf/solrconfig-languageidentifier.xml (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/langid/src/test-files/langid/solr/collection1/conf/solrconfig-languageidentifier.xml Sun Aug 11 12:19:13 2013
@@ -20,6 +20,9 @@
 <config>
 
   <jmx />
+  <indexConfig>
+    <useCompoundFile>${useCompoundFile:false}</useCompoundFile>
+  </indexConfig>
 
   <!-- Used to specify an alternate directory to hold all index data.
        It defaults to "index" if not present, and should probably

Modified: lucene/dev/branches/lucene4956/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java Sun Aug 11 12:19:13 2013
@@ -116,6 +116,22 @@ public abstract class LanguageIdentifier
   }
 
   @Test
+  public void testMapLangcode() throws Exception {
+    parameters = new ModifiableSolrParams();
+    parameters.add("langid.fl", "name");
+    parameters.add("langid.lcmap", "zh_cn:zh zh_tw:zh");
+    parameters.set("langid.enforceSchema", "false");
+    liProcessor = createLangIdProcessor(parameters);
+
+    assertEquals("zh", liProcessor.resolveLanguage("zh_cn", "NA"));
+    assertEquals("zh", liProcessor.resolveLanguage("zh_tw", "NA"));
+    assertEquals("no", liProcessor.resolveLanguage("no", "NA"));
+    List<DetectedLanguage> langs = new ArrayList<DetectedLanguage>();
+    langs.add(new DetectedLanguage("zh_cn", 0.8));
+    assertEquals("zh", liProcessor.resolveLanguage(langs, "NA"));
+  }
+
+  @Test
   public void testPreExisting() throws Exception {
     SolrInputDocument doc;
     parameters = new ModifiableSolrParams();

Modified: lucene/dev/branches/lucene4956/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAToSolrMapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAToSolrMapper.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAToSolrMapper.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAToSolrMapper.java Sun Aug 11 12:19:13 2013
@@ -17,8 +17,6 @@ package org.apache.solr.uima.processor;
  * limitations under the License.
  */
 
-import java.util.Map;
-
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
 import org.apache.uima.cas.FSIterator;
@@ -29,6 +27,8 @@ import org.apache.uima.jcas.tcas.Annotat
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.Map;
+
 /**
  * Map UIMA types and features over fields of a Solr document
  * 
@@ -64,16 +64,18 @@ public class UIMAToSolrMapper {
           String fieldNameFeatureValue = fieldNameFeature == null ? null :
               fs.getFeatureValueAsString(type.getFeatureByBaseName(fieldNameFeature));
           String fieldName = mapField.getFieldName(fieldNameFeatureValue);
-          log.info(new StringBuilder("mapping ").append(typeName).append("@").append(featureName)
-              .append(" to ").append(fieldName).toString());
+          if (log.isInfoEnabled()) {
+            log.info("mapping {}@{} to {}", new Object[]{typeName, featureName, fieldName});
+          }
           String featureValue;
           if (fs instanceof Annotation && "coveredText".equals(featureName)) {
             featureValue = ((Annotation) fs).getCoveredText();
           } else {
             featureValue = fs.getFeatureValueAsString(type.getFeatureByBaseName(featureName));
           }
-          log.info(new StringBuilder("writing ").append(featureValue).append(" in ").append(
-              fieldName).toString());
+          if (log.isDebugEnabled()) {
+            log.debug("writing {} in {}", new Object[]{featureValue, fieldName});
+          }
           document.addField(fieldName, featureValue, 1.0f);
         }
       }

Modified: lucene/dev/branches/lucene4956/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java Sun Aug 11 12:19:13 2013
@@ -111,9 +111,9 @@ public class UIMAUpdateRequestProcessor 
         debugString = " null text";
       }
       if (solrUIMAConfiguration.isIgnoreErrors()) {
-        log.warn(new StringBuilder("skip the text processing due to ")
+        log.warn("skip the text processing due to {}",new StringBuilder()
           .append(e.getLocalizedMessage()).append(optionalFieldInfo)
-          .append(debugString).toString());
+          .append(debugString));
       } else {
         throw new SolrException(ErrorCode.SERVER_ERROR,
             new StringBuilder("processing error ")
@@ -150,7 +150,9 @@ public class UIMAUpdateRequestProcessor 
   /* process a field value executing UIMA the CAS containing it as document text */
   private JCas processText(String textFieldValue) throws ResourceInitializationException,
           AnalysisEngineProcessException {
-    log.info(new StringBuilder("Analyzing text").toString());
+    if (log.isDebugEnabled()) {
+      log.debug("Analyzing text");
+    }
     /* get the UIMA analysis engine */
     AnalysisEngine ae = aeProvider.getAE();
 
@@ -160,7 +162,9 @@ public class UIMAUpdateRequestProcessor 
 
     /* perform analysis on text field */
     ae.process(jcas);
-    log.info("Text processing completed");
+    if (log.isDebugEnabled()) {
+      log.debug("Text processing completed");
+    }
     return jcas;
   }
 

Modified: lucene/dev/branches/lucene4956/solr/contrib/uima/src/test-files/uima/solr/collection1/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/uima/src/test-files/uima/solr/collection1/conf/solrconfig.xml?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/uima/src/test-files/uima/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/uima/src/test-files/uima/solr/collection1/conf/solrconfig.xml Sun Aug 11 12:19:13 2013
@@ -25,6 +25,9 @@
   -->
 <config xmlns:xi="http://www.w3.org/2001/XInclude">
   <luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
+  <indexConfig>
+    <useCompoundFile>${useCompoundFile:false}</useCompoundFile>
+  </indexConfig>
   <!--
    lib directives can be used to instruct Solr to load the Jars
     identified and use them to resolve any "plugins" specified in your
@@ -45,13 +48,10 @@
     ends) will be included.
   -->
   <lib dir="../../dist/" regex="solr-cell-\d.*\.jar" />
-  <lib dir="../../dist/" regex="solr-clustering-\d.*\.jar" />
   <!--
     If a dir option (with or without a regex) is used and nothing is
     found that matches, it will be ignored
   -->
-  <lib dir="../../contrib/clustering/lib/downloads/" />
-  <lib dir="../../contrib/clustering/lib/" />
   <lib dir="/total/crap/dir/ignored" />
   <!--
     an exact path can be used to specify a specific file. This will
@@ -68,7 +68,6 @@
   -->
   <dataDir>${solr.data.dir:}</dataDir>
 
-  <!-- <indexConfig> section could go here, but we want the defaults -->
 
   <!--
     Enables JMX if and only if an existing MBeanServer is found, use
@@ -594,68 +593,6 @@
     </arr>
   </requestHandler>
 
-  <!--
-    Clustering Component http://wiki.apache.org/solr/ClusteringComponent
-    This relies on third party jars which are not included in the
-    release. To use this component (and the "/clustering" handler) Those
-    jars will need to be downloaded, and you'll need to set the
-    solr.clustering.enabled system property when running solr... java
-    -Dsolr.clustering.enabled=true -jar start.jar
-  -->
-  <searchComponent name="clusteringComponent"
-    enable="${solr.clustering.enabled:false}" class="org.apache.solr.handler.clustering.ClusteringComponent">
-    <!-- Declare an engine -->
-    <lst name="engine">
-      <!-- The name, only one can be named "default" -->
-      <str name="name">default</str>
-      <!--
-        Class name of Carrot2 clustering algorithm. Currently available
-        algorithms are: *
-        org.carrot2.clustering.lingo.LingoClusteringAlgorithm *
-        org.carrot2.clustering.stc.STCClusteringAlgorithm See
-        http://project.carrot2.org/algorithms.html for the algorithm's
-        characteristics.
-      -->
-      <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
-      <!--
-        Overriding values for Carrot2 default algorithm attributes. For
-        a description of all available attributes, see:
-        http://download.carrot2.org/stable/manual/#chapter.components.
-        Use attribute key as name attribute of str elements below. These
-        can be further overridden for individual requests by specifying
-        attribute key as request parameter name and attribute value as
-        parameter value.
-      -->
-      <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
-    </lst>
-    <lst name="engine">
-      <str name="name">stc</str>
-      <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
-    </lst>
-  </searchComponent>
-  <requestHandler name="/clustering" enable="${solr.clustering.enabled:false}"
-    class="solr.SearchHandler">
-    <lst name="defaults">
-      <bool name="clustering">true</bool>
-      <str name="clustering.engine">default</str>
-      <bool name="clustering.results">true</bool>
-      <!-- The title field -->
-      <str name="carrot.title">name</str>
-      <str name="carrot.url">id</str>
-      <!-- The field to cluster on -->
-      <str name="carrot.snippet">features</str>
-      <!-- produce summaries -->
-      <bool name="carrot.produceSummary">true</bool>
-      <!-- the maximum number of labels per cluster -->
-      <!--<int name="carrot.numDescriptions">5</int>-->
-      <!-- produce sub clusters -->
-      <bool name="carrot.outputSubClusters">false</bool>
-    </lst>
-    <arr name="last-components">
-      <str>clusteringComponent</str>
-    </arr>
-  </requestHandler>
-
   <!-- Solr Cell: http://wiki.apache.org/solr/ExtractingRequestHandler -->
   <requestHandler name="/update/extract"
     class="org.apache.solr.handler.extraction.ExtractingRequestHandler"

Modified: lucene/dev/branches/lucene4956/solr/contrib/uima/src/test-files/uima/uima-tokenizers-solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/uima/src/test-files/uima/uima-tokenizers-solrconfig.xml?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/uima/src/test-files/uima/uima-tokenizers-solrconfig.xml (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/uima/src/test-files/uima/uima-tokenizers-solrconfig.xml Sun Aug 11 12:19:13 2013
@@ -25,6 +25,9 @@
   -->
 <config xmlns:xi="http://www.w3.org/2001/XInclude">
   <luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
+  <indexConfig>
+    <useCompoundFile>${useCompoundFile:false}</useCompoundFile>
+  </indexConfig>
   <!--
    lib directives can be used to instruct Solr to load the Jars
     identified and use them to resolve any "plugins" specified in your
@@ -45,13 +48,10 @@
     ends) will be included.
   -->
   <lib dir="../../dist/" regex="solr-cell-\d.*\.jar" />
-  <lib dir="../../dist/" regex="solr-clustering-\d.*\.jar" />
   <!--
     If a dir option (with or without a regex) is used and nothing is
     found that matches, it will be ignored
   -->
-  <lib dir="../../contrib/clustering/lib/downloads/" />
-  <lib dir="../../contrib/clustering/lib/" />
   <lib dir="/total/crap/dir/ignored" />
   <!--
     an exact path can be used to specify a specific file. This will
@@ -68,8 +68,6 @@
   -->
   <dataDir>${solr.data.dir:}</dataDir>
 
-  <!-- <indexConfig> section could go here, but we want the defaults -->
-
   <!--
     Enables JMX if and only if an existing MBeanServer is found, use
     this if you want to configure JMX through JVM parameters. Remove
@@ -594,68 +592,6 @@
     </arr>
   </requestHandler>
 
-  <!--
-    Clustering Component http://wiki.apache.org/solr/ClusteringComponent
-    This relies on third party jars which are not included in the
-    release. To use this component (and the "/clustering" handler) Those
-    jars will need to be downloaded, and you'll need to set the
-    solr.clustering.enabled system property when running solr... java
-    -Dsolr.clustering.enabled=true -jar start.jar
-  -->
-  <searchComponent name="clusteringComponent"
-    enable="${solr.clustering.enabled:false}" class="org.apache.solr.handler.clustering.ClusteringComponent">
-    <!-- Declare an engine -->
-    <lst name="engine">
-      <!-- The name, only one can be named "default" -->
-      <str name="name">default</str>
-      <!--
-        Class name of Carrot2 clustering algorithm. Currently available
-        algorithms are: *
-        org.carrot2.clustering.lingo.LingoClusteringAlgorithm *
-        org.carrot2.clustering.stc.STCClusteringAlgorithm See
-        http://project.carrot2.org/algorithms.html for the algorithm's
-        characteristics.
-      -->
-      <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
-      <!--
-        Overriding values for Carrot2 default algorithm attributes. For
-        a description of all available attributes, see:
-        http://download.carrot2.org/stable/manual/#chapter.components.
-        Use attribute key as name attribute of str elements below. These
-        can be further overridden for individual requests by specifying
-        attribute key as request parameter name and attribute value as
-        parameter value.
-      -->
-      <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
-    </lst>
-    <lst name="engine">
-      <str name="name">stc</str>
-      <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
-    </lst>
-  </searchComponent>
-  <requestHandler name="/clustering" enable="${solr.clustering.enabled:false}"
-    class="solr.SearchHandler">
-    <lst name="defaults">
-      <bool name="clustering">true</bool>
-      <str name="clustering.engine">default</str>
-      <bool name="clustering.results">true</bool>
-      <!-- The title field -->
-      <str name="carrot.title">name</str>
-      <str name="carrot.url">id</str>
-      <!-- The field to cluster on -->
-      <str name="carrot.snippet">features</str>
-      <!-- produce summaries -->
-      <bool name="carrot.produceSummary">true</bool>
-      <!-- the maximum number of labels per cluster -->
-      <!--<int name="carrot.numDescriptions">5</int>-->
-      <!-- produce sub clusters -->
-      <bool name="carrot.outputSubClusters">false</bool>
-    </lst>
-    <arr name="last-components">
-      <str>clusteringComponent</str>
-    </arr>
-  </requestHandler>
-
   <!-- Solr Cell: http://wiki.apache.org/solr/ExtractingRequestHandler -->
   <requestHandler name="/update/extract"
     class="org.apache.solr.handler.extraction.ExtractingRequestHandler"

Modified: lucene/dev/branches/lucene4956/solr/contrib/velocity/src/java/org/apache/solr/response/VelocityResponseWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/velocity/src/java/org/apache/solr/response/VelocityResponseWriter.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/velocity/src/java/org/apache/solr/response/VelocityResponseWriter.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/velocity/src/java/org/apache/solr/response/VelocityResponseWriter.java Sun Aug 11 12:19:13 2013
@@ -17,6 +17,7 @@
 
 package org.apache.solr.response;
 
+import org.apache.lucene.util.IOUtils;
 import org.apache.solr.client.solrj.SolrResponse;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.client.solrj.response.SolrResponseBase;
@@ -150,7 +151,7 @@ public class VelocityResponseWriter impl
         try {
           is = resourceLoader.getResourceStream(propFile);
           Properties props = new Properties();
-          props.load(is);
+          props.load(new InputStreamReader(is, IOUtils.CHARSET_UTF_8));
           engine.init(props);
         }
         finally {

Modified: lucene/dev/branches/lucene4956/solr/contrib/velocity/src/test-files/velocity/solr/collection1/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/velocity/src/test-files/velocity/solr/collection1/conf/solrconfig.xml?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/velocity/src/test-files/velocity/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/velocity/src/test-files/velocity/solr/collection1/conf/solrconfig.xml Sun Aug 11 12:19:13 2013
@@ -22,6 +22,9 @@
 -->
 <config>
   <luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
+  <indexConfig>
+    <useCompoundFile>${useCompoundFile:false}</useCompoundFile>
+  </indexConfig>
 
   <lib dir="../../contrib/velocity/lib" />
   <lib dir="../../dist/" regex="solr-velocity-\d.*\.jar" />
@@ -31,9 +34,6 @@
   <directoryFactory name="DirectoryFactory"
                     class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>
 
-
-  <!-- <indexConfig> section could go here, but we want the defaults -->
-
   <updateHandler class="solr.DirectUpdateHandler2">
   </updateHandler>
   

Modified: lucene/dev/branches/lucene4956/solr/core/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/core/build.xml?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/core/build.xml (original)
+++ lucene/dev/branches/lucene4956/solr/core/build.xml Sun Aug 11 12:19:13 2013
@@ -15,16 +15,23 @@
     See the License for the specific language governing permissions and
     limitations under the License.
  -->
-<project name="solr-core" default="default">
+<project name="solr-core" default="default" xmlns:ivy="antlib:org.apache.ivy.ant">
   <description>Solr Core</description>
 
   <!-- html file for testing -->
   <property name="rat.excludes" value="**/htmlStripReaderTest.html,**/*.iml"/>
+  
+  <property name="test.lib.dir" location="test-lib"/>
 
   <import file="../common-build.xml"/>
 
   <target name="compile-core" depends="compile-solrj,common-solr.compile-core"/>
 
+  <path id="test.classpath">
+    <path refid="solr.test.base.classpath"/>
+    <fileset dir="${test.lib.dir}" includes="*.jar"/>
+  </path>
+
   <!-- specialized to ONLY depend on solrj -->
   <target name="javadocs" depends="compile-core,define-lucene-javadoc-url,lucene-javadocs,javadocs-solrj">
     <sequential>
@@ -43,8 +50,13 @@
 
   <target name="dist-maven" depends="dist-maven-src-java"/>
 
-
-
+  <target name="resolve" depends="ivy-availability-check,ivy-fail,ivy-configure">
+    <sequential>
+      <ivy:retrieve conf="compile,compile.hadoop" type="jar,bundle" sync="${ivy.sync}" log="download-only"/>
+      <ivy:retrieve conf="test,test.DfsMiniCluster" type="jar,bundle,test" sync="${ivy.sync}" log="download-only"
+                    pattern="${test.lib.dir}/[artifact]-[revision](-[classifier]).[ext]"/>
+    </sequential>
+  </target>
 
   <target name="javacc" depends="javacc-QueryParser"/>
   <target name="javacc-QueryParser" depends="resolve-javacc">
@@ -67,7 +79,7 @@
   <target name="resolve-javacc" xmlns:ivy="antlib:org.apache.ivy.ant">
     <!-- setup a "fake" JavaCC distribution folder in ${build.dir} to make JavaCC ANT task happy: -->
     <ivy:retrieve organisation="net.java.dev.javacc" module="javacc" revision="5.0"
-      inline="true" conf="default" transitive="false" type="jar" sync="true"
+      inline="true" transitive="false" type="jar" sync="true"
       pattern="${build.dir}/javacc/bin/lib/[artifact].[ext]"/>
   </target>
 

Modified: lucene/dev/branches/lucene4956/solr/core/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/core/ivy.xml?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/core/ivy.xml (original)
+++ lucene/dev/branches/lucene4956/solr/core/ivy.xml Sun Aug 11 12:19:13 2013
@@ -16,22 +16,57 @@
    specific language governing permissions and limitations
    under the License.    
 -->
-<ivy-module version="2.0">
-    <info organisation="org.apache.solr" module="core"/>
+<!DOCTYPE ivy-module [
+  <!ENTITY hadoop.version "2.0.5-alpha">
+]>
+<ivy-module version="2.0"  xmlns:maven="http://ant.apache.org/ivy/maven">
+  <info organisation="org.apache.solr" module="core"/>
+  
+  <configurations>
+    <!-- artifacts in the "compile" and "compile.hadoop" configurations will go into solr/core/lib/ -->
+    <conf name="compile" transitive="false"/>
+    <conf name="compile.hadoop" transitive="false"/>
+    <!-- artifacts in the "test" and "test.DfsMiniCluster" configurations will go into solr/core/test-lib/ -->
+    <conf name="test" transitive="false"/>
+    <conf name="test.DfsMiniCluster" transitive="false"/>
+  </configurations>
 
-    <dependencies>
-      <dependency org="commons-codec" name="commons-codec" rev="1.7" transitive="false"/>
-      <dependency org="commons-fileupload" name="commons-fileupload" rev="1.2.1" transitive="false"/>
-      <dependency org="commons-cli" name="commons-cli" rev="1.2" transitive="false"/>
-      <dependency org="commons-lang" name="commons-lang" rev="2.6" transitive="false"/>
-      <dependency org="com.google.guava" name="guava" rev="14.0.1" transitive="false"/>
-      <dependency org="org.easymock" name="easymock" rev="3.0" transitive="false"/>
-      <dependency org="cglib" name="cglib-nodep" rev="2.2" transitive="false"/>
-      <dependency org="org.objenesis" name="objenesis" rev="1.2" transitive="false"/>
-      <dependency org="com.spatial4j" name="spatial4j" rev="0.3" transitive="false"/>
-      <dependency org="javax.servlet" name="javax.servlet-api" rev="3.0.1" transitive="false"/>
-      <dependency org="org.restlet.jee" name="org.restlet" rev="2.1.1" transitive="false"/>
-      <dependency org="org.restlet.jee" name="org.restlet.ext.servlet" rev="2.1.1" transitive="false"/>
-      <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
-    </dependencies>
+  <dependencies>
+    <dependency org="commons-codec" name="commons-codec" rev="1.7" conf="compile->*"/>
+    <dependency org="commons-fileupload" name="commons-fileupload" rev="1.2.1" conf="compile->*"/>
+    <dependency org="commons-cli" name="commons-cli" rev="1.2" conf="compile->*"/>
+    <dependency org="commons-lang" name="commons-lang" rev="2.6" conf="compile->*"/>
+    <dependency org="com.google.guava" name="guava" rev="14.0.1" conf="compile->*"/>
+    <dependency org="com.spatial4j" name="spatial4j" rev="0.3" conf="compile->*"/>
+    <dependency org="org.restlet.jee" name="org.restlet" rev="2.1.1" conf="compile->*"/>
+    <dependency org="org.restlet.jee" name="org.restlet.ext.servlet" rev="2.1.1" conf="compile->*"/>
+    <dependency org="joda-time" name="joda-time" rev="2.2" conf="compile->*"/>
+
+    <dependency org="javax.servlet" name="javax.servlet-api" rev="3.0.1" conf="test->*"/>
+    <dependency org="org.easymock" name="easymock" rev="3.0" conf="test->*"/>
+    <dependency org="cglib" name="cglib-nodep" rev="2.2" conf="test->*"/>
+    <dependency org="org.objenesis" name="objenesis" rev="1.2" conf="test->*"/>
+
+    <dependency org="org.apache.hadoop" name="hadoop-common" rev="&hadoop.version;" conf="compile.hadoop->*"/>
+    <dependency org="org.apache.hadoop" name="hadoop-hdfs" rev="&hadoop.version;" conf="compile.hadoop->*"/>
+    <dependency org="org.apache.hadoop" name="hadoop-annotations" rev="&hadoop.version;" conf="compile.hadoop->*"/>
+    <dependency org="org.apache.hadoop" name="hadoop-auth" rev="&hadoop.version;" conf="compile.hadoop->*"/>
+    <dependency org="commons-configuration" name="commons-configuration" rev="1.6" conf="compile.hadoop->*"/>
+    <dependency org="com.google.protobuf" name="protobuf-java" rev="2.4.0a" conf="compile.hadoop->*"/>
+    <dependency org="com.googlecode.concurrentlinkedhashmap" name="concurrentlinkedhashmap-lru" rev="1.2" conf="compile.hadoop->*"/>
+
+    <!-- Hadoop DfsMiniCluster Dependencies-->
+    <dependency org="org.apache.hadoop" name="hadoop-common" rev="&hadoop.version;" conf="test.DfsMiniCluster->*">
+      <artifact name="hadoop-common" type="test" ext="jar" maven:classifier="tests" />
+    </dependency>
+    <dependency org="org.apache.hadoop" name="hadoop-hdfs" rev="&hadoop.version;" conf="test.DfsMiniCluster->*">
+      <artifact name="hadoop-hdfs" type="test" ext="jar" maven:classifier="tests" />
+    </dependency>
+    <dependency org="org.mortbay.jetty" name="jetty" rev="6.1.26" conf="test.DfsMiniCluster->*"/>
+    <dependency org="org.mortbay.jetty" name="jetty-util" rev="6.1.26" conf="test.DfsMiniCluster->*"/>
+    <dependency org="com.sun.jersey" name="jersey-core" rev="1.16" conf="test.DfsMiniCluster->*"/>
+    <dependency org="commons-collections" name="commons-collections" rev="3.2.1" conf="test.DfsMiniCluster->*"/>
+
+    <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/> 
+  </dependencies>
 </ivy-module>

Modified: lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/SolrLogFormatter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/SolrLogFormatter.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/SolrLogFormatter.java (original)
+++ lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/SolrLogFormatter.java Sun Aug 11 12:19:13 2013
@@ -17,21 +17,25 @@ package org.apache.solr;
  * limitations under the License.
  */
 
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.WeakHashMap;
+import java.util.logging.ConsoleHandler;
+import java.util.logging.Formatter;
+import java.util.logging.Handler;
+import java.util.logging.Level;
+import java.util.logging.LogRecord;
+import java.util.logging.Logger;
+
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.Replica;
-import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.request.SolrRequestInfo;
 import org.slf4j.LoggerFactory;
 
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.WeakHashMap;
-import java.util.logging.*;
-
 public class SolrLogFormatter extends Formatter {
 
   /** Add this interface to a thread group and the string returned by
@@ -259,7 +263,7 @@ sb.append("(group_name=").append(tg.getN
 
   private Map<String,Object> getReplicaProps(ZkController zkController, SolrCore core) {
     final String collection = core.getCoreDescriptor().getCloudDescriptor().getCollectionName();
-    Replica replica = zkController.getClusterState().getReplica(collection, zkController.getCoreNodeName(core.getCoreDescriptor()));
+    Replica replica = zkController.getClusterState().getReplica(collection, core.getCoreDescriptor().getCloudDescriptor().getCoreNodeName());
     if(replica!=null) {
       return replica.getProperties();
     }

Modified: lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java (original)
+++ lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java Sun Aug 11 12:19:13 2013
@@ -85,6 +85,7 @@ public class JettySolrRunner {
   private String shards;
 
   private String dataDir;
+  private String solrUlogDir;
   
   private volatile boolean startedBefore = false;
 
@@ -359,6 +360,9 @@ public class JettySolrRunner {
     if( dataDir != null) {
       System.setProperty("solr.data.dir", dataDir);
     }
+    if( solrUlogDir != null) {
+      System.setProperty("solr.ulog.dir", solrUlogDir);
+    }
     if(shards != null) {
       System.setProperty("shard", shards);
     }
@@ -382,6 +386,8 @@ public class JettySolrRunner {
     System.clearProperty("shard");
     System.clearProperty("solr.data.dir");
     System.clearProperty("coreNodeName");
+    System.clearProperty("solr.ulog.dir");
+
   }
 
   public void stop() throws Exception {
@@ -485,6 +491,10 @@ public class JettySolrRunner {
   public void setDataDir(String dataDir) {
     this.dataDir = dataDir;
   }
+  
+  public void setUlogDir(String ulogDir) {
+    this.solrUlogDir = ulogDir;
+  }
 
   public void setCoreNodeName(String coreNodeName) {
     this.coreNodeName = coreNodeName;

Modified: lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/cloud/CloudDescriptor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/cloud/CloudDescriptor.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/cloud/CloudDescriptor.java (original)
+++ lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/cloud/CloudDescriptor.java Sun Aug 11 12:19:13 2013
@@ -17,11 +17,16 @@ package org.apache.solr.cloud;
  * limitations under the License.
  */
 
-import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.core.CoreDescriptor;
+import org.apache.solr.util.PropertiesUtil;
+
+import java.util.Properties;
 
 public class CloudDescriptor {
+
   private String shardId;
   private String collectionName;
   private SolrParams params;
@@ -36,6 +41,21 @@ public class CloudDescriptor {
 
   volatile boolean isLeader = false;
   volatile String lastPublished = ZkStateReader.ACTIVE;
+
+  public static final String SHARD_STATE = "shardState";
+  public static final String NUM_SHARDS = "numShards";
+  public static final String SHARD_RANGE = "shardRange";
+
+  public CloudDescriptor(String coreName, Properties props) {
+    this.shardId = props.getProperty(CoreDescriptor.CORE_SHARD, null);
+    // If no collection name is specified, we default to the core name
+    this.collectionName = props.getProperty(CoreDescriptor.CORE_COLLECTION, coreName);
+    this.roles = props.getProperty(CoreDescriptor.CORE_ROLES, null);
+    this.nodeName = props.getProperty(CoreDescriptor.CORE_NODE_NAME);
+    this.shardState = props.getProperty(CloudDescriptor.SHARD_STATE, Slice.ACTIVE);
+    this.numShards = PropertiesUtil.toInteger(props.getProperty(CloudDescriptor.NUM_SHARDS), null);
+    this.shardRange = props.getProperty(CloudDescriptor.SHARD_RANGE, null);
+  }
   
   public String getLastPublished() {
     return lastPublished;
@@ -44,6 +64,10 @@ public class CloudDescriptor {
   public boolean isLeader() {
     return isLeader;
   }
+  
+  public void setLeader(boolean isLeader) {
+    this.isLeader = isLeader;
+  }
 
   public void setShardId(String shardId) {
     this.shardId = shardId;

Modified: lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java (original)
+++ lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java Sun Aug 11 12:19:13 2013
@@ -1,8 +1,5 @@
 package org.apache.solr.cloud;
 
-import java.io.IOException;
-import java.util.Map;
-
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.cloud.ClusterState;
@@ -21,6 +18,9 @@ import org.apache.zookeeper.KeeperExcept
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
+import java.util.Map;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -47,9 +47,9 @@ public abstract class ElectionContext {
   String leaderSeqPath;
   private SolrZkClient zkClient;
   
-  public ElectionContext(final String shardZkNodeName,
+  public ElectionContext(final String coreNodeName,
       final String electionPath, final String leaderPath, final ZkNodeProps leaderProps, final SolrZkClient zkClient) {
-    this.id = shardZkNodeName;
+    this.id = coreNodeName;
     this.electionPath = electionPath;
     this.leaderPath = leaderPath;
     this.leaderProps = leaderProps;
@@ -78,8 +78,8 @@ class ShardLeaderElectionContextBase ext
   protected LeaderElector leaderElector;
 
   public ShardLeaderElectionContextBase(LeaderElector leaderElector, final String shardId,
-      final String collection, final String shardZkNodeName, ZkNodeProps props, ZkStateReader zkStateReader) {
-    super(shardZkNodeName, ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection + "/leader_elect/"
+      final String collection, final String coreNodeName, ZkNodeProps props, ZkStateReader zkStateReader) {
+    super(coreNodeName, ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection + "/leader_elect/"
         + shardId, ZkStateReader.getShardLeadersPath(collection, shardId),
         props, zkStateReader.getZkClient());
     this.leaderElector = leaderElector;
@@ -95,7 +95,7 @@ class ShardLeaderElectionContextBase ext
     zkClient.makePath(leaderPath, ZkStateReader.toJSON(leaderProps),
         CreateMode.EPHEMERAL, true);
     assert shardId != null;
-    ZkNodeProps m = ZkNodeProps.fromKeyVals(Overseer.QUEUE_OPERATION, "leader",
+    ZkNodeProps m = ZkNodeProps.fromKeyVals(Overseer.QUEUE_OPERATION, ZkStateReader.LEADER_PROP,
         ZkStateReader.SHARD_ID_PROP, shardId, ZkStateReader.COLLECTION_PROP,
         collection, ZkStateReader.BASE_URL_PROP, leaderProps.getProperties()
             .get(ZkStateReader.BASE_URL_PROP), ZkStateReader.CORE_NAME_PROP,
@@ -119,8 +119,8 @@ final class ShardLeaderElectionContext e
   
   public ShardLeaderElectionContext(LeaderElector leaderElector, 
       final String shardId, final String collection,
-      final String shardZkNodeName, ZkNodeProps props, ZkController zkController, CoreContainer cc) {
-    super(leaderElector, shardId, collection, shardZkNodeName, props,
+      final String coreNodeName, ZkNodeProps props, ZkController zkController, CoreContainer cc) {
+    super(leaderElector, shardId, collection, coreNodeName, props,
         zkController.getZkStateReader());
     this.zkController = zkController;
     this.cc = cc;
@@ -138,18 +138,18 @@ final class ShardLeaderElectionContext e
   @Override
   void runLeaderProcess(boolean weAreReplacement) throws KeeperException,
       InterruptedException, IOException {
-    log.info("Running the leader process.");
+    log.info("Running the leader process for shard " + shardId);
     
     String coreName = leaderProps.getStr(ZkStateReader.CORE_NAME_PROP);
     
     // clear the leader in clusterstate
-    ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, "leader",
+    ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, ZkStateReader.LEADER_PROP,
         ZkStateReader.SHARD_ID_PROP, shardId, ZkStateReader.COLLECTION_PROP,
         collection);
     Overseer.getInQueue(zkClient).offer(ZkStateReader.toJSON(m));
     
-    String leaderVoteWait = cc.getZkController().getLeaderVoteWait();
-    if (!weAreReplacement && leaderVoteWait != null) {
+    int leaderVoteWait = cc.getZkController().getLeaderVoteWait();
+    if (!weAreReplacement) {
       waitForReplicasToComeUp(weAreReplacement, leaderVoteWait);
     }
     
@@ -243,8 +243,8 @@ final class ShardLeaderElectionContext e
       }
 
       log.info("I am the new leader: "
-          + ZkCoreNodeProps.getCoreUrl(leaderProps));
-      core.getCoreDescriptor().getCloudDescriptor().isLeader = true;
+          + ZkCoreNodeProps.getCoreUrl(leaderProps) + " " + shardId);
+      core.getCoreDescriptor().getCloudDescriptor().setLeader(true);
     } finally {
       if (core != null) {
         core.close();
@@ -254,16 +254,17 @@ final class ShardLeaderElectionContext e
     try {
       super.runLeaderProcess(weAreReplacement);
     } catch (Throwable t) {
+      SolrException.log(log, "There was a problem trying to register as the leader", t);
+      cancelElection();
       try {
         core = cc.getCore(coreName);
         if (core == null) {
-          cancelElection();
           throw new SolrException(ErrorCode.SERVER_ERROR,
               "Fatal Error, SolrCore not found:" + coreName + " in "
                   + cc.getCoreNames());
         }
         
-        core.getCoreDescriptor().getCloudDescriptor().isLeader = false;
+        core.getCoreDescriptor().getCloudDescriptor().setLeader(false);
         
         // we could not publish ourselves as leader - rejoin election
         rejoinLeaderElection(leaderSeqPath, core);
@@ -308,8 +309,7 @@ final class ShardLeaderElectionContext e
   }
 
   private void waitForReplicasToComeUp(boolean weAreReplacement,
-      String leaderVoteWait) throws InterruptedException {
-    int timeout = Integer.parseInt(leaderVoteWait);
+      int timeout) throws InterruptedException {
     long timeoutAt = System.currentTimeMillis() + timeout;
     final String shardsElectZkPath = electionPath + LeaderElector.ELECTION_NODE;
     
@@ -332,7 +332,7 @@ final class ShardLeaderElectionContext e
           return;
         } else {
           if (cnt % 40 == 0) {
-            log.info("Waiting until we see more replicas up: total="
+            log.info("Waiting until we see more replicas up for shard " + shardId + ": total="
               + slices.getReplicasMap().size() + " found=" + found
               + " timeoutin=" + (timeoutAt - System.currentTimeMillis()));
           }

Modified: lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/cloud/Overseer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/cloud/Overseer.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/cloud/Overseer.java (original)
+++ lucene/dev/branches/lucene4956/solr/core/src/java/org/apache/solr/cloud/Overseer.java Sun Aug 11 12:19:13 2013
@@ -17,17 +17,8 @@ package org.apache.solr.cloud;
  * the License.
  */
 
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-
 import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.cloud.ClosableThread;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;
@@ -45,20 +36,32 @@ import org.apache.zookeeper.KeeperExcept
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
 /**
 * Cluster leader. Responsible for node assignments, cluster state file?
  */
 public class Overseer {
   public static final String QUEUE_OPERATION = "operation";
+  public static final String DELETECORE = "deletecore";
   public static final String REMOVECOLLECTION = "removecollection";
-  
+  public static final String REMOVESHARD = "removeshard";
+
   private static final int STATE_UPDATE_DELAY = 1500;  // delay between cloud state updates
 
+
   private static Logger log = LoggerFactory.getLogger(Overseer.class);
   
   private class ClusterStateUpdater implements Runnable, ClosableThread {
     
-    private static final String DELETECORE = "deletecore";
     private final ZkStateReader reader;
     private final SolrZkClient zkClient;
     private final String myId;
@@ -180,6 +183,8 @@ public class Overseer {
         clusterState = removeCore(clusterState, message);
       } else if (REMOVECOLLECTION.equals(operation)) {
         clusterState = removeCollection(clusterState, message);
+      } else if (REMOVESHARD.equals(operation)) {
+        clusterState = removeShard(clusterState, message);
       } else if (ZkStateReader.LEADER_PROP.equals(operation)) {
 
         StringBuilder sb = new StringBuilder();
@@ -198,13 +203,36 @@ public class Overseer {
         clusterState = createShard(clusterState, message);
       } else if ("updateshardstate".equals(operation))  {
         clusterState = updateShardState(clusterState, message);
+      } else if (OverseerCollectionProcessor.CREATECOLLECTION.equals(operation)) {
+         clusterState = buildCollection(clusterState, message);
       } else {
         throw new RuntimeException("unknown operation:" + operation
             + " contents:" + message.getProperties());
       }
       return clusterState;
     }
-      
+
+    private ClusterState buildCollection(ClusterState clusterState, ZkNodeProps message) {
+      String collection = message.getStr("name");
+      log.info("building a new collection: " + collection);
+      if(clusterState.getCollections().contains(collection) ){
+        log.warn("Collection {} already exists. exit" ,collection);
+        return clusterState;
+      }
+
+      ArrayList<String> shardNames = new ArrayList<String>();
+
+      if(ImplicitDocRouter.NAME.equals( message.getStr("router",DocRouter.DEFAULT_NAME))){
+        getShardNames(shardNames,message.getStr("shards",DocRouter.DEFAULT_NAME));
+      } else {
+        int numShards = message.getInt(ZkStateReader.NUM_SHARDS_PROP, -1);
+        if(numShards<1) throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,"numShards is a required parameter for 'compositeId' router");
+        getShardNames(numShards, shardNames);
+      }
+
+      return createCollection(clusterState,collection,shardNames,message);
+    }
+
     private ClusterState updateShardState(ClusterState clusterState, ZkNodeProps message) {
       String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
       log.info("Update shard state invoked for collection: " + collection);
@@ -265,23 +293,51 @@ public class Overseer {
        */
       private ClusterState updateState(ClusterState state, final ZkNodeProps message) {
         final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
+        assert collection.length() > 0 : message;
+        
+        try {
+          if (!zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection, true)) {
+            log.warn("Could not find collection node for " + collection + ", skipping publish state");
+          }
+        } catch (KeeperException e) {
+          throw new SolrException(ErrorCode.SERVER_ERROR, e);
+        } catch (InterruptedException e) {
+          Thread.currentThread().interrupt();
+          throw new SolrException(ErrorCode.SERVER_ERROR, e);
+        }
+        
         String coreNodeName = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP);
         if (coreNodeName == null) {
-          // it must be the default then
-          coreNodeName = message.getStr(ZkStateReader.NODE_NAME_PROP) + "_" + message.getStr(ZkStateReader.CORE_NAME_PROP);
+          coreNodeName = getAssignedCoreNodeName(state, message);
+          if (coreNodeName != null) {
+            log.info("node=" + coreNodeName + " is already registered");
+          } else {
+            // if coreNodeName is null, auto assign one
+            coreNodeName = Assign.assignNode(collection, state);
+          }
+          message.getProperties().put(ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName);
         }
-        Integer numShards = message.getStr(ZkStateReader.NUM_SHARDS_PROP)!=null?Integer.parseInt(message.getStr(ZkStateReader.NUM_SHARDS_PROP)):null;
+        Integer numShards = message.getInt(ZkStateReader.NUM_SHARDS_PROP, null);
         log.info("Update state numShards={} message={}", numShards, message);
+
+        String router = message.getStr(OverseerCollectionProcessor.ROUTER,DocRouter.DEFAULT_NAME);
+        List<String> shardNames  = new ArrayList<String>();
+
         //collection does not yet exist, create placeholders if num shards is specified
         boolean collectionExists = state.getCollections().contains(collection);
         if (!collectionExists && numShards!=null) {
-          state = createCollection(state, collection, numShards);
+          if(ImplicitDocRouter.NAME.equals(router)){
+            getShardNames(shardNames, message.getStr("shards",null));
+            numShards = shardNames.size();
+          }else {
+            getShardNames(numShards, shardNames);
+          }
+          state = createCollection(state, collection, shardNames, message);
         }
         
         // use the provided non null shardId
         String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP);
         if (sliceName == null) {
-          //String nodeName = message.getStr(ZkStateReader.NODE_NAME_PROP);
           //get shardId from ClusterState
           sliceName = getAssignedId(state, coreNodeName, message);
           if (sliceName != null) {
@@ -295,8 +351,8 @@ public class Overseer {
             numShards = state.getCollectionStates().get(collection).getSlices().size();
             log.info("Collection already exists with " + ZkStateReader.NUM_SHARDS_PROP + "=" + numShards);
           }
-          sliceName = AssignShard.assignShard(collection, state, numShards);
-          log.info("Assigning new node to shard=" + sliceName);
+          sliceName = Assign.assignShard(collection, state, numShards);
+          log.info("Assigning new node to shard shard=" + sliceName);
         }
 
         Slice slice = state.getSlice(collection, sliceName);
@@ -320,8 +376,11 @@ public class Overseer {
           }
         }
 
-        // we don't put num_shards in the clusterstate
+        // we don't put these in the clusterstate
           replicaProps.remove(ZkStateReader.NUM_SHARDS_PROP);
+          replicaProps.remove(ZkStateReader.CORE_NODE_NAME_PROP);
+          replicaProps.remove(ZkStateReader.SHARD_ID_PROP);
+          replicaProps.remove(ZkStateReader.COLLECTION_PROP);
           replicaProps.remove(QUEUE_OPERATION);
           
           // remove any props with null values
@@ -365,34 +424,42 @@ public class Overseer {
           return newClusterState;
       }
 
-    private  Map<String,Object> defaultCollectionProps() {
-      HashMap<String,Object> props = new HashMap<String, Object>(2);
-      props.put(DocCollection.DOC_ROUTER, DocRouter.DEFAULT_NAME);
-      return props;
-    }
+      private ClusterState createCollection(ClusterState state, String collectionName, List<String> shards , ZkNodeProps message) {
+        log.info("Create collection {} with shards {}", collectionName, shards);
 
-      private ClusterState createCollection(ClusterState state, String collectionName, int numShards) {
-        log.info("Create collection {} with numShards {}", collectionName, numShards);
+        String routerName = message.getStr(OverseerCollectionProcessor.ROUTER,DocRouter.DEFAULT_NAME);
+        DocRouter router = DocRouter.getDocRouter(routerName);
 
-        DocRouter router = DocRouter.DEFAULT;
-        List<DocRouter.Range> ranges = router.partitionRange(numShards, router.fullRange());
+        List<DocRouter.Range> ranges = router.partitionRange(shards.size(), router.fullRange());
 
         Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>();
 
 
         Map<String, Slice> newSlices = new LinkedHashMap<String,Slice>();
         newCollections.putAll(state.getCollectionStates());
+        for (int i = 0; i < shards.size(); i++) {
+          String sliceName = shards.get(i);
+        /*}
         for (int i = 0; i < numShards; i++) {
-          final String sliceName = "shard" + (i+1);
+          final String sliceName = "shard" + (i+1);*/
 
-          Map<String,Object> sliceProps = new LinkedHashMap<String,Object>(1);
-          sliceProps.put(Slice.RANGE, ranges.get(i));
+          Map<String, Object> sliceProps = new LinkedHashMap<String, Object>(1);
+          sliceProps.put(Slice.RANGE, ranges == null? null: ranges.get(i));
 
           newSlices.put(sliceName, new Slice(sliceName, null, sliceProps));
         }
 
         // TODO: fill in with collection properties read from the /collections/<collectionName> node
-        Map<String,Object> collectionProps = defaultCollectionProps();
+        Map<String,Object> collectionProps = new HashMap<String,Object>();
+
+        for (Entry<String, Object> e : OverseerCollectionProcessor.COLL_PROPS.entrySet()) {
+          Object val = message.get(e.getKey());
+          if(val == null){
+            val = OverseerCollectionProcessor.COLL_PROPS.get(e.getKey());
+          }
+          if(val != null) collectionProps.put(e.getKey(),val);
+        }
+        collectionProps.put(DocCollection.DOC_ROUTER, routerName);
 
         DocCollection newCollection = new DocCollection(collectionName, newSlices, collectionProps, router);
 
@@ -417,10 +484,29 @@ public class Overseer {
         return null;
       }
       
+      private String getAssignedCoreNodeName(ClusterState state, ZkNodeProps message) {
+        Collection<Slice> slices = state.getSlices(message.getStr(ZkStateReader.COLLECTION_PROP));
+        if (slices != null) {
+          for (Slice slice : slices) {
+            for (Replica replica : slice.getReplicas()) {
+              String baseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP);
+              String core = replica.getStr(ZkStateReader.CORE_NAME_PROP);
+              
+              String msgBaseUrl = message.getStr(ZkStateReader.BASE_URL_PROP);
+              String msgCore = message.getStr(ZkStateReader.CORE_NAME_PROP);
+              
+              if (baseUrl.equals(msgBaseUrl) && core.equals(msgCore)) {
+                return replica.getName();
+              }
+            }
+          }
+        }
+        return null;
+      }
+      
       private ClusterState updateSlice(ClusterState state, String collectionName, Slice slice) {
         // System.out.println("###!!!### OLD CLUSTERSTATE: " + JSONUtil.toJSON(state.getCollectionStates()));
         // System.out.println("Updating slice:" + slice);
-
         Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(state.getCollectionStates());  // make a shallow copy
         DocCollection coll = newCollections.get(collectionName);
         Map<String,Slice> slices;
@@ -519,17 +605,33 @@ public class Overseer {
         ClusterState newState = new ClusterState(clusterState.getLiveNodes(), newCollections);
         return newState;
       }
-      
-      /*
+
+    /*
+     * Remove collection slice from cloudstate
+     */
+    private ClusterState removeShard(final ClusterState clusterState, ZkNodeProps message) {
+
+      final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
+      final String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP);
+
+      final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy
+      DocCollection coll = newCollections.get(collection);
+
+      Map<String, Slice> newSlices = new LinkedHashMap<String, Slice>(coll.getSlicesMap());
+      newSlices.remove(sliceId);
+
+      DocCollection newCollection = new DocCollection(coll.getName(), newSlices, coll.getProperties(), coll.getRouter());
+      newCollections.put(newCollection.getName(), newCollection);
+
+      return new ClusterState(clusterState.getLiveNodes(), newCollections);
+    }
+
+    /*
        * Remove core from cloudstate
        */
       private ClusterState removeCore(final ClusterState clusterState, ZkNodeProps message) {
         
         String cnn = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP);
-        if (cnn == null) {
-          // it must be the default then
-          cnn = message.getStr(ZkStateReader.NODE_NAME_PROP) + "_" + message.getStr(ZkStateReader.CORE_NAME_PROP);
-        }
 
         final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
 
@@ -537,6 +639,15 @@ public class Overseer {
         DocCollection coll = newCollections.get(collection);
         if (coll == null) {
           // TODO: log/error that we didn't find it?
+          // just in case, remove the zk collection node
+          try {
+            zkClient.clean("/collections/" + collection);
+          } catch (InterruptedException e) {
+            SolrException.log(log, "Cleaning up collection in zk was interrupted:" + collection, e);
+            Thread.currentThread().interrupt();
+          } catch (KeeperException e) {
+            SolrException.log(log, "Problem cleaning up collection in zk:" + collection, e);
+          }
           return clusterState;
         }
 
@@ -610,6 +721,28 @@ public class Overseer {
     
   }
 
+  static void getShardNames(Integer numShards, List<String> shardNames) {
+    if(numShards == null)
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "numShards" + " is a required param");
+    for (int i = 0; i < numShards; i++) {
+      final String sliceName = "shard" + (i + 1);
+      shardNames.add(sliceName);
+    }
+
+  }
+
+  static void getShardNames(List<String> shardNames, String shards) {
+    if(shards ==null)
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "shards" + " is a required param");
+    for (String s : shards.split(",")) {
+      if(s ==null || s.trim().isEmpty()) continue;
+      shardNames.add(s.trim());
+    }
+    if(shardNames.isEmpty())
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "shards" + " is a required param");
+
+  }
+
   class OverseerThread extends Thread implements ClosableThread {
 
     private volatile boolean isClosed;