You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by fe...@apache.org on 2013/05/29 17:46:29 UTC

svn commit: r1487521 - in /nutch/branches/2.x: ./ conf/ src/java/org/apache/nutch/indexer/solr/

Author: fenglu
Date: Wed May 29 15:46:29 2013
New Revision: 1487521

URL: http://svn.apache.org/r1487521
Log:
NUTCH-1575 support solr authentication in nutch 2.x

Added:
    nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrUtils.java
Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/conf/nutch-default.xml
    nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrConstants.java
    nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
    nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java
    nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1487521&r1=1487520&r2=1487521&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Wed May 29 15:46:29 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 2.2 - Current Development
 
+* NUTCH-1575 support solr authentication in nutch 2.x (Feng)
+
 * NUTCH-1569 Upgrade 2.x to Gora 0.3 (lewismc)
 
 * NUTCH-1243 Junit jar removed from lib (lewismc)

Modified: nutch/branches/2.x/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/conf/nutch-default.xml?rev=1487521&r1=1487520&r2=1487521&view=diff
==============================================================================
--- nutch/branches/2.x/conf/nutch-default.xml (original)
+++ nutch/branches/2.x/conf/nutch-default.xml Wed May 29 15:46:29 2013
@@ -1119,6 +1119,16 @@
   </description>
 </property>
 
+<property>
+  <name>solr.auth</name>
+  <value>false</value>
+  <description>
+  Whether to enable HTTP basic authentication for communicating with Solr.
+  Use the solr.auth.username and solr.auth.password properties to configure
+  your credentials.
+  </description>
+</property>
+
 <!-- elasticsearch index properties -->
 
 <property>

Modified: nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrConstants.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrConstants.java?rev=1487521&r1=1487520&r2=1487521&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrConstants.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrConstants.java Wed May 29 15:46:29 2013
@@ -26,6 +26,12 @@ public interface SolrConstants {
   public static final String COMMIT_INDEX = SOLR_PREFIX + "commit.index";
 
   public static final String MAPPING_FILE = SOLR_PREFIX + "mapping.file";
+
+  public static final String USE_AUTH = SOLR_PREFIX + "auth";
+
+  public static final String USERNAME = SOLR_PREFIX + "auth.username";
+
+  public static final String PASSWORD = SOLR_PREFIX + "auth.password";
   
   public static final String ID_FIELD = "id";
   

Modified: nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java?rev=1487521&r1=1487520&r2=1487521&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java Wed May 29 15:46:29 2013
@@ -229,7 +229,7 @@ implements Tool {
     throws IOException, InterruptedException {
       Configuration conf = context.getConfiguration();
       int numSplits = context.getNumReduceTasks();
-      SolrServer solr = new CommonsHttpSolrServer(conf.get(SolrConstants.SERVER_URL));
+      SolrServer solr = SolrUtils.getCommonsHttpSolrServer(conf);
 
       final SolrQuery solrQuery = new SolrQuery(SOLR_GET_ALL_QUERY);
       solrQuery.setFields(SolrConstants.ID_FIELD);
@@ -259,7 +259,7 @@ implements Tool {
     public RecordReader<Text, SolrRecord> createRecordReader(InputSplit split,
         TaskAttemptContext context) throws IOException, InterruptedException {
       Configuration conf = context.getConfiguration();
-      SolrServer solr = new CommonsHttpSolrServer(conf.get(SolrConstants.SERVER_URL));
+      SolrServer solr = SolrUtils.getCommonsHttpSolrServer(conf);
       SolrInputSplit solrSplit = (SolrInputSplit) split;
       final int numDocs = (int) solrSplit.getLength();
       
@@ -304,7 +304,7 @@ implements Tool {
   public void setup(Context job) throws IOException {
     Configuration conf = job.getConfiguration();
     try {
-      solr = new CommonsHttpSolrServer(conf.get(SolrConstants.SERVER_URL));
+      solr = SolrUtils.getCommonsHttpSolrServer(conf);
     } catch (MalformedURLException e) {
       throw new IOException(e);
     }

Modified: nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java?rev=1487521&r1=1487520&r2=1487521&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java Wed May 29 15:46:29 2013
@@ -55,7 +55,8 @@ public class SolrIndexerJob extends Inde
         Nutch.ARG_SOLR, solrUrl,
         Nutch.ARG_BATCH, batchId));
     // do the commits once and for all the reducers in one go
-    SolrServer solr = new CommonsHttpSolrServer(solrUrl);
+    getConf().set(SolrConstants.SERVER_URL,solrUrl);
+    SolrServer solr = SolrUtils.getCommonsHttpSolrServer(getConf());
     if (getConf().getBoolean(SolrConstants.COMMIT_INDEX, true)) {
       solr.commit();
     }

Added: nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrUtils.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrUtils.java?rev=1487521&view=auto
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrUtils.java (added)
+++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrUtils.java Wed May 29 15:46:29 2013
@@ -0,0 +1,86 @@
+package org.apache.nutch.indexer.solr;
+
+
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.auth.AuthScope;
+import org.apache.commons.httpclient.UsernamePasswordCredentials;
+import org.apache.commons.httpclient.params.HttpClientParams;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
+
+import java.net.MalformedURLException;
+
+/**
+ * Helpers shared by the Solr indexing classes: building a SolrJ HTTP client
+ * from the job configuration and cleaning text before it is sent to Solr.
+ */
+public class SolrUtils {
+
+  public static final Logger LOG = LoggerFactory.getLogger(SolrUtils.class);
+
+  /**
+   * Creates a Solr client for the URL stored under {@link SolrConstants#SERVER_URL}.
+   * When {@link SolrConstants#USE_AUTH} is true, the credentials found under
+   * {@link SolrConstants#USERNAME} and {@link SolrConstants#PASSWORD} are registered
+   * on the underlying HttpClient and preemptive authentication is switched on.
+   *
+   * @param job configuration holding the Solr URL and the optional credentials
+   * @return a server instance backed by a freshly created HttpClient
+   * @throws MalformedURLException declared by the CommonsHttpSolrServer constructor
+   */
+  public static CommonsHttpSolrServer getCommonsHttpSolrServer(Configuration job) throws MalformedURLException {
+    HttpClient client = new HttpClient();
+
+    // Check for username/password
+    if (job.getBoolean(SolrConstants.USE_AUTH, false)) {
+      String username = job.get(SolrConstants.USERNAME);
+
+      LOG.info("Authenticating as: " + username);
+
+      // The credentials are registered for any host, port, realm and scheme.
+      AuthScope scope = new AuthScope(AuthScope.ANY_HOST, AuthScope.ANY_PORT, AuthScope.ANY_REALM, AuthScope.ANY_SCHEME);
+
+      client.getState().setCredentials(scope, new UsernamePasswordCredentials(username, job.get(SolrConstants.PASSWORD)));
+
+      HttpClientParams params = client.getParams();
+      params.setAuthenticationPreemptive(true);
+
+      client.setParams(params);
+    }
+
+    return new CommonsHttpSolrServer(job.get(SolrConstants.SERVER_URL), client);
+  }
+
+  /**
+   * Removes Unicode noncharacters and non-printable control characters from the
+   * input. Tab, line feed and carriage return are kept.
+   *
+   * @param input text to clean; must not be null
+   * @return the input without the stripped code points
+   */
+  public static String stripNonCharCodepoints(String input) {
+    StringBuilder retval = new StringBuilder(input.length());
+
+    // Iterate by code point: a single char never exceeds 0xffff, so the
+    // noncharacters at the end of the supplementary planes can only be seen
+    // after a surrogate pair has been combined.
+    for (int i = 0; i < input.length(); ) {
+      int cp = input.codePointAt(i);
+      i += Character.charCount(cp);
+
+      // Strip all non-characters http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Noncharacter_Code_Point=True:]
+      // and non-printable control characters except tabulator, new line and carriage return
+      boolean nonCharacter = (cp & 0xfffe) == 0xfffe // 0xfffe and 0xffff of every plane up to 0x10ffff
+              || (cp >= 0xfdd0 && cp <= 0xfdef); // 0xfdd0 - 0xfdef, both ends included
+      boolean control = cp <= 0x1F && cp != 0x9 && cp != 0xa && cp != 0xd;
+
+      if (!nonCharacter && !control) {
+        retval.appendCodePoint(cp);
+      }
+    }
+
+    return retval.toString();
+  }
+}
\ No newline at end of file

Modified: nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java?rev=1487521&r1=1487520&r2=1487521&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java Wed May 29 15:46:29 2013
@@ -29,7 +29,6 @@ import org.apache.nutch.indexer.NutchDoc
 import org.apache.nutch.indexer.NutchIndexWriter;
 import org.apache.solr.client.solrj.SolrServer;
 import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
 import org.apache.solr.common.SolrInputDocument;
 
 public class SolrWriter implements NutchIndexWriter {
@@ -48,7 +47,7 @@ public class SolrWriter implements Nutch
   public void open(TaskAttemptContext job)
   throws IOException {
     Configuration conf = job.getConfiguration();
-    solr = new CommonsHttpSolrServer(conf.get(SolrConstants.SERVER_URL));
+    solr = SolrUtils.getCommonsHttpSolrServer(conf);
     commitSize = conf.getInt(SolrConstants.COMMIT_SIZE, 1000);
     solrMapping = SolrMappingReader.getInstance(conf);
   }
@@ -61,7 +60,7 @@ public class SolrWriter implements Nutch
 
         Object val2 = val;
         if (e.getKey().equals("content") || e.getKey().equals("title")) {
-          val2 = stripNonCharCodepoints(val);
+          val2 = SolrUtils.stripNonCharCodepoints(val);
         }
 
         inputDoc.addField(solrMapping.mapKey(e.getKey()), val2);
@@ -97,25 +96,4 @@ public class SolrWriter implements Nutch
     }
   }
 
-  public static String stripNonCharCodepoints(String input) {
-    StringBuilder retval = new StringBuilder();
-    char ch;
-
-    for (int i = 0; i < input.length(); i++) {
-      ch = input.charAt(i);
-
-      // Strip all non-characters http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Noncharacter_Code_Point=True:]
-      // and non-printable control characters except tabulator, new line and carriage return
-      if (ch % 0x10000 != 0xffff && // 0xffff - 0x10ffff range step 0x10000
-          ch % 0x10000 != 0xfffe && // 0xfffe - 0x10fffe range
-          (ch <= 0xfdd0 || ch >= 0xfdef) && // 0xfdd0 - 0xfdef
-          (ch > 0x1F || ch == 0x9 || ch == 0xa || ch == 0xd)) {
-
-        retval.append(ch);
-      }
-    }
-
-    return retval.toString();
-  }
-
 }