You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by fe...@apache.org on 2013/05/29 17:46:29 UTC
svn commit: r1487521 - in /nutch/branches/2.x: ./ conf/
src/java/org/apache/nutch/indexer/solr/
Author: fenglu
Date: Wed May 29 15:46:29 2013
New Revision: 1487521
URL: http://svn.apache.org/r1487521
Log:
NUTCH-1575 support solr authentication in nutch 2.x
Added:
nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrUtils.java
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/conf/nutch-default.xml
nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrConstants.java
nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java
nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java
Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1487521&r1=1487520&r2=1487521&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Wed May 29 15:46:29 2013
@@ -2,6 +2,8 @@ Nutch Change Log
Release 2.2 - Current Development
+* NUTCH-1575 support solr authentication in nutch 2.x (Feng)
+
* NUTCH-1569 Upgrade 2.x to Gora 0.3 (lewismc)
* NUTCH-1243 Junit jar removed from lib (lewismc)
Modified: nutch/branches/2.x/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/conf/nutch-default.xml?rev=1487521&r1=1487520&r2=1487521&view=diff
==============================================================================
--- nutch/branches/2.x/conf/nutch-default.xml (original)
+++ nutch/branches/2.x/conf/nutch-default.xml Wed May 29 15:46:29 2013
@@ -1119,6 +1119,16 @@
</description>
</property>
+<property>
+ <name>solr.auth</name>
+ <value>false</value>
+ <description>
+ Whether to enable HTTP basic authentication for communicating with Solr.
+ Use the solr.auth.username and solr.auth.password properties to configure
+ your credentials.
+ </description>
+</property>
+
<!-- elasticsearch index properties -->
<property>
Modified: nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrConstants.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrConstants.java?rev=1487521&r1=1487520&r2=1487521&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrConstants.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrConstants.java Wed May 29 15:46:29 2013
@@ -26,6 +26,12 @@ public interface SolrConstants {
public static final String COMMIT_INDEX = SOLR_PREFIX + "commit.index";
public static final String MAPPING_FILE = SOLR_PREFIX + "mapping.file";
+
+ public static final String USE_AUTH = SOLR_PREFIX + "auth";
+
+ public static final String USERNAME = SOLR_PREFIX + "auth.username";
+
+ public static final String PASSWORD = SOLR_PREFIX + "auth.password";
public static final String ID_FIELD = "id";
Modified: nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java?rev=1487521&r1=1487520&r2=1487521&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java Wed May 29 15:46:29 2013
@@ -229,7 +229,7 @@ implements Tool {
throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
int numSplits = context.getNumReduceTasks();
- SolrServer solr = new CommonsHttpSolrServer(conf.get(SolrConstants.SERVER_URL));
+ SolrServer solr = SolrUtils.getCommonsHttpSolrServer(conf);
final SolrQuery solrQuery = new SolrQuery(SOLR_GET_ALL_QUERY);
solrQuery.setFields(SolrConstants.ID_FIELD);
@@ -259,7 +259,7 @@ implements Tool {
public RecordReader<Text, SolrRecord> createRecordReader(InputSplit split,
TaskAttemptContext context) throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
- SolrServer solr = new CommonsHttpSolrServer(conf.get(SolrConstants.SERVER_URL));
+ SolrServer solr = SolrUtils.getCommonsHttpSolrServer(conf);
SolrInputSplit solrSplit = (SolrInputSplit) split;
final int numDocs = (int) solrSplit.getLength();
@@ -304,7 +304,7 @@ implements Tool {
public void setup(Context job) throws IOException {
Configuration conf = job.getConfiguration();
try {
- solr = new CommonsHttpSolrServer(conf.get(SolrConstants.SERVER_URL));
+ solr = SolrUtils.getCommonsHttpSolrServer(conf);
} catch (MalformedURLException e) {
throw new IOException(e);
}
Modified: nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java?rev=1487521&r1=1487520&r2=1487521&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java Wed May 29 15:46:29 2013
@@ -55,7 +55,8 @@ public class SolrIndexerJob extends Inde
Nutch.ARG_SOLR, solrUrl,
Nutch.ARG_BATCH, batchId));
// do the commits once and for all the reducers in one go
- SolrServer solr = new CommonsHttpSolrServer(solrUrl);
+ getConf().set(SolrConstants.SERVER_URL,solrUrl);
+ SolrServer solr = SolrUtils.getCommonsHttpSolrServer(getConf());
if (getConf().getBoolean(SolrConstants.COMMIT_INDEX, true)) {
solr.commit();
}
Added: nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrUtils.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrUtils.java?rev=1487521&view=auto
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrUtils.java (added)
+++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrUtils.java Wed May 29 15:46:29 2013
@@ -0,0 +1,61 @@
+package org.apache.nutch.indexer.solr;
+
+
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.auth.AuthScope;
+import org.apache.commons.httpclient.UsernamePasswordCredentials;
+import org.apache.commons.httpclient.params.HttpClientParams;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
+
+import java.net.MalformedURLException;
+
+public class SolrUtils {
+  /** Logger named after this class so that its output is attributed to SolrUtils. */
+  public static final Logger LOG = LoggerFactory.getLogger(SolrUtils.class);
+
+  /**
+   * Creates a Solr client for the URL stored under {@link SolrConstants#SERVER_URL}. When
+   * {@link SolrConstants#USE_AUTH} is true, the credentials stored under
+   * {@link SolrConstants#USERNAME} and {@link SolrConstants#PASSWORD} are sent preemptively.
+   *
+   * @param job configuration holding the Solr URL and the optional credentials
+   * @return a client bound to the configured Solr URL
+   * @throws MalformedURLException if the configured Solr URL is malformed
+   */
+  public static CommonsHttpSolrServer getCommonsHttpSolrServer(Configuration job) throws MalformedURLException {
+    HttpClient client = new HttpClient();
+    if (job.getBoolean(SolrConstants.USE_AUTH, false)) {
+      String username = job.get(SolrConstants.USERNAME);
+      LOG.info("Authenticating as: " + username);
+      // The credentials are registered for any host, port, realm and scheme.
+      AuthScope scope = new AuthScope(AuthScope.ANY_HOST, AuthScope.ANY_PORT, AuthScope.ANY_REALM, AuthScope.ANY_SCHEME);
+      client.getState().setCredentials(scope, new UsernamePasswordCredentials(username, job.get(SolrConstants.PASSWORD)));
+      client.getParams().setAuthenticationPreemptive(true);
+    }
+
+    return new CommonsHttpSolrServer(job.get(SolrConstants.SERVER_URL), client);
+  }
+
+  /**
+   * Removes Unicode noncharacters and C0 control characters, keeping tab, LF and CR.
+   *
+   * @param input text to clean; must not be null
+   * @return the input without U+FDD0..U+FDEF, U+nFFFE/U+nFFFF of any plane, and the stripped controls
+   */
+  public static String stripNonCharCodepoints(String input) {
+    StringBuilder retval = new StringBuilder(input.length());
+    // Step by code point, not by char, so supplementary-plane noncharacters are seen as well.
+    for (int i = 0, cp; i < input.length(); i += Character.charCount(cp)) {
+      cp = input.codePointAt(i);
+      if ((cp & 0xfffe) != 0xfffe && // U+FFFE, U+FFFF and their counterparts in planes 1-16
+          (cp < 0xfdd0 || cp > 0xfdef) && // U+FDD0..U+FDEF, both ends included
+          (cp > 0x1f || cp == 0x9 || cp == 0xa || cp == 0xd)) {
+        retval.appendCodePoint(cp);
+      }
+    }
+    return retval.toString();
+  }
+}
\ No newline at end of file
Modified: nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java?rev=1487521&r1=1487520&r2=1487521&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java Wed May 29 15:46:29 2013
@@ -29,7 +29,6 @@ import org.apache.nutch.indexer.NutchDoc
import org.apache.nutch.indexer.NutchIndexWriter;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.common.SolrInputDocument;
public class SolrWriter implements NutchIndexWriter {
@@ -48,7 +47,7 @@ public class SolrWriter implements Nutch
public void open(TaskAttemptContext job)
throws IOException {
Configuration conf = job.getConfiguration();
- solr = new CommonsHttpSolrServer(conf.get(SolrConstants.SERVER_URL));
+ solr = SolrUtils.getCommonsHttpSolrServer(conf);
commitSize = conf.getInt(SolrConstants.COMMIT_SIZE, 1000);
solrMapping = SolrMappingReader.getInstance(conf);
}
@@ -61,7 +60,7 @@ public class SolrWriter implements Nutch
Object val2 = val;
if (e.getKey().equals("content") || e.getKey().equals("title")) {
- val2 = stripNonCharCodepoints(val);
+ val2 = SolrUtils.stripNonCharCodepoints(val);
}
inputDoc.addField(solrMapping.mapKey(e.getKey()), val2);
@@ -97,25 +96,4 @@ public class SolrWriter implements Nutch
}
}
- public static String stripNonCharCodepoints(String input) {
- StringBuilder retval = new StringBuilder();
- char ch;
-
- for (int i = 0; i < input.length(); i++) {
- ch = input.charAt(i);
-
- // Strip all non-characters http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Noncharacter_Code_Point=True:]
- // and non-printable control characters except tabulator, new line and carriage return
- if (ch % 0x10000 != 0xffff && // 0xffff - 0x10ffff range step 0x10000
- ch % 0x10000 != 0xfffe && // 0xfffe - 0x10fffe range
- (ch <= 0xfdd0 || ch >= 0xfdef) && // 0xfdd0 - 0xfdef
- (ch > 0x1F || ch == 0x9 || ch == 0xa || ch == 0xd)) {
-
- retval.append(ch);
- }
- }
-
- return retval.toString();
- }
-
}