You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2023/06/06 12:51:25 UTC
[nutch] branch master updated: NUTCH-2991 Support HTTP/S Header Authorization for Solr connections (#763)
This is an automated email from the ASF dual-hosted git repository.
snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new 9109bdd74 NUTCH-2991 Support HTTP/S Header Authorization for Solr connections (#763)
9109bdd74 is described below
commit 9109bdd740ba578fcffff17745ebc9f53f464667
Author: Sebastian Nagel <sn...@apache.org>
AuthorDate: Tue Jun 6 14:51:20 2023 +0200
NUTCH-2991 Support HTTP/S Header Authorization for Solr connections (#763)
NUTCH-2991 Support HTTP/S Header Authorization for Solr connections
(patch contributed by Marcos Gomez)
- adds params auth.header.name and auth.header.value for JWT authentication
with Bearer tokens sent via the HTTP Authorization header on Solr connections
- also document basic authentication and improve error message when reading the configuration fails
---
conf/index-writers.xml.template | 19 ++++-
.../org/apache/nutch/indexer/IndexWriters.java | 2 +-
.../nutch/indexwriter/solr/SolrConstants.java | 4 +
.../nutch/indexwriter/solr/SolrIndexWriter.java | 47 ++++++++---
.../apache/nutch/indexwriter/solr/SolrUtils.java | 94 +++++++++++++++++++++-
5 files changed, 153 insertions(+), 13 deletions(-)
diff --git a/conf/index-writers.xml.template b/conf/index-writers.xml.template
index 549ebd4c9..6ed341cb7 100644
--- a/conf/index-writers.xml.template
+++ b/conf/index-writers.xml.template
@@ -26,9 +26,24 @@
<param name="collection" value=""/>
<param name="weight.field" value=""/>
<param name="commitSize" value="1000"/>
+ <!-- enable authentication. In addition, set username and
+ password for basic authentication, or pass the Bearer
+ token via the Authorization header. See below.
+ -->
<param name="auth" value="false"/>
- <param name="username" value="username"/>
- <param name="password" value="password"/>
+ <!-- username and password for basic authentication -->
+ <param name="username" value=""/>
+ <param name="password" value=""/>
+ <!-- Name for Authorization HTTP header
+ <param name="auth.header.name" value="Bearer"/>
+ HTTP header -> Authorization: Bearer 1234567890
+ -->
+ <param name="auth.header.name" value=""/>
+ <!-- Value for Authorization HTTP header
+ <param name="auth.header.value" value="1234567890"/>
+ HTTP header -> Authorization: Bearer 1234567890
+ -->
+ <param name="auth.header.value" value=""/>
</parameters>
<mapping>
<copy>
diff --git a/src/java/org/apache/nutch/indexer/IndexWriters.java b/src/java/org/apache/nutch/indexer/IndexWriters.java
index a8ab0ec9c..f8ae8ee86 100644
--- a/src/java/org/apache/nutch/indexer/IndexWriters.java
+++ b/src/java/org/apache/nutch/indexer/IndexWriters.java
@@ -137,7 +137,7 @@ public class IndexWriters {
return indexWriterConfigs;
} catch (SAXException | IOException | ParserConfigurationException e) {
- LOG.error(e.toString());
+ LOG.error("Failed to read index writers configuration: {}", e.getMessage());
return new IndexWriterConfig[0];
}
}
diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java
index 302ed75ed..ee6d5d623 100644
--- a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java
+++ b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java
@@ -34,4 +34,8 @@ public interface SolrConstants {
String PASSWORD = "password";
+ String AUTH_HEADER_NAME = "auth.header.name";
+
+ String AUTH_HEADER_VALUE = "auth.header.value";
+
}
diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
index 12d3ff6b7..ec2ab46d2 100644
--- a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
+++ b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
@@ -16,8 +16,8 @@
*/
package org.apache.nutch.indexwriter.solr;
-import java.lang.invoke.MethodHandles;
import java.io.IOException;
+import java.lang.invoke.MethodHandles;
import java.time.format.DateTimeFormatter;
import java.util.AbstractMap;
import java.util.ArrayList;
@@ -72,6 +72,8 @@ public class SolrIndexWriter implements IndexWriter {
private boolean auth;
private String username;
private String password;
+ private String authHeaderName;
+ private String authHeaderValue;
@Override
public void open(Configuration conf, String name) {
@@ -99,20 +101,40 @@ public class SolrIndexWriter implements IndexWriter {
this.auth = parameters.getBoolean(SolrConstants.USE_AUTH, false);
this.username = parameters.get(SolrConstants.USERNAME);
this.password = parameters.get(SolrConstants.PASSWORD);
+ this.authHeaderName = parameters.get(SolrConstants.AUTH_HEADER_NAME, "");
+ this.authHeaderValue = parameters.get(SolrConstants.AUTH_HEADER_VALUE, "");
this.solrClients = new ArrayList<>();
switch (type) {
case "http":
for (String url : urls) {
- solrClients.add(SolrUtils.getHttpSolrClient(url));
+ if (this.auth && !StringUtil.isEmpty(this.authHeaderName)
+ && !StringUtil.isEmpty(this.authHeaderValue)) {
+ solrClients.add(SolrUtils.getHttpSolrClientHeaderAuthorization(url,
+ this.authHeaderName, this.authHeaderValue));
+ } else if (this.auth && !StringUtil.isEmpty(this.username)
+ && !StringUtil.isEmpty(this.password)) {
+ solrClients.add(
+ SolrUtils.getHttpSolrClient(url, this.username, this.password));
+ } else {
+ solrClients.add(SolrUtils.getHttpSolrClient(url));
+ }
}
break;
case "cloud":
- CloudSolrClient sc = this.auth
- ? SolrUtils.getCloudSolrClient(Arrays.asList(urls), this.username,
- this.password)
- : SolrUtils.getCloudSolrClient(Arrays.asList(urls));
+ CloudSolrClient sc;
+ if (this.auth && !StringUtil.isEmpty(this.authHeaderName)
+ && !StringUtil.isEmpty(this.authHeaderValue)) {
+ sc = SolrUtils.getCloudSolrClientHeaderAuthorization(
+ Arrays.asList(urls), this.authHeaderName, this.authHeaderValue);
+ } else if (this.auth && !StringUtil.isEmpty(this.username)
+ && !StringUtil.isEmpty(this.password)) {
+ sc = SolrUtils.getCloudSolrClient(Arrays.asList(urls), this.username,
+ this.password);
+ } else {
+ sc = SolrUtils.getCloudSolrClient(Arrays.asList(urls));
+ }
sc.setDefaultCollection(this.collection);
solrClients.add(sc);
break;
@@ -219,7 +241,8 @@ public class SolrIndexWriter implements IndexWriter {
push();
try {
for (SolrClient solrClient : solrClients) {
- if (this.auth) {
+ if (this.auth && !StringUtil.isEmpty(this.username)
+ && !StringUtil.isEmpty(this.password)) {
UpdateRequest req = new UpdateRequest();
req.setAction(UpdateRequest.ACTION.COMMIT, true, true);
req.setBasicAuthCredentials(this.username, this.password);
@@ -243,7 +266,8 @@ public class SolrIndexWriter implements IndexWriter {
req.add(inputDocs);
req.setAction(UpdateRequest.ACTION.OPTIMIZE, false, false);
req.setParams(params);
- if (this.auth) {
+ if (this.auth && !StringUtil.isEmpty(this.username)
+ && !StringUtil.isEmpty(this.password)) {
req.setBasicAuthCredentials(this.username, this.password);
}
for (SolrClient solrClient : solrClients) {
@@ -264,7 +288,8 @@ public class SolrIndexWriter implements IndexWriter {
req.deleteById(deleteIds);
req.setAction(UpdateRequest.ACTION.OPTIMIZE, false, false);
req.setParams(params);
- if (this.auth) {
+ if (this.auth && !StringUtil.isEmpty(this.username)
+ && !StringUtil.isEmpty(this.password)) {
req.setBasicAuthCredentials(this.username, this.password);
}
@@ -326,6 +351,10 @@ public class SolrIndexWriter implements IndexWriter {
properties.put(SolrConstants.USE_AUTH, new AbstractMap.SimpleEntry<>(
"Whether to enable HTTP basic authentication for communicating with Solr. Use the username and password properties to configure your credentials.",
this.auth));
+ properties.put(SolrConstants.AUTH_HEADER_NAME, new AbstractMap.SimpleEntry<>(
+ "The authentication header content name.", this.authHeaderName));
+ properties.put(SolrConstants.AUTH_HEADER_VALUE, new AbstractMap.SimpleEntry<>(
+ "The authentication header content value.", StringUtil.mask(this.authHeaderValue)));
properties.put(SolrConstants.USERNAME, new AbstractMap.SimpleEntry<>(
"The username of Solr server.", this.username));
properties.put(SolrConstants.PASSWORD, new AbstractMap.SimpleEntry<>(
diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java
index 8f97b166e..d307edc59 100644
--- a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java
+++ b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java
@@ -22,10 +22,12 @@ import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.HttpClient;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.HttpClientBuilder;
+import org.apache.http.message.BasicHeader;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import java.util.Arrays;
import java.util.List;
public class SolrUtils {
@@ -39,7 +41,7 @@ public class SolrUtils {
static CloudSolrClient getCloudSolrClient(List<String> urls, String username,
String password) {
- // Building http client
+ // Building HTTP client
CredentialsProvider provider = new BasicCredentialsProvider();
UsernamePasswordCredentials credentials = new UsernamePasswordCredentials(
username, password);
@@ -55,10 +57,100 @@ public class SolrUtils {
return sc;
}
+ /**
+ * Creates a new CloudSolrClient whose HTTP client sends an Authorization
+ * header by default:
+ *
+ * <pre>
+ * Authorization: headerName headerValue
+ * </pre>
+ *
+ * e.g.
+ *
+ * <pre>
+ * Authorization: Bearer XXXXXXXXXXX
+ * </pre>
+ *
+ * @param urls
+ * Solr URLs
+ * @param headerName
+ * First token of the Authorization header value: Bearer, Token, etc.
+ * @param headerValue
+ * Second token of the Authorization header value, e.g. a JWT token
+ * @return CloudSolrClient on which connect() has already been called
+ */
+ static CloudSolrClient getCloudSolrClientHeaderAuthorization(
+ List<String> urls, String headerName, String headerValue) {
+ // Building HTTP client with the Authorization header as a default header
+ HttpClientBuilder httpClientBuilder = HttpClientBuilder.create();
+ httpClientBuilder.setDefaultHeaders(Arrays.asList(
+ new BasicHeader("Authorization", headerName + " " + headerValue)));
+ // Building the Solr client on top of the customized HTTP client
+ CloudSolrClient sc = new CloudSolrClient.Builder(urls)
+ .withParallelUpdates(true).withHttpClient(httpClientBuilder.build())
+ .build();
+ sc.connect();
+ return sc;
+ }
+
static SolrClient getHttpSolrClient(String url) {
return new HttpSolrClient.Builder(url).build();
}
+ /**
+ * Creates a new SolrClient whose HTTP client sends an Authorization header
+ * by default:
+ *
+ * <pre>
+ * Authorization: headerName headerValue
+ * </pre>
+ *
+ * e.g.
+ *
+ * <pre>
+ * Authorization: Bearer XXXXXXXXXXX
+ * </pre>
+ *
+ * @param url
+ * Solr URL
+ * @param headerName
+ * First token of the Authorization header value: Bearer, Token, etc.
+ * @param headerValue
+ * Second token of the Authorization header value, e.g. a JWT token
+ * @return SolrClient
+ */
+ static SolrClient getHttpSolrClientHeaderAuthorization(String url,
+ String headerName, String headerValue) {
+ HttpClientBuilder httpClientBuilder = HttpClientBuilder.create();
+ httpClientBuilder.setDefaultHeaders(Arrays.asList(
+ new BasicHeader("Authorization", headerName + " " + headerValue)));
+ return new HttpSolrClient.Builder(url)
+ .withHttpClient(httpClientBuilder.build()).build();
+ }
+
+ /**
+ * Creates a new SolrClient, using Basic Authentication.
+ *
+ * @param url
+ * Solr URL
+ * @param username
+ * Username used for basic authentication
+ * @param password
+ * Password used for basic authentication
+ * @return SolrClient
+ */
+ static SolrClient getHttpSolrClient(String url, String username,
+ String password) {
+ CredentialsProvider provider = new BasicCredentialsProvider();
+ UsernamePasswordCredentials credentials = new UsernamePasswordCredentials(
+ username, password);
+ provider.setCredentials(AuthScope.ANY, credentials);
+ HttpClientBuilder httpClientBuilder = HttpClientBuilder.create();
+ httpClientBuilder.setDefaultCredentialsProvider(provider);
+ return new HttpSolrClient.Builder(url)
+ .withHttpClient(httpClientBuilder.build()).build();
+ }
+
static String stripNonCharCodepoints(String input) {
StringBuilder retval = new StringBuilder();
char ch;