You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gu...@apache.org on 2019/03/24 23:08:30 UTC
[lucene-solr] branch master updated: SOLR-13324 - Don't
swallow/print exception in URLClassifyProcessor anymore
This is an automated email from the ASF dual-hosted git repository.
gus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new c60685f SOLR-13324 - Don't swallow/print exception in URLClassifyProcessor anymore
c60685f is described below
commit c60685f9e4515ba3d46049300a0d11095a8d877d
Author: Gus Heck <gu...@apache.org>
AuthorDate: Sun Mar 24 19:07:26 2019 -0400
SOLR-13324 - Don't swallow/print exception in URLClassifyProcessor anymore
---
solr/CHANGES.txt | 4 +++
.../update/processor/URLClassifyProcessor.java | 41 ++++++++++------------
2 files changed, 22 insertions(+), 23 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index bbf8333..834db2f 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -41,6 +41,10 @@ Upgrade Notes
expanding the 'expr' parameter can be reinstated with -DStreamingExpressionMacros=true passed to the JVM at startup
(Gus Heck).
+* SOLR-13324: URLClassifyProcessor#getCanonicalUrl now throws MalformedURLException rather than hiding it. Although the
+ present code is unlikely to produce such an exception, it may be possible in future changes or in subclasses.
+ Currently this change should only affect compatibility of custom code overriding this method (Gus Heck).
+
New Features
----------------------
* SOLR-13131: Category Routed Aliases are now available for data driven assignment of documents to collections based on
diff --git a/solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java
index 0844b60..a99b7cb 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java
@@ -43,7 +43,7 @@ import org.slf4j.LoggerFactory;
* and helping to produce values which may be used for boosting or filtering later.
*/
public class URLClassifyProcessor extends UpdateRequestProcessor {
-
+
private static final String INPUT_FIELD_PARAM = "inputField";
private static final String OUTPUT_LENGTH_FIELD_PARAM = "lengthOutputField";
private static final String OUTPUT_LEVELS_FIELD_PARAM = "levelsOutputField";
@@ -84,16 +84,16 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
"welcome.asp",
"welcome.aspx"
};
-
+
public URLClassifyProcessor(SolrParams parameters,
SolrQueryRequest request,
SolrQueryResponse response,
UpdateRequestProcessor nextProcessor) {
super(nextProcessor);
-
+
this.initParameters(parameters);
}
-
+
private void initParameters(SolrParams parameters) {
if (parameters != null) {
this.setEnabled(parameters.getBool("enabled", true));
@@ -106,7 +106,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
this.canonicalUrlFieldname = parameters.get(OUTPUT_CANONICALURL_FIELD_PARAM);
}
}
-
+
@Override
public void processAdd(AddUpdateCommand command) throws IOException {
if (isEnabled()) {
@@ -133,24 +133,19 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
}
super.processAdd(command);
}
-
+
/**
* Gets a canonical form of the URL for use as main URL
* @param url The input url
* @return The URL object representing the canonical URL
*/
- public URL getCanonicalUrl(URL url) {
+ public URL getCanonicalUrl(URL url) throws MalformedURLException {
// NOTE: Do we want to make sure this URL is normalized? (Christian thinks we should)
String urlString = url.toString();
- try {
- String lps = landingPageSuffix(url);
- return new URL(urlString.replaceFirst("/"+lps+"$", "/"));
- } catch (MalformedURLException e) {
- e.printStackTrace();
- }
- return url;
+ String lps = landingPageSuffix(url);
+ return new URL(urlString.replaceFirst("/" + lps + "$", "/"));
}
-
+
/**
* Calculates the length of the URL in characters
* @param url The input URL
@@ -159,7 +154,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
public int length(URL url) {
return url.toString().length();
}
-
+
/**
* Calculates the number of path levels in the given URL
* @param url The input URL
@@ -176,7 +171,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
}
return levels;
}
-
+
/**
* Calculates whether a URL is a top level page
* @param url The input URL
@@ -187,7 +182,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
String path = getPathWithoutSuffix(url).replaceAll("/+$", "");
return path.length() == 0 && url.getQuery() == null;
}
-
+
/**
* Calculates whether the URL is a landing page or not
* @param url The input URL
@@ -200,19 +195,19 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
return landingPageSuffix(url) != "";
}
}
-
+
public URL getNormalizedURL(String url) throws MalformedURLException, URISyntaxException {
return new URI(url).normalize().toURL();
}
-
+
public boolean isEnabled() {
return enabled;
}
-
+
public void setEnabled(boolean enabled) {
this.enabled = enabled;
}
-
+
private String landingPageSuffix(URL url) {
String path = url.getPath().toLowerCase(Locale.ROOT);
for(String suffix : landingPageSuffixes) {
@@ -222,7 +217,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
}
return "";
}
-
+
private String getPathWithoutSuffix(URL url) {
return url.getPath().toLowerCase(Locale.ROOT).replaceFirst(landingPageSuffix(url)+"$", "");
}