You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gu...@apache.org on 2019/03/24 23:08:30 UTC

[lucene-solr] branch master updated: SOLR-13324 - Don't swallow/print exception in URLClassifyProcessor anymore

This is an automated email from the ASF dual-hosted git repository.

gus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new c60685f  SOLR-13324 - Don't swallow/print exception in URLClassifyProcessor anymore
c60685f is described below

commit c60685f9e4515ba3d46049300a0d11095a8d877d
Author: Gus Heck <gu...@apache.org>
AuthorDate: Sun Mar 24 19:07:26 2019 -0400

    SOLR-13324 - Don't swallow/print exception in URLClassifyProcessor anymore
---
 solr/CHANGES.txt                                   |  4 +++
 .../update/processor/URLClassifyProcessor.java     | 41 ++++++++++------------
 2 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index bbf8333..834db2f 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -41,6 +41,10 @@ Upgrade Notes
   expanding the 'expr' parameter can be reinstated with -DStreamingExpressionMacros=true passed to the JVM at startup
   (Gus Heck).
 
+* SOLR-13324: URLClassifyProcessor#getCanonicalUrl now throws MalformedURLException rather than hiding it. Although the
+  present code is unlikely to produce such an exception, it may be possible in future changes or in subclasses.
+  Currently this change should only affect compatibility of custom code overriding this method (Gus Heck).
+
 New Features
 ----------------------
 * SOLR-13131: Category Routed Aliases are now available for data driven assignment of documents to collections based on
diff --git a/solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java
index 0844b60..a99b7cb 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java
@@ -43,7 +43,7 @@ import org.slf4j.LoggerFactory;
  * and helping to produce values which may be used for boosting or filtering later.
  */
 public class URLClassifyProcessor extends UpdateRequestProcessor {
-  
+
   private static final String INPUT_FIELD_PARAM = "inputField";
   private static final String OUTPUT_LENGTH_FIELD_PARAM = "lengthOutputField";
   private static final String OUTPUT_LEVELS_FIELD_PARAM = "levelsOutputField";
@@ -84,16 +84,16 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
       "welcome.asp",
       "welcome.aspx"
   };
-  
+
   public URLClassifyProcessor(SolrParams parameters,
       SolrQueryRequest request,
       SolrQueryResponse response,
       UpdateRequestProcessor nextProcessor) {
     super(nextProcessor);
-    
+
     this.initParameters(parameters);
   }
-  
+
   private void initParameters(SolrParams parameters) {
     if (parameters != null) {
       this.setEnabled(parameters.getBool("enabled", true));
@@ -106,7 +106,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
       this.canonicalUrlFieldname = parameters.get(OUTPUT_CANONICALURL_FIELD_PARAM);
     }
   }
-  
+
   @Override
   public void processAdd(AddUpdateCommand command) throws IOException {
     if (isEnabled()) {
@@ -133,24 +133,19 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
     }
     super.processAdd(command);
   }
-  
+
   /**
    * Gets a canonical form of the URL for use as main URL
    * @param url The input url
    * @return The URL object representing the canonical URL
    */
-  public URL getCanonicalUrl(URL url) {
+  public URL getCanonicalUrl(URL url) throws MalformedURLException {
     // NOTE: Do we want to make sure this URL is normalized? (Christian thinks we should)
     String urlString = url.toString();
-    try {
-      String lps = landingPageSuffix(url);
-      return new URL(urlString.replaceFirst("/"+lps+"$", "/"));
-    } catch (MalformedURLException e) {
-      e.printStackTrace();
-    }
-    return url;
+    String lps = landingPageSuffix(url);
+    return new URL(urlString.replaceFirst("/" + lps + "$", "/"));
   }
-  
+
   /**
    * Calculates the length of the URL in characters
    * @param url The input URL
@@ -159,7 +154,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
   public int length(URL url) {
     return url.toString().length();
   }
-  
+
   /**
    * Calculates the number of path levels in the given URL
    * @param url The input URL
@@ -176,7 +171,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
     }
     return levels;
   }
-  
+
   /**
    * Calculates whether a URL is a top level page
    * @param url The input URL
@@ -187,7 +182,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
     String path = getPathWithoutSuffix(url).replaceAll("/+$", "");
     return path.length() == 0 && url.getQuery() == null;
   }
-  
+
   /**
    * Calculates whether the URL is a landing page or not
    * @param url The input URL
@@ -200,19 +195,19 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
       return landingPageSuffix(url) != "";
     }
   }
-  
+
   public URL getNormalizedURL(String url) throws MalformedURLException, URISyntaxException {
     return new URI(url).normalize().toURL();
   }
-  
+
   public boolean isEnabled() {
     return enabled;
   }
-  
+
   public void setEnabled(boolean enabled) {
     this.enabled = enabled;
   }
-  
+
   private String landingPageSuffix(URL url) {
     String path = url.getPath().toLowerCase(Locale.ROOT);
     for(String suffix : landingPageSuffixes) {
@@ -222,7 +217,7 @@ public class URLClassifyProcessor extends UpdateRequestProcessor {
     }
     return "";
   }
-  
+
   private String getPathWithoutSuffix(URL url) {
     return url.getPath().toLowerCase(Locale.ROOT).replaceFirst(landingPageSuffix(url)+"$", "");
   }