You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2020/12/17 16:56:15 UTC

[nutch] branch master updated: NUTCH-2835 Upgrade commons-jexl from 2 --> 3 (#558)

This is an automated email from the ASF dual-hosted git repository.

lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new 8d8e08b  NUTCH-2835 Upgrade commons-jexl from 2 --> 3 (#558)
8d8e08b is described below

commit 8d8e08b354fd94fced548c0b73623a375bcc8b2b
Author: Lewis John McGibbney <le...@gmail.com>
AuthorDate: Thu Dec 17 08:56:04 2020 -0800

    NUTCH-2835 Upgrade commons-jexl from 2 --> 3 (#558)
---
 ivy/ivy.xml                                             |  2 +-
 src/java/org/apache/nutch/crawl/CrawlDatum.java         |  8 ++++----
 src/java/org/apache/nutch/crawl/CrawlDbReader.java      |  4 ++--
 src/java/org/apache/nutch/crawl/Generator.java          | 12 ++++++------
 src/java/org/apache/nutch/hostdb/ReadHostDb.java        | 17 +++++++----------
 src/java/org/apache/nutch/util/JexlUtil.java            | 12 +++++-------
 .../org/apache/nutch/exchange/jexl/JexlExchange.java    |  8 ++++----
 .../apache/nutch/indexer/jexl/JexlIndexingFilter.java   | 10 +++++-----
 8 files changed, 34 insertions(+), 39 deletions(-)

diff --git a/ivy/ivy.xml b/ivy/ivy.xml
index 16ed8a6..a20d8a6 100644
--- a/ivy/ivy.xml
+++ b/ivy/ivy.xml
@@ -46,7 +46,7 @@
 		<dependency org="org.apache.httpcomponents" name="httpclient" rev="4.5.6" conf="*->master" />
 		<dependency org="commons-codec" name="commons-codec" rev="1.11" conf="*->default" />
 		<dependency org="org.apache.commons" name="commons-compress" rev="1.18" conf="*->default" />
-		<dependency org="org.apache.commons" name="commons-jexl" rev="2.1.1" />
+		<dependency org="org.apache.commons" name="commons-jexl3" rev="3.1" conf="*->default"/>
 		<dependency org="com.tdunning" name="t-digest" rev="3.2" />
 
 		<!-- Hadoop Dependencies -->
diff --git a/src/java/org/apache/nutch/crawl/CrawlDatum.java b/src/java/org/apache/nutch/crawl/CrawlDatum.java
index e05d7fd..5159bdb 100644
--- a/src/java/org/apache/nutch/crawl/CrawlDatum.java
+++ b/src/java/org/apache/nutch/crawl/CrawlDatum.java
@@ -25,9 +25,9 @@ import java.util.HashSet;
 import java.util.Map;
 import java.util.Map.Entry;
 
-import org.apache.commons.jexl2.JexlContext;
-import org.apache.commons.jexl2.Expression;
-import org.apache.commons.jexl2.MapContext;
+import org.apache.commons.jexl3.JexlContext;
+import org.apache.commons.jexl3.JexlExpression;
+import org.apache.commons.jexl3.MapContext;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
@@ -542,7 +542,7 @@ public class CrawlDatum implements WritableComparable<CrawlDatum>, Cloneable {
     }
   }
   
-  public boolean evaluate(Expression expr, String url) {
+  public boolean evaluate(JexlExpression expr, String url) {
     if (expr != null && url != null) {
       // Create a context and add data
       JexlContext jcontext = new MapContext();
diff --git a/src/java/org/apache/nutch/crawl/CrawlDbReader.java b/src/java/org/apache/nutch/crawl/CrawlDbReader.java
index 1bb8160..3af63d3 100644
--- a/src/java/org/apache/nutch/crawl/CrawlDbReader.java
+++ b/src/java/org/apache/nutch/crawl/CrawlDbReader.java
@@ -74,7 +74,7 @@ import org.apache.nutch.util.NutchJob;
 import org.apache.nutch.util.SegmentReaderUtil;
 import org.apache.nutch.util.StringUtil;
 import org.apache.nutch.util.TimingUtil;
-import org.apache.commons.jexl2.Expression;
+import org.apache.commons.jexl3.JexlExpression;
 
 import com.fasterxml.jackson.core.JsonGenerationException;
 import com.fasterxml.jackson.core.JsonGenerator;
@@ -864,7 +864,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable {
     Matcher matcher = null;
     String status = null;
     Integer retry = null;
-    Expression expr = null;
+    JexlExpression expr = null;
     float sample;
 
     @Override
diff --git a/src/java/org/apache/nutch/crawl/Generator.java b/src/java/org/apache/nutch/crawl/Generator.java
index 04c2ae8..c3f4469 100644
--- a/src/java/org/apache/nutch/crawl/Generator.java
+++ b/src/java/org/apache/nutch/crawl/Generator.java
@@ -34,9 +34,9 @@ import java.util.Random;
 import org.apache.hadoop.conf.Configurable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import org.apache.commons.jexl2.Expression;
-import org.apache.commons.jexl2.JexlContext;
-import org.apache.commons.jexl2.MapContext;
+import org.apache.commons.jexl3.JexlExpression;
+import org.apache.commons.jexl3.JexlContext;
+import org.apache.commons.jexl3.MapContext;
 import org.apache.hadoop.mapreduce.Counter;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
@@ -182,7 +182,7 @@ public class Generator extends NutchTool implements Tool {
     private float scoreThreshold = 0f;
     private int intervalThreshold = -1;
     private byte restrictStatus = -1;
-    private Expression expr = null;
+    private JexlExpression expr = null;
 
     @Override
     public void setup(
@@ -306,8 +306,8 @@ public class Generator extends NutchTool implements Tool {
     private URLNormalizers normalizers;
     private static boolean normalise;
     private SequenceFile.Reader[] hostdbReaders = null;
-    private Expression maxCountExpr = null;
-    private Expression fetchDelayExpr = null;
+    private JexlExpression maxCountExpr = null;
+    private JexlExpression fetchDelayExpr = null;
 
     public void open() {
       if (conf.get(GENERATOR_HOSTDB) != null) {
diff --git a/src/java/org/apache/nutch/hostdb/ReadHostDb.java b/src/java/org/apache/nutch/hostdb/ReadHostDb.java
index 62bf3a7..be9f459 100644
--- a/src/java/org/apache/nutch/hostdb/ReadHostDb.java
+++ b/src/java/org/apache/nutch/hostdb/ReadHostDb.java
@@ -44,10 +44,11 @@ import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.TimingUtil;
 import org.apache.nutch.util.SegmentReaderUtil;
 
-import org.apache.commons.jexl2.JexlContext;
-import org.apache.commons.jexl2.Expression;
-import org.apache.commons.jexl2.JexlEngine;
-import org.apache.commons.jexl2.MapContext;
+import org.apache.commons.jexl3.JexlBuilder;
+import org.apache.commons.jexl3.JexlContext;
+import org.apache.commons.jexl3.JexlExpression;
+import org.apache.commons.jexl3.JexlEngine;
+import org.apache.commons.jexl3.MapContext;
 
 /**
  * @see <a href='https://commons.apache.org/proper/commons-jexl/reference/syntax.html'>Commons</a>
@@ -67,7 +68,7 @@ public class ReadHostDb extends Configured implements Tool {
     protected boolean dumpHomepages = false;
     protected boolean fieldHeader = true;
     protected Text emptyText = new Text();
-    protected Expression expr = null;
+    protected JexlExpression expr = null;
 
     @Override
     public void setup(Context context) {
@@ -77,11 +78,7 @@ public class ReadHostDb extends Configured implements Tool {
       String expr = context.getConfiguration().get(HOSTDB_FILTER_EXPRESSION);
       if (expr != null) {
         // Create or retrieve a JexlEngine
-        JexlEngine jexl = new JexlEngine();
-        
-        // Dont't be silent and be strict
-        jexl.setSilent(true);
-        jexl.setStrict(true);
+        JexlEngine jexl = new JexlBuilder().silent(true).strict(true).create();
         
         // Create an expression object
         this.expr = jexl.createExpression(expr);
diff --git a/src/java/org/apache/nutch/util/JexlUtil.java b/src/java/org/apache/nutch/util/JexlUtil.java
index 42c8728..24f9fe6 100644
--- a/src/java/org/apache/nutch/util/JexlUtil.java
+++ b/src/java/org/apache/nutch/util/JexlUtil.java
@@ -21,8 +21,9 @@ import java.util.Date;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.apache.commons.jexl2.Expression;
-import org.apache.commons.jexl2.JexlEngine;
+import org.apache.commons.jexl3.JexlBuilder;
+import org.apache.commons.jexl3.JexlEngine;
+import org.apache.commons.jexl3.JexlExpression;
 import org.apache.commons.lang.time.DateUtils;
 
 import org.slf4j.Logger;
@@ -46,7 +47,7 @@ public class JexlUtil {
    * @param expr string JEXL expression
    * @return parsed JEXL expression or null in case of parse error
    */
-  public static Expression parseExpression(String expr) {
+  public static JexlExpression parseExpression(String expr) {
     if (expr == null) return null;
     
     try {
@@ -65,10 +66,7 @@ public class JexlUtil {
         expr = expr.replace(date, Long.toString(time));
       }
 
-      JexlEngine jexl = new JexlEngine();
-
-      jexl.setSilent(true);
-      jexl.setStrict(true);
+      JexlEngine jexl = new JexlBuilder().silent(true).strict(true).create();
 
       return jexl.createExpression(expr);
     } catch (Exception e) {
diff --git a/src/plugin/exchange-jexl/src/java/org/apache/nutch/exchange/jexl/JexlExchange.java b/src/plugin/exchange-jexl/src/java/org/apache/nutch/exchange/jexl/JexlExchange.java
index 5273a572..e889040 100644
--- a/src/plugin/exchange-jexl/src/java/org/apache/nutch/exchange/jexl/JexlExchange.java
+++ b/src/plugin/exchange-jexl/src/java/org/apache/nutch/exchange/jexl/JexlExchange.java
@@ -16,9 +16,9 @@
  */
 package org.apache.nutch.exchange.jexl;
 
-import org.apache.commons.jexl2.Expression;
-import org.apache.commons.jexl2.JexlContext;
-import org.apache.commons.jexl2.MapContext;
+import org.apache.commons.jexl3.JexlExpression;
+import org.apache.commons.jexl3.JexlContext;
+import org.apache.commons.jexl3.MapContext;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.exchange.Exchange;
 import org.apache.nutch.indexer.NutchDocument;
@@ -32,7 +32,7 @@ public class JexlExchange implements Exchange {
 
   private Configuration conf;
 
-  private Expression expression;
+  private JexlExpression expression;
 
   /**
    * Initializes the internal variables.
diff --git a/src/plugin/index-jexl-filter/src/java/org/apache/nutch/indexer/jexl/JexlIndexingFilter.java b/src/plugin/index-jexl-filter/src/java/org/apache/nutch/indexer/jexl/JexlIndexingFilter.java
index 633e0d4..ac387c0 100644
--- a/src/plugin/index-jexl-filter/src/java/org/apache/nutch/indexer/jexl/JexlIndexingFilter.java
+++ b/src/plugin/index-jexl-filter/src/java/org/apache/nutch/indexer/jexl/JexlIndexingFilter.java
@@ -20,9 +20,9 @@ import java.lang.invoke.MethodHandles;
 import java.util.List;
 import java.util.Map.Entry;
 
-import org.apache.commons.jexl2.Expression;
-import org.apache.commons.jexl2.JexlContext;
-import org.apache.commons.jexl2.MapContext;
+import org.apache.commons.jexl3.JexlExpression;
+import org.apache.commons.jexl3.JexlContext;
+import org.apache.commons.jexl3.MapContext;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
 import org.apache.nutch.crawl.CrawlDatum;
@@ -48,7 +48,7 @@ public class JexlIndexingFilter implements IndexingFilter {
       .getLogger(MethodHandles.lookup().lookupClass());
 
   private Configuration conf;
-  private Expression expr;
+  private JexlExpression expr;
 
   @Override
   public NutchDocument filter(NutchDocument doc, Parse parse, Text url,
@@ -96,7 +96,7 @@ public class JexlIndexingFilter implements IndexingFilter {
         return doc;
       }
     } catch (Exception e) {
-      LOG.warn("Failed evaluating JEXL {}", expr.getExpression(), e);
+      LOG.warn("Failed evaluating JEXL {}", expr.getSourceText(), e);
     }
     return null;
   }