You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2020/12/17 16:56:15 UTC
[nutch] branch master updated: NUTCH-2835 Upgrade commons-jexl from 2 --> 3 (#558)
This is an automated email from the ASF dual-hosted git repository.
lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new 8d8e08b NUTCH-2835 Upgrade commons-jexl from 2 --> 3 (#558)
8d8e08b is described below
commit 8d8e08b354fd94fced548c0b73623a375bcc8b2b
Author: Lewis John McGibbney <le...@gmail.com>
AuthorDate: Thu Dec 17 08:56:04 2020 -0800
NUTCH-2835 Upgrade commons-jexl from 2 --> 3 (#558)
---
ivy/ivy.xml | 2 +-
src/java/org/apache/nutch/crawl/CrawlDatum.java | 8 ++++----
src/java/org/apache/nutch/crawl/CrawlDbReader.java | 4 ++--
src/java/org/apache/nutch/crawl/Generator.java | 12 ++++++------
src/java/org/apache/nutch/hostdb/ReadHostDb.java | 17 +++++++----------
src/java/org/apache/nutch/util/JexlUtil.java | 12 +++++-------
.../org/apache/nutch/exchange/jexl/JexlExchange.java | 8 ++++----
.../apache/nutch/indexer/jexl/JexlIndexingFilter.java | 10 +++++-----
8 files changed, 34 insertions(+), 39 deletions(-)
diff --git a/ivy/ivy.xml b/ivy/ivy.xml
index 16ed8a6..a20d8a6 100644
--- a/ivy/ivy.xml
+++ b/ivy/ivy.xml
@@ -46,7 +46,7 @@
<dependency org="org.apache.httpcomponents" name="httpclient" rev="4.5.6" conf="*->master" />
<dependency org="commons-codec" name="commons-codec" rev="1.11" conf="*->default" />
<dependency org="org.apache.commons" name="commons-compress" rev="1.18" conf="*->default" />
- <dependency org="org.apache.commons" name="commons-jexl" rev="2.1.1" />
+ <dependency org="org.apache.commons" name="commons-jexl3" rev="3.1" conf="*->default"/>
<dependency org="com.tdunning" name="t-digest" rev="3.2" />
<!-- Hadoop Dependencies -->
diff --git a/src/java/org/apache/nutch/crawl/CrawlDatum.java b/src/java/org/apache/nutch/crawl/CrawlDatum.java
index e05d7fd..5159bdb 100644
--- a/src/java/org/apache/nutch/crawl/CrawlDatum.java
+++ b/src/java/org/apache/nutch/crawl/CrawlDatum.java
@@ -25,9 +25,9 @@ import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
-import org.apache.commons.jexl2.JexlContext;
-import org.apache.commons.jexl2.Expression;
-import org.apache.commons.jexl2.MapContext;
+import org.apache.commons.jexl3.JexlContext;
+import org.apache.commons.jexl3.JexlExpression;
+import org.apache.commons.jexl3.MapContext;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
@@ -542,7 +542,7 @@ public class CrawlDatum implements WritableComparable<CrawlDatum>, Cloneable {
}
}
- public boolean evaluate(Expression expr, String url) {
+ public boolean evaluate(JexlExpression expr, String url) {
if (expr != null && url != null) {
// Create a context and add data
JexlContext jcontext = new MapContext();
diff --git a/src/java/org/apache/nutch/crawl/CrawlDbReader.java b/src/java/org/apache/nutch/crawl/CrawlDbReader.java
index 1bb8160..3af63d3 100644
--- a/src/java/org/apache/nutch/crawl/CrawlDbReader.java
+++ b/src/java/org/apache/nutch/crawl/CrawlDbReader.java
@@ -74,7 +74,7 @@ import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.SegmentReaderUtil;
import org.apache.nutch.util.StringUtil;
import org.apache.nutch.util.TimingUtil;
-import org.apache.commons.jexl2.Expression;
+import org.apache.commons.jexl3.JexlExpression;
import com.fasterxml.jackson.core.JsonGenerationException;
import com.fasterxml.jackson.core.JsonGenerator;
@@ -864,7 +864,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable {
Matcher matcher = null;
String status = null;
Integer retry = null;
- Expression expr = null;
+ JexlExpression expr = null;
float sample;
@Override
diff --git a/src/java/org/apache/nutch/crawl/Generator.java b/src/java/org/apache/nutch/crawl/Generator.java
index 04c2ae8..c3f4469 100644
--- a/src/java/org/apache/nutch/crawl/Generator.java
+++ b/src/java/org/apache/nutch/crawl/Generator.java
@@ -34,9 +34,9 @@ import java.util.Random;
import org.apache.hadoop.conf.Configurable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.commons.jexl2.Expression;
-import org.apache.commons.jexl2.JexlContext;
-import org.apache.commons.jexl2.MapContext;
+import org.apache.commons.jexl3.JexlExpression;
+import org.apache.commons.jexl3.JexlContext;
+import org.apache.commons.jexl3.MapContext;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
@@ -182,7 +182,7 @@ public class Generator extends NutchTool implements Tool {
private float scoreThreshold = 0f;
private int intervalThreshold = -1;
private byte restrictStatus = -1;
- private Expression expr = null;
+ private JexlExpression expr = null;
@Override
public void setup(
@@ -306,8 +306,8 @@ public class Generator extends NutchTool implements Tool {
private URLNormalizers normalizers;
private static boolean normalise;
private SequenceFile.Reader[] hostdbReaders = null;
- private Expression maxCountExpr = null;
- private Expression fetchDelayExpr = null;
+ private JexlExpression maxCountExpr = null;
+ private JexlExpression fetchDelayExpr = null;
public void open() {
if (conf.get(GENERATOR_HOSTDB) != null) {
diff --git a/src/java/org/apache/nutch/hostdb/ReadHostDb.java b/src/java/org/apache/nutch/hostdb/ReadHostDb.java
index 62bf3a7..be9f459 100644
--- a/src/java/org/apache/nutch/hostdb/ReadHostDb.java
+++ b/src/java/org/apache/nutch/hostdb/ReadHostDb.java
@@ -44,10 +44,11 @@ import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.TimingUtil;
import org.apache.nutch.util.SegmentReaderUtil;
-import org.apache.commons.jexl2.JexlContext;
-import org.apache.commons.jexl2.Expression;
-import org.apache.commons.jexl2.JexlEngine;
-import org.apache.commons.jexl2.MapContext;
+import org.apache.commons.jexl3.JexlBuilder;
+import org.apache.commons.jexl3.JexlContext;
+import org.apache.commons.jexl3.JexlExpression;
+import org.apache.commons.jexl3.JexlEngine;
+import org.apache.commons.jexl3.MapContext;
/**
* @see <a href='https://commons.apache.org/proper/commons-jexl/reference/syntax.html'>Commons</a>
@@ -67,7 +68,7 @@ public class ReadHostDb extends Configured implements Tool {
protected boolean dumpHomepages = false;
protected boolean fieldHeader = true;
protected Text emptyText = new Text();
- protected Expression expr = null;
+ protected JexlExpression expr = null;
@Override
public void setup(Context context) {
@@ -77,11 +78,7 @@ public class ReadHostDb extends Configured implements Tool {
String expr = context.getConfiguration().get(HOSTDB_FILTER_EXPRESSION);
if (expr != null) {
// Create or retrieve a JexlEngine
- JexlEngine jexl = new JexlEngine();
-
- // Dont't be silent and be strict
- jexl.setSilent(true);
- jexl.setStrict(true);
+ JexlEngine jexl = new JexlBuilder().silent(true).strict(true).create();
// Create an expression object
this.expr = jexl.createExpression(expr);
diff --git a/src/java/org/apache/nutch/util/JexlUtil.java b/src/java/org/apache/nutch/util/JexlUtil.java
index 42c8728..24f9fe6 100644
--- a/src/java/org/apache/nutch/util/JexlUtil.java
+++ b/src/java/org/apache/nutch/util/JexlUtil.java
@@ -21,8 +21,9 @@ import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.commons.jexl2.Expression;
-import org.apache.commons.jexl2.JexlEngine;
+import org.apache.commons.jexl3.JexlBuilder;
+import org.apache.commons.jexl3.JexlEngine;
+import org.apache.commons.jexl3.JexlExpression;
import org.apache.commons.lang.time.DateUtils;
import org.slf4j.Logger;
@@ -46,7 +47,7 @@ public class JexlUtil {
* @param expr string JEXL expression
* @return parsed JEXL expression or null in case of parse error
*/
- public static Expression parseExpression(String expr) {
+ public static JexlExpression parseExpression(String expr) {
if (expr == null) return null;
try {
@@ -65,10 +66,7 @@ public class JexlUtil {
expr = expr.replace(date, Long.toString(time));
}
- JexlEngine jexl = new JexlEngine();
-
- jexl.setSilent(true);
- jexl.setStrict(true);
+ JexlEngine jexl = new JexlBuilder().silent(true).strict(true).create();
return jexl.createExpression(expr);
} catch (Exception e) {
diff --git a/src/plugin/exchange-jexl/src/java/org/apache/nutch/exchange/jexl/JexlExchange.java b/src/plugin/exchange-jexl/src/java/org/apache/nutch/exchange/jexl/JexlExchange.java
index 5273a572..e889040 100644
--- a/src/plugin/exchange-jexl/src/java/org/apache/nutch/exchange/jexl/JexlExchange.java
+++ b/src/plugin/exchange-jexl/src/java/org/apache/nutch/exchange/jexl/JexlExchange.java
@@ -16,9 +16,9 @@
*/
package org.apache.nutch.exchange.jexl;
-import org.apache.commons.jexl2.Expression;
-import org.apache.commons.jexl2.JexlContext;
-import org.apache.commons.jexl2.MapContext;
+import org.apache.commons.jexl3.JexlExpression;
+import org.apache.commons.jexl3.JexlContext;
+import org.apache.commons.jexl3.MapContext;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.exchange.Exchange;
import org.apache.nutch.indexer.NutchDocument;
@@ -32,7 +32,7 @@ public class JexlExchange implements Exchange {
private Configuration conf;
- private Expression expression;
+ private JexlExpression expression;
/**
* Initializes the internal variables.
diff --git a/src/plugin/index-jexl-filter/src/java/org/apache/nutch/indexer/jexl/JexlIndexingFilter.java b/src/plugin/index-jexl-filter/src/java/org/apache/nutch/indexer/jexl/JexlIndexingFilter.java
index 633e0d4..ac387c0 100644
--- a/src/plugin/index-jexl-filter/src/java/org/apache/nutch/indexer/jexl/JexlIndexingFilter.java
+++ b/src/plugin/index-jexl-filter/src/java/org/apache/nutch/indexer/jexl/JexlIndexingFilter.java
@@ -20,9 +20,9 @@ import java.lang.invoke.MethodHandles;
import java.util.List;
import java.util.Map.Entry;
-import org.apache.commons.jexl2.Expression;
-import org.apache.commons.jexl2.JexlContext;
-import org.apache.commons.jexl2.MapContext;
+import org.apache.commons.jexl3.JexlExpression;
+import org.apache.commons.jexl3.JexlContext;
+import org.apache.commons.jexl3.MapContext;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
@@ -48,7 +48,7 @@ public class JexlIndexingFilter implements IndexingFilter {
.getLogger(MethodHandles.lookup().lookupClass());
private Configuration conf;
- private Expression expr;
+ private JexlExpression expr;
@Override
public NutchDocument filter(NutchDocument doc, Parse parse, Text url,
@@ -96,7 +96,7 @@ public class JexlIndexingFilter implements IndexingFilter {
return doc;
}
} catch (Exception e) {
- LOG.warn("Failed evaluating JEXL {}", expr.getExpression(), e);
+ LOG.warn("Failed evaluating JEXL {}", expr.getSourceText(), e);
}
return null;
}