You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ge...@apache.org on 2019/08/08 12:38:30 UTC
[lucene-solr] branch master updated: SOLR-13622: Rename FilesStream -> CatStream
This is an automated email from the ASF dual-hosted git repository.
gerlowskija pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 2eb493d SOLR-13622: Rename FilesStream -> CatStream
2eb493d is described below
commit 2eb493d1700d59845ac120dcc485556b7e7fb422
Author: Jason Gerlowski <ge...@apache.org>
AuthorDate: Wed Aug 7 21:17:48 2019 -0400
SOLR-13622: Rename FilesStream -> CatStream
Also fixes an OS-dependent 'cat' bug in StreamExpressionTest.
---
solr/CHANGES.txt | 2 +-
.../handler/{FilesStream.java => CatStream.java} | 6 ++--
.../solr/handler/SolrDefaultStreamFactory.java | 2 +-
.../src/stream-source-reference.adoc | 14 ++++-----
.../solrj/io/stream/StreamExpressionTest.java | 33 ++++++++++++----------
5 files changed, 30 insertions(+), 27 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f0018a0..f05d6f7 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -102,7 +102,7 @@ New Features
* SOLR-13553: Node level custom RequestHandlers (noble)
-* SOLR-13622: Add files() stream source to create tuples from lines in local files (Jason Gerlowski and Joel Bernstein)
+* SOLR-13622: Add cat() stream source to create tuples from lines in local files (Jason Gerlowski and Joel Bernstein)
* SOLR-11866: QueryElevationComponent can have query rules configured with match="subset" wherein the words need only
match a subset of the query's words and in any order. (Bruno Roustant via David Smiley)
diff --git a/solr/core/src/java/org/apache/solr/handler/FilesStream.java b/solr/core/src/java/org/apache/solr/handler/CatStream.java
similarity index 97%
rename from solr/core/src/java/org/apache/solr/handler/FilesStream.java
rename to solr/core/src/java/org/apache/solr/handler/CatStream.java
index c47812a..17747550 100644
--- a/solr/core/src/java/org/apache/solr/handler/FilesStream.java
+++ b/solr/core/src/java/org/apache/solr/handler/CatStream.java
@@ -46,7 +46,7 @@ import org.apache.solr.core.SolrResourceLoader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class FilesStream extends TupleStream implements Expressible {
+public class CatStream extends TupleStream implements Expressible {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private final String commaDelimitedFilepaths;
@@ -60,11 +60,11 @@ public class FilesStream extends TupleStream implements Expressible {
private CrawlFile currentFilePath;
private LineIterator currentFileLines;
- public FilesStream(StreamExpression expression, StreamFactory factory) throws IOException {
+ public CatStream(StreamExpression expression, StreamFactory factory) throws IOException {
this(factory.getValueOperand(expression, 0), factory.getIntOperand(expression, "maxLines", -1));
}
- public FilesStream(String commaDelimitedFilepaths, int maxLines) {
+ public CatStream(String commaDelimitedFilepaths, int maxLines) {
if (commaDelimitedFilepaths == null) {
throw new IllegalArgumentException("No filepaths provided to stream");
}
diff --git a/solr/core/src/java/org/apache/solr/handler/SolrDefaultStreamFactory.java b/solr/core/src/java/org/apache/solr/handler/SolrDefaultStreamFactory.java
index 0ee16b1..ca75927 100644
--- a/solr/core/src/java/org/apache/solr/handler/SolrDefaultStreamFactory.java
+++ b/solr/core/src/java/org/apache/solr/handler/SolrDefaultStreamFactory.java
@@ -34,7 +34,7 @@ public class SolrDefaultStreamFactory extends DefaultStreamFactory {
public SolrDefaultStreamFactory() {
super();
this.withFunctionName("analyze", AnalyzeEvaluator.class);
- this.withFunctionName("files", FilesStream.class);
+ this.withFunctionName("cat", CatStream.class);
this.withFunctionName("classify", ClassifyStream.class);
this.withFunctionName("haversineMeters", HaversineMetersEvaluator.class);
}
diff --git a/solr/solr-ref-guide/src/stream-source-reference.adoc b/solr/solr-ref-guide/src/stream-source-reference.adoc
index e168e7e..d3040fe 100644
--- a/solr/solr-ref-guide/src/stream-source-reference.adoc
+++ b/solr/solr-ref-guide/src/stream-source-reference.adoc
@@ -216,31 +216,31 @@ features(collection1,
numTerms=250)
----
-== files
+== cat
-The `files` function reads the specified files or directories and emits each line in the file(s) as a tuple.
+The `cat` function reads the specified files or directories and emits each line in the file(s) as a tuple.
Each emitted tuple contains two fields: `file` and `line`. `file` contains the path to the file being read from relative to the `userfiles` chroot (directly under `$SOLR_HOME`), and `line` contains a line in that file.
-`files` is ideally used with the `update` stream to index data from the specified documents, or with the `analyze` stream to further split the lines into individual tokens for statistical processing or visualization.
+`cat` is ideally used with the `update` stream to index data from the specified documents, or with the `analyze` stream to further split the lines into individual tokens for statistical processing or visualization.
-=== files Parameters
+=== cat Parameters
* `filePaths`: (Mandatory) a comma separated list of filepaths to read lines from. If the specified path is a directory, it will be crawled recursively and all contained files will be read. To prevent malicious users from reading arbitrary files from Solr nodes, `filePaths` must be a relative path measured from a chroot of `$SOLR_HOME/userfiles` on the node running the streaming expression.
* `maxLines`: (defaults to -1) The maximum number of lines to read (and tuples to emit). If a negative value is specified, all lines in the specified files will be emitted as tuples. Files are read in the order that they appear in the comma-separated `filePaths` argument. If the line-limit is hit, it will be these later files that are partially emitted or not read at all.
-=== files Examples
+=== cat Examples
The following example emits all lines from a single text file located at `$SOLR_HOME/userfiles/authors.txt`:
[source,text]
----
-files("authors.txt")
+cat("authors.txt")
----
This example will read lines from `$SOLR_HOME/userfiles/authors.txt`, as well as all files (recursively) found under `$SOLR_HOME/userfiles/fiction/scifi`. Only 500 lines will be emitted, meaning that some files may be partially emitted or not read at all:
[source,text]
----
-files("authors.txt,fiction/scifi/", maxLines=500)
+cat("authors.txt,fiction/scifi/", maxLines=500)
----
== nodes
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
index 146912f..ee63a2c 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
@@ -3064,10 +3064,10 @@ public class StreamExpressionTest extends SolrCloudTestCase {
}
@Test
- public void testFileStreamSingleFile() throws Exception {
- final String fileStream = "files(\"topLevel1.txt\")";
+ public void testCatStreamSingleFile() throws Exception {
+ final String catStream = "cat(\"topLevel1.txt\")";
ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
- paramsLoc.set("expr", fileStream);
+ paramsLoc.set("expr", catStream);
paramsLoc.set("qt", "/stream");
String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+FILESTREAM_COLLECTION;
@@ -3086,10 +3086,10 @@ public class StreamExpressionTest extends SolrCloudTestCase {
}
@Test
- public void testFileStreamMaxLines() throws Exception {
- final String fileStream = "files(\"topLevel1.txt\", maxLines=2)";
+ public void testCatStreamMaxLines() throws Exception {
+ final String catStream = "cat(\"topLevel1.txt\", maxLines=2)";
ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
- paramsLoc.set("expr", fileStream);
+ paramsLoc.set("expr", catStream);
paramsLoc.set("qt", "/stream");
String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+FILESTREAM_COLLECTION;
@@ -3108,10 +3108,10 @@ public class StreamExpressionTest extends SolrCloudTestCase {
}
@Test
- public void testFileStreamDirectoryCrawl() throws Exception {
- final String fileStream = "files(\"directory1\")";
+ public void testCatStreamDirectoryCrawl() throws Exception {
+ final String catStream = "cat(\"directory1\")";
ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
- paramsLoc.set("expr", fileStream);
+ paramsLoc.set("expr", catStream);
paramsLoc.set("qt", "/stream");
String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+FILESTREAM_COLLECTION;
@@ -3122,24 +3122,26 @@ public class StreamExpressionTest extends SolrCloudTestCase {
List<Tuple> tuples = getTuples(solrStream);
assertEquals(8, tuples.size());
+ final String expectedSecondLevel1Path = "directory1" + File.separator + "secondLevel1.txt";
for (int i = 0; i < 4; i++) {
Tuple t = tuples.get(i);
assertEquals("secondLevel1.txt line " + String.valueOf(i+1), t.get("line"));
- assertEquals("directory1/secondLevel1.txt", t.get("file"));
+ assertEquals(expectedSecondLevel1Path, t.get("file"));
}
+ final String expectedSecondLevel2Path = "directory1" + File.separator + "secondLevel2.txt";
for (int i = 4; i < 8; i++) {
Tuple t = tuples.get(i);
assertEquals("secondLevel2.txt line " + String.valueOf(i - 3), t.get("line"));
- assertEquals("directory1/secondLevel2.txt", t.get("file"));
+ assertEquals(expectedSecondLevel2Path, t.get("file"));
}
}
@Test
- public void testFileStreamMultipleExplicitFiles() throws Exception {
- final String fileStream = "files(\"topLevel1.txt,directory1/secondLevel2.txt\")";
+ public void testCatStreamMultipleExplicitFiles() throws Exception {
+ final String catStream = "cat(\"topLevel1.txt,directory1" + File.separator + "secondLevel2.txt\")";
ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
- paramsLoc.set("expr", fileStream);
+ paramsLoc.set("expr", catStream);
paramsLoc.set("qt", "/stream");
String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+FILESTREAM_COLLECTION;
@@ -3156,10 +3158,11 @@ public class StreamExpressionTest extends SolrCloudTestCase {
assertEquals("topLevel1.txt", t.get("file"));
}
+ final String expectedSecondLevel2Path = "directory1" + File.separator + "secondLevel2.txt";
for (int i = 4; i < 8; i++) {
Tuple t = tuples.get(i);
assertEquals("secondLevel2.txt line " + String.valueOf(i - 3), t.get("line"));
- assertEquals("directory1/secondLevel2.txt", t.get("file"));
+ assertEquals(expectedSecondLevel2Path, t.get("file"));
}
}