You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ge...@apache.org on 2019/08/08 12:39:22 UTC

[lucene-solr] branch branch_8x updated: SOLR-13622: Rename FilesStream -> CatStream

This is an automated email from the ASF dual-hosted git repository.

gerlowskija pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new 299d92d  SOLR-13622: Rename FilesStream -> CatStream
299d92d is described below

commit 299d92da5cc6315a98ef656a66ab7b285ecb4e3d
Author: Jason Gerlowski <ge...@apache.org>
AuthorDate: Wed Aug 7 21:17:48 2019 -0400

    SOLR-13622: Rename FilesStream -> CatStream
    
    Also fixes an OS-dependent bug in the 'cat' tests in StreamExpressionTest.
---
 solr/CHANGES.txt                                   |  2 +-
 .../handler/{FilesStream.java => CatStream.java}   |  6 ++--
 .../solr/handler/SolrDefaultStreamFactory.java     |  2 +-
 .../src/stream-source-reference.adoc               | 14 ++++-----
 .../solrj/io/stream/StreamExpressionTest.java      | 33 ++++++++++++----------
 5 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 2658db0..b3de7fa 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -52,7 +52,7 @@ New Features
 
 * SOLR-13553: Node level custom RequestHandlers (noble)
 
-* SOLR-13622: Add files() stream source to create tuples from lines in local files (Jason Gerlowski and Joel Bernstein)
+* SOLR-13622: Add cat() stream source to create tuples from lines in local files (Jason Gerlowski and Joel Bernstein)
 
 * SOLR-11866: QueryElevationComponent can have query rules configured with match="subset" wherein the words need only
   match a subset of the query's words and in any order.  (Bruno Roustant via David Smiley)
diff --git a/solr/core/src/java/org/apache/solr/handler/FilesStream.java b/solr/core/src/java/org/apache/solr/handler/CatStream.java
similarity index 97%
rename from solr/core/src/java/org/apache/solr/handler/FilesStream.java
rename to solr/core/src/java/org/apache/solr/handler/CatStream.java
index c47812a..17747550 100644
--- a/solr/core/src/java/org/apache/solr/handler/FilesStream.java
+++ b/solr/core/src/java/org/apache/solr/handler/CatStream.java
@@ -46,7 +46,7 @@ import org.apache.solr.core.SolrResourceLoader;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-public class FilesStream extends TupleStream implements Expressible {
+public class CatStream extends TupleStream implements Expressible {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
   private final String commaDelimitedFilepaths;
@@ -60,11 +60,11 @@ public class FilesStream extends TupleStream implements Expressible {
   private CrawlFile currentFilePath;
   private LineIterator currentFileLines;
 
-  public FilesStream(StreamExpression expression, StreamFactory factory) throws IOException {
+  public CatStream(StreamExpression expression, StreamFactory factory) throws IOException {
     this(factory.getValueOperand(expression, 0), factory.getIntOperand(expression, "maxLines", -1));
   }
 
-  public FilesStream(String commaDelimitedFilepaths, int maxLines) {
+  public CatStream(String commaDelimitedFilepaths, int maxLines) {
     if (commaDelimitedFilepaths == null) {
       throw new IllegalArgumentException("No filepaths provided to stream");
     }
diff --git a/solr/core/src/java/org/apache/solr/handler/SolrDefaultStreamFactory.java b/solr/core/src/java/org/apache/solr/handler/SolrDefaultStreamFactory.java
index 0ee16b1..ca75927 100644
--- a/solr/core/src/java/org/apache/solr/handler/SolrDefaultStreamFactory.java
+++ b/solr/core/src/java/org/apache/solr/handler/SolrDefaultStreamFactory.java
@@ -34,7 +34,7 @@ public class SolrDefaultStreamFactory extends DefaultStreamFactory {
   public SolrDefaultStreamFactory() {
     super();
     this.withFunctionName("analyze",  AnalyzeEvaluator.class);
-    this.withFunctionName("files", FilesStream.class);
+    this.withFunctionName("cat", CatStream.class);
     this.withFunctionName("classify", ClassifyStream.class);
     this.withFunctionName("haversineMeters", HaversineMetersEvaluator.class);
   }
diff --git a/solr/solr-ref-guide/src/stream-source-reference.adoc b/solr/solr-ref-guide/src/stream-source-reference.adoc
index e168e7e..d3040fe 100644
--- a/solr/solr-ref-guide/src/stream-source-reference.adoc
+++ b/solr/solr-ref-guide/src/stream-source-reference.adoc
@@ -216,31 +216,31 @@ features(collection1,
          numTerms=250)
 ----
 
-== files
+== cat
 
-The `files` function reads the specified files or directories and emits each line in the file(s) as a tuple.
+The `cat` function reads the specified files or directories and emits each line in the file(s) as a tuple.
 
 Each emitted tuple contains two fields: `file` and `line`.  `file` contains the path to the file being read from relative to the `userfiles` chroot (directly under `$SOLR_HOME`), and `line` contains a line in that file.
 
-`files` is ideally used with the `update` stream to index data from the specified documents, or with the `analyze` stream to further split the lines into individual tokens for statistical processing or visualization.
+`cat` is ideally used with the `update` stream to index data from the specified documents, or with the `analyze` stream to further split the lines into individual tokens for statistical processing or visualization.
 
-=== files Parameters
+=== cat Parameters
 
 * `filePaths`: (Mandatory) a comma-separated list of filepaths to read lines from.  If the specified path is a directory, it will be crawled recursively and all contained files will be read.  To prevent malicious users from reading arbitrary files from Solr nodes, `filePaths` must be a relative path measured from a chroot of `$SOLR_HOME/userfiles` on the node running the streaming expression.
 * `maxLines`: (defaults to -1) The maximum number of lines to read (and tuples to emit).  If a negative value is specified, all lines in the specified files will be emitted as tuples.  Files are read in the order that they appear in the comma-separated `filePaths` argument.  If the line-limit is hit, it will be these later files that are partially emitted or not read at all.
 
-=== files Examples
+=== cat Examples
 
 The following example emits all lines from a single text file located at `$SOLR_HOME/userfiles/authors.txt`:
 [source,text]
 ----
-files("authors.txt")
+cat("authors.txt")
 ----
 
 This example will read lines from `$SOLR_HOME/userfiles/authors.txt`, as well as all files (recursively) found under `$SOLR_HOME/userfiles/fiction/scifi`.  Only 500 lines will be emitted, meaning that some files may be partially emitted or not read at all:
 [source,text]
 ----
-files("authors.txt,fiction/scifi/", maxLines=500)
+cat("authors.txt,fiction/scifi/", maxLines=500)
 ----
 
 == nodes
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
index 146912f..ee63a2c 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
@@ -3064,10 +3064,10 @@ public class StreamExpressionTest extends SolrCloudTestCase {
   }
 
   @Test
-  public void testFileStreamSingleFile() throws Exception {
-    final String fileStream = "files(\"topLevel1.txt\")";
+  public void testCatStreamSingleFile() throws Exception {
+    final String catStream = "cat(\"topLevel1.txt\")";
     ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
-    paramsLoc.set("expr", fileStream);
+    paramsLoc.set("expr", catStream);
     paramsLoc.set("qt", "/stream");
     String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+FILESTREAM_COLLECTION;
 
@@ -3086,10 +3086,10 @@ public class StreamExpressionTest extends SolrCloudTestCase {
   }
 
   @Test
-  public void testFileStreamMaxLines() throws Exception {
-    final String fileStream = "files(\"topLevel1.txt\", maxLines=2)";
+  public void testCatStreamMaxLines() throws Exception {
+    final String catStream = "cat(\"topLevel1.txt\", maxLines=2)";
     ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
-    paramsLoc.set("expr", fileStream);
+    paramsLoc.set("expr", catStream);
     paramsLoc.set("qt", "/stream");
     String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+FILESTREAM_COLLECTION;
 
@@ -3108,10 +3108,10 @@ public class StreamExpressionTest extends SolrCloudTestCase {
   }
 
   @Test
-  public void testFileStreamDirectoryCrawl() throws Exception {
-    final String fileStream = "files(\"directory1\")";
+  public void testCatStreamDirectoryCrawl() throws Exception {
+    final String catStream = "cat(\"directory1\")";
     ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
-    paramsLoc.set("expr", fileStream);
+    paramsLoc.set("expr", catStream);
     paramsLoc.set("qt", "/stream");
     String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+FILESTREAM_COLLECTION;
 
@@ -3122,24 +3122,26 @@ public class StreamExpressionTest extends SolrCloudTestCase {
     List<Tuple> tuples = getTuples(solrStream);
     assertEquals(8, tuples.size());
 
+    final String expectedSecondLevel1Path = "directory1" + File.separator + "secondLevel1.txt";
     for (int i = 0; i < 4; i++) {
       Tuple t = tuples.get(i);
       assertEquals("secondLevel1.txt line " + String.valueOf(i+1), t.get("line"));
-      assertEquals("directory1/secondLevel1.txt", t.get("file"));
+      assertEquals(expectedSecondLevel1Path, t.get("file"));
     }
 
+    final String expectedSecondLevel2Path = "directory1" + File.separator + "secondLevel2.txt";
     for (int i = 4; i < 8; i++) {
       Tuple t = tuples.get(i);
       assertEquals("secondLevel2.txt line " + String.valueOf(i - 3), t.get("line"));
-      assertEquals("directory1/secondLevel2.txt", t.get("file"));
+      assertEquals(expectedSecondLevel2Path, t.get("file"));
     }
   }
 
   @Test
-  public void testFileStreamMultipleExplicitFiles() throws Exception {
-    final String fileStream = "files(\"topLevel1.txt,directory1/secondLevel2.txt\")";
+  public void testCatStreamMultipleExplicitFiles() throws Exception {
+    final String catStream = "cat(\"topLevel1.txt,directory1" + File.separator + "secondLevel2.txt\")";
     ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
-    paramsLoc.set("expr", fileStream);
+    paramsLoc.set("expr", catStream);
     paramsLoc.set("qt", "/stream");
     String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+FILESTREAM_COLLECTION;
 
@@ -3156,10 +3158,11 @@ public class StreamExpressionTest extends SolrCloudTestCase {
       assertEquals("topLevel1.txt", t.get("file"));
     }
 
+    final String expectedSecondLevel2Path = "directory1" + File.separator + "secondLevel2.txt";
     for (int i = 4; i < 8; i++) {
       Tuple t = tuples.get(i);
       assertEquals("secondLevel2.txt line " + String.valueOf(i - 3), t.get("line"));
-      assertEquals("directory1/secondLevel2.txt", t.get("file"));
+      assertEquals(expectedSecondLevel2Path, t.get("file"));
     }
   }