You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ep...@apache.org on 2020/09/21 13:43:16 UTC

[lucene-solr] branch SOLR-14882 created (now 182cf76)

This is an automated email from the ASF dual-hosted git repository.

epugh pushed a change to branch SOLR-14882
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git.


      at 182cf76  update output format to be .jsonl everywhere

This branch includes the following new commits:

     new 182cf76  update output format to be .jsonl everywhere

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[lucene-solr] 01/01: update output format to be .jsonl everywhere

Posted by ep...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

epugh pushed a commit to branch SOLR-14882
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 182cf76cf7c885b708d4b3c96d14312e92635f85
Author: epugh <ep...@opensourceconnections.com>
AuthorDate: Mon Sep 21 09:42:45 2020 -0400

    update output format to be .jsonl everywhere
---
 .../src/java/org/apache/solr/util/ExportTool.java  | 10 ++--
 .../test/org/apache/solr/util/TestExportTool.java  | 53 ++++++++++++++++++++--
 2 files changed, 54 insertions(+), 9 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/util/ExportTool.java b/solr/core/src/java/org/apache/solr/util/ExportTool.java
index 6071bbf..2264c3c 100644
--- a/solr/core/src/java/org/apache/solr/util/ExportTool.java
+++ b/solr/core/src/java/org/apache/solr/util/ExportTool.java
@@ -130,7 +130,7 @@ public class ExportTool extends SolrCLI.ToolBase {
 
     public void setOutFormat(String out, String format) {
       this.format = format;
-      if (format == null) format = "jsonl";
+      if (format == null) format = "json";
       if (!formats.contains(format)) {
         throw new IllegalArgumentException("format must be one of :" + formats);
       }
@@ -139,7 +139,7 @@ public class ExportTool extends SolrCLI.ToolBase {
       if (this.out == null) {
         this.out = JAVABIN.equals(format) ?
             coll + ".javabin" :
-            coll + ".json";
+            coll + ".jsonl";
       }
 
     }
@@ -226,7 +226,7 @@ public class ExportTool extends SolrCLI.ToolBase {
       Option.builder("format")
           .hasArg()
           .required(false)
-          .desc("format  json/javabin, default to json. file extension would be .json")
+          .desc("format  jsonl/javabin, defaults to jsonl. file extension will be .jsonl")
           .build(),
       Option.builder("limit")
           .hasArg()
@@ -257,7 +257,9 @@ public class ExportTool extends SolrCLI.ToolBase {
     @Override
     public void start() throws IOException {
       fos = new FileOutputStream(info.out);
-      if(info.out.endsWith(".json.gz") || info.out.endsWith(".json.")) fos = new GZIPOutputStream(fos);
+      if(info.out.endsWith(".jsonl.gz")) {
+        fos = new GZIPOutputStream(fos);
+      }
       if (info.bufferSize > 0) {
         fos = new BufferedOutputStream(fos, info.bufferSize);
       }
diff --git a/solr/core/src/test/org/apache/solr/util/TestExportTool.java b/solr/core/src/test/org/apache/solr/util/TestExportTool.java
index a69d733..97acc0c 100644
--- a/solr/core/src/test/org/apache/solr/util/TestExportTool.java
+++ b/solr/core/src/test/org/apache/solr/util/TestExportTool.java
@@ -18,9 +18,11 @@
 package org.apache.solr.util;
 
 import java.io.File;
+import java.io.InputStream;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
+import java.util.zip.GZIPInputStream;
 import java.io.Reader;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
@@ -44,9 +46,37 @@ import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.util.FastInputStream;
 import org.apache.solr.common.util.JsonRecordReader;
 
+import org.junit.Test;
+
 @SolrTestCaseJ4.SuppressSSL
 public class TestExportTool extends SolrCloudTestCase {
 
+  @Test(expected = IllegalArgumentException.class)
+  public void testJsonNotValidOutputFileFormat() throws Exception {
+    ExportTool.Info info = new ExportTool.MultiThreadedRunner("http://somesolr/mycollection/");
+    info.setOutFormat(null, "json");
+  }
+
+  public void testJsonlValidOutputFileFormat() throws Exception {
+    ExportTool.Info info = new ExportTool.MultiThreadedRunner("http://somesolr/mycollection/");
+    info.setOutFormat(null, "jsonl");
+  }
+
+  public void testJavabinValidOutputFileFormat() throws Exception {
+    ExportTool.Info info = new ExportTool.MultiThreadedRunner("http://somesolr/mycollection/");
+    info.setOutFormat(null, "javabin");
+  }
+
+  public void testGZJsonlValidOutputFileFormat() throws Exception {
+    ExportTool.Info info = new ExportTool.MultiThreadedRunner("http://somesolr/mycollection/");
+    info.setOutFormat("/somedir.jsonl.gz", "jsonl");
+  }
+
+  public void testGZJavabinValidOutputFileFormat() throws Exception {
+    ExportTool.Info info = new ExportTool.MultiThreadedRunner("http://somesolr/mycollection/");
+    info.setOutFormat("/somedir.javabin.gz", "javabin");
+  }
+
   public void testBasic() throws Exception {
     String COLLECTION_NAME = "globalLoaderColl";
     configureCluster(4)
@@ -80,7 +110,16 @@ public class TestExportTool extends SolrCloudTestCase {
 
 
       ExportTool.Info info = new ExportTool.MultiThreadedRunner(url);
-      String absolutePath = tmpFileLoc + COLLECTION_NAME + random().nextInt(100000) + ".json";
+      String absolutePath = tmpFileLoc + COLLECTION_NAME + random().nextInt(100000) + ".jsonl";
+      info.setOutFormat(absolutePath, "jsonl");
+      info.setLimit("200");
+      info.fields = "id,desc_s,a_dt";
+      info.exportDocs();
+
+      assertJsonDocsCount(info, 200, record -> "2019-09-30T05:58:03Z".equals(record.get("a_dt")));
+
+      info = new ExportTool.MultiThreadedRunner(url);
+      absolutePath = tmpFileLoc + COLLECTION_NAME + random().nextInt(100000) + ".jsonl.gz";
       info.setOutFormat(absolutePath, "jsonl");
       info.setLimit("200");
       info.fields = "id,desc_s,a_dt";
@@ -89,7 +128,7 @@ public class TestExportTool extends SolrCloudTestCase {
       assertJsonDocsCount(info, 200, record -> "2019-09-30T05:58:03Z".equals(record.get("a_dt")));
 
       info = new ExportTool.MultiThreadedRunner(url);
-      absolutePath = tmpFileLoc + COLLECTION_NAME + random().nextInt(100000) + ".json";
+      absolutePath = tmpFileLoc + COLLECTION_NAME + random().nextInt(100000) + ".jsonl";
       info.setOutFormat(absolutePath, "jsonl");
       info.setLimit("-1");
       info.fields = "id,desc_s";
@@ -182,7 +221,7 @@ public class TestExportTool extends SolrCloudTestCase {
       }
       info = new ExportTool.MultiThreadedRunner(url);
       info.output = System.out;
-      absolutePath = tmpFileLoc + COLLECTION_NAME + random().nextInt(100000) + ".json";
+      absolutePath = tmpFileLoc + COLLECTION_NAME + random().nextInt(100000) + ".jsonl";
       info.setOutFormat(absolutePath, "jsonl");
       info.fields = "id,desc_s";
       info.setLimit("-1");
@@ -214,13 +253,17 @@ public class TestExportTool extends SolrCloudTestCase {
     }
   }
 
-    private void assertJsonDocsCount(ExportTool.Info info, int expected, Predicate<Map<String,Object>> predicate) throws IOException {
+  private void assertJsonDocsCount(ExportTool.Info info, int expected, Predicate<Map<String,Object>> predicate) throws IOException {
     assertTrue("" + info.docsWritten.get() + " expected " + expected, info.docsWritten.get() >= expected);
 
     JsonRecordReader jsonReader;
     Reader rdr;
     jsonReader = JsonRecordReader.getInst("/", Arrays.asList("$FQN:/**"));
-    rdr = new InputStreamReader(new FileInputStream(info.out), StandardCharsets.UTF_8);
+    InputStream is = new FileInputStream(info.out);
+    if(info.out.endsWith(".jsonl.gz")) {
+      is = new GZIPInputStream(is);
+    }
+    rdr = new InputStreamReader(is, StandardCharsets.UTF_8);
     try {
       int[] count = new int[]{0};
       jsonReader.streamRecords(rdr, (record, path) -> {