Posted to commits@lucene.apache.org by no...@apache.org on 2020/02/20 14:43:35 UTC

[lucene-solr] branch jira/solr14270 created (now f5aef9d)

This is an automated email from the ASF dual-hosted git repository.

noble pushed a change to branch jira/solr14270
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git.


      at f5aef9d  SOLR-14270: implemented and tested

This branch includes the following new commits:

     new f5aef9d  SOLR-14270: implemented and tested

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[lucene-solr] 01/01: SOLR-14270: implemented and tested

Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

noble pushed a commit to branch jira/solr14270
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit f5aef9deb6cf907b893fc7f5d7e7bee00eb3111c
Author: noble <no...@apache.org>
AuthorDate: Fri Feb 21 01:43:02 2020 +1100

    SOLR-14270: implemented and tested
---
 .../src/java/org/apache/solr/util/ExportTool.java  | 41 ++++++++++++++--------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/util/ExportTool.java b/solr/core/src/java/org/apache/solr/util/ExportTool.java
index ab29800..9ec5a40 100644
--- a/solr/core/src/java/org/apache/solr/util/ExportTool.java
+++ b/solr/core/src/java/org/apache/solr/util/ExportTool.java
@@ -41,6 +41,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.function.BiConsumer;
 import java.util.function.Consumer;
+import java.util.zip.GZIPOutputStream;
 
 import com.google.common.collect.ImmutableSet;
 import org.apache.commons.cli.CommandLine;
@@ -185,13 +186,23 @@ public class ExportTool extends SolrCLI.ToolBase {
     info.exportDocs();
   }
 
-  interface DocsSink {
-    default void start() throws IOException {
-    }
+  static abstract class DocsSink {
+    Info info;
+    OutputStream fos;
+
+    abstract void start() throws IOException;
 
-    void accept(SolrDocument document) throws IOException, InterruptedException;
+    void accept(SolrDocument document) throws IOException {
+      long count = info.docsWritten.incrementAndGet();
+
+      if (count % 100000 == 0) {
+        System.out.println("\nDOCS: " + count);
+      }
 
-    default void end() throws IOException {
+    }
+
+    void end() throws IOException {
     }
   }
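
For context on the refactor above: the interface becomes an abstract class so that
progress accounting lives in one place.  Each concrete sink writes its document and
then calls super.accept(doc), which bumps the shared docsWritten counter and prints
progress every 100,000 documents.  A minimal standalone sketch of the same
template-method pattern (Sink and JsonSinkSketch are illustrative names, not the
actual Solr types):

    import java.io.IOException;
    import java.util.concurrent.atomic.AtomicLong;

    abstract class Sink {
        final AtomicLong written = new AtomicLong();

        // Shared bookkeeping; subclasses call this after a successful write.
        void accept(Object doc) throws IOException {
            long count = written.incrementAndGet();
            if (count % 100_000 == 0) System.out.println("docs written: " + count);
        }
    }

    class JsonSinkSketch extends Sink {
        @Override
        void accept(Object doc) throws IOException {
            // ... serialize doc to the output here ...
            super.accept(doc); // count only documents that were actually written
        }
    }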
 
@@ -228,13 +239,10 @@ public class ExportTool extends SolrCLI.ToolBase {
           .create("fields")
   };
 
-  static class JsonSink implements DocsSink {
-    private final Info info;
+  static class JsonSink extends DocsSink {
     private CharArr charArr = new CharArr(1024 * 2);
     JSONWriter jsonWriter = new JSONWriter(charArr, -1);
     private Writer writer;
-    private OutputStream fos;
-    public AtomicLong docs = new AtomicLong();
 
     public JsonSink(Info info) {
       this.info = info;
@@ -243,6 +251,7 @@ public class ExportTool extends SolrCLI.ToolBase {
     @Override
     public void start() throws IOException {
       fos = new FileOutputStream(info.out);
+      if (info.out.endsWith(".json.gz") || info.out.endsWith(".json.")) fos = new GZIPOutputStream(fos);
       if (info.bufferSize > 0) {
         fos = new BufferedOutputStream(fos, info.bufferSize);
       }
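
The hunk above builds the output chain in a specific order: the raw FileOutputStream
is wrapped in a GZIPOutputStream when the file name carries a gzip-style extension,
and the result is then wrapped in a BufferedOutputStream.  A minimal sketch of that
wrapping order (the class name and the plain ".gz" test are illustrative, not the
patch's exact checks):

    import java.io.*;
    import java.util.zip.GZIPOutputStream;

    class OutputChainSketch {
        static OutputStream openOut(String out, int bufferSize) throws IOException {
            OutputStream fos = new FileOutputStream(out);
            if (out.endsWith(".gz")) {
                fos = new GZIPOutputStream(fos);     // compress the byte stream
            }
            if (bufferSize > 0) {
                fos = new BufferedOutputStream(fos, bufferSize); // batch small writes
            }
            return fos;
        }
    }

Closing the outermost stream closes the whole chain, which is what makes
GZIPOutputStream emit a valid gzip trailer.
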
@@ -259,7 +268,6 @@ public class ExportTool extends SolrCLI.ToolBase {
 
     @Override
     public synchronized void accept(SolrDocument doc) throws IOException {
-      docs.incrementAndGet();
       charArr.reset();
       Map m = new LinkedHashMap(doc.size());
       doc.forEach((s, field) -> {
@@ -274,13 +282,12 @@ public class ExportTool extends SolrCLI.ToolBase {
       jsonWriter.write(m);
       writer.write(charArr.getArray(), charArr.getStart(), charArr.getEnd());
       writer.append('\n');
+      super.accept(doc);
     }
   }
 
-  private static class JavabinSink implements DocsSink {
-    private final Info info;
+  static class JavabinSink extends DocsSink {
     JavaBinCodec codec;
-    OutputStream fos;
 
     public JavabinSink(Info info) {
       this.info = info;
@@ -289,6 +296,7 @@ public class ExportTool extends SolrCLI.ToolBase {
     @Override
     public void start() throws IOException {
       fos = new FileOutputStream(info.out);
+      if (info.out.endsWith(".json.gz") || info.out.endsWith(".json.")) fos = new GZIPOutputStream(fos);
       if (info.bufferSize > 0) {
         fos = new BufferedOutputStream(fos, info.bufferSize);
       }
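
The javabin sink appears to reuse the JSON extension checks verbatim, so a
".javabin.gz" output would not be compressed as written.  A hedged sketch of pulling
the check into one shared method keyed on a plain ".gz" suffix, which both sinks'
start() could call (maybeGzip is hypothetical and not part of this patch; it relies
on the GZIPOutputStream import added above):

    // Hypothetical shared helper, not in the patch.
    static OutputStream maybeGzip(OutputStream fos, String fileName) throws IOException {
        return fileName.endsWith(".gz") ? new GZIPOutputStream(fos) : fos;
    }
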
@@ -330,6 +338,7 @@ public class ExportTool extends SolrCLI.ToolBase {
       codec.writeTag(SOLRINPUTDOC, sz);
       codec.writeFloat(1f); // document boost
       doc.forEach(bic);
+      super.accept(doc);
     }
   }
 
@@ -339,6 +348,7 @@ public class ExportTool extends SolrCLI.ToolBase {
     SolrDocument EOFDOC = new SolrDocument();
     volatile boolean failed = false;
     Map<String, CoreHandler> corehandlers = new HashMap();
+    private long startTime = System.currentTimeMillis();
 
     public MultiThreadedRunner(String url) {
       super(url);
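
The EOFDOC field visible in this hunk looks like a poison-pill sentinel: producer
threads push documents onto a shared queue, and one designated empty SolrDocument
tells the consumer to stop.  A minimal standalone sketch of the pattern (the queue
wiring below is illustrative; the actual MultiThreadedRunner plumbing is outside
this diff):

    import java.util.concurrent.ArrayBlockingQueue;
    import java.util.concurrent.BlockingQueue;

    class PoisonPillSketch {
        final BlockingQueue<String> queue = new ArrayBlockingQueue<>(1024);
        final String EOF = new String("EOF"); // compared by identity, never equals()

        void consume() throws InterruptedException {
            for (;;) {
                String doc = queue.take();
                if (doc == EOF) break;  // the single sentinel instance ends the loop
                // ... hand doc to the sink ...
            }
        }

        void finish() throws InterruptedException {
            queue.put(EOF);             // producer side: signal completion
        }
    }
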
@@ -362,7 +372,7 @@ public class ExportTool extends SolrCLI.ToolBase {
         addConsumer(consumerlatch);
         addProducers(m);
         if (output != null) {
-          output.println("NO of shards : " + corehandlers.size());
+          output.println("NO: of shards : " + corehandlers.size());
         }
         CountDownLatch producerLatch = new CountDownLatch(corehandlers.size());
         corehandlers.forEach((s, coreHandler) -> producerThreadpool.submit(() -> {
@@ -390,6 +400,8 @@ public class ExportTool extends SolrCLI.ToolBase {
             //ignore
           }
         }
+        System.out.println("\nTotal Docs exported: "+ (docsWritten.get() -1)+
+            ". Time taken: "+( (System.currentTimeMillis() - startTime)/1000) + "secs");
       }
     }
 
@@ -418,7 +430,6 @@ public class ExportTool extends SolrCLI.ToolBase {
           try {
             if (docsWritten.get() > limit) continue;
             sink.accept(doc);
-            docsWritten.incrementAndGet();
           } catch (Exception e) {
             if (output != null) output.println("Failed to write to file " + e.getMessage());
             failed = true;
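
Since JsonSink writes each document as one JSON object followed by a newline, and the
new code gzips the file when its name ends in ".json.gz", an export can be read back
with the standard stream wrappers.  A hedged sketch, assuming an export named
"docs.json.gz" produced by this tool and UTF-8 output:

    import java.io.*;
    import java.nio.charset.StandardCharsets;
    import java.util.zip.GZIPInputStream;

    class ReadExportSketch {
        public static void main(String[] args) throws IOException {
            try (BufferedReader r = new BufferedReader(new InputStreamReader(
                    new GZIPInputStream(new FileInputStream("docs.json.gz")),
                    StandardCharsets.UTF_8))) {
                String line;
                while ((line = r.readLine()) != null) {
                    System.out.println(line);  // one exported document per line
                }
            }
        }
    }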