You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2017/05/10 21:42:35 UTC

[14/50] [abbrv] lucene-solr:master: squash merge jira/solr-10290 into master

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/95968c69/solr/solr-ref-guide/tools/ReducePDFSize.java
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/tools/ReducePDFSize.java b/solr/solr-ref-guide/tools/ReducePDFSize.java
new file mode 100644
index 0000000..4506cae
--- /dev/null
+++ b/solr/solr-ref-guide/tools/ReducePDFSize.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.common.PDStream;
+
+/**
+ * A simple command line utility for reducing the size of the ref-guide PDF.
+ * <p>
+ * Currently this script focuses on using {@link COSName#FLATE_DECODE} to compress the (decoded) Objects 
+ * in the source PDF, but other improvements may be possible in the future.
+ * </p>
+ * <p>
+ * This code is originally based on the <code>WriteDecodedDoc</code> example provided with <a href="https://pdfbox.apache.org/">Apache PDFBox</a>.
+ * </p>
+ * <p>
+ * <b>NOTE:</b> This class should <em>NOT</em> be considered a general purpose tool for reducing the size of 
+ * <em>any</em> PDF.  
+ * Decisions made in this code can and will be focused explicitly on serving the purpose of reducing the size of the 
+ * Solr Reference Guide PDF, as originally produced by asciidoctor, and may not be generally useful for all PDFs 
+ * "in the wild".
+ * </p>
+ */
+public class ReducePDFSize {
+
+  /**
+   * Re-compresses every stream object of the input PDF with {@link COSName#FLATE_DECODE} and saves the result.
+   * @param args exactly two entries: the input PDF path, then the output PDF path
+   * @throws IOException if the input can not be loaded, a stream can not be re-encoded, or the output can not be saved
+   * @throws RuntimeException if the argument count is wrong, or if any stream can not be decoded
+   */
+  public static void main(String[] args) throws IOException {
+    if (2 != args.length) {
+      throw new IllegalArgumentException("arg0 must be input file, arg1 must be output file");
+    }
+    String in = args[0];
+    String out = args[1];
+    // try-with-resources closes the document even when decoding or saving fails
+    try (PDDocument doc = PDDocument.load(new File(in))) {
+      doc.setAllSecurityToBeRemoved(true);
+      for (COSObject cosObject : doc.getDocument().getObjects()) {
+        COSBase base = cosObject.getObject();
+        // if it's a stream: decode it, then re-write it using FLATE_DECODE
+        if (base instanceof COSStream) {
+          COSStream stream = (COSStream) base;
+          byte[] bytes;
+          try {
+            bytes = new PDStream(stream).toByteArray();
+          } catch (IOException ex) {
+            // NOTE: original example code from PDFBox just logged & "continue;"d here, 'skipping' this stream.
+            // If this type of failure ever happens, we can (perhaps) consider (re)ignoring this type of failure?
+            //
+            // IIUC then that will leave the original (non-decoded / non-flated) stream in place?
+            throw new RuntimeException("can't serialize byte[] from: " +
+                                       cosObject.getObjectNumber() + " " + cosObject.getGenerationNumber() + " obj: " +
+                                       ex.getMessage(), ex);
+          }
+          stream.removeItem(COSName.FILTER);
+          // try-with-resources: the re-encoded stream is closed even if write() throws
+          try (OutputStream streamOut = stream.createOutputStream(COSName.FLATE_DECODE)) {
+            streamOut.write(bytes);
+          }
+        }
+      }
+      doc.getDocumentCatalog(); // NOTE(review): result unused; presumably kept from the PDFBox example -- confirm before removing
+      doc.save( out );
+    }
+  }
+}