You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2017/05/10 21:42:35 UTC
[14/50] [abbrv] lucene-solr:master: squash merge jira/solr-10290 into
master
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/95968c69/solr/solr-ref-guide/tools/ReducePDFSize.java
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/tools/ReducePDFSize.java b/solr/solr-ref-guide/tools/ReducePDFSize.java
new file mode 100644
index 0000000..4506cae
--- /dev/null
+++ b/solr/solr-ref-guide/tools/ReducePDFSize.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.common.PDStream;
+
+/**
+ * A simple command line utility for reducing the size of the ref-guide PDF.
+ * <p>
+ * Currently this script focuses on using {@link COSName#FLATE_DECODE} to compress the (decoded) Objects
+ * in the source PDF, but other improvements may be possible in the future.
+ * </p>
+ * <p>
+ * This code is originally based on the <code>WriteDecodedDoc</code> example provided with <a href="https://pdfbox.apache.org/">Apache PDFBox</a>.
+ * </p>
+ * <p>
+ * <b>NOTE:</b> This class should <em>NOT</em> be considered a general purpose tool for reducing the size of
+ * <em>any</em> PDF.
+ * Decisions made in this code can and will be focused explicitly on serving the purpose of reducing the size of the
+ * Solr Reference Guide PDF, as originally produced by asciidoctor, and may not be generally useful for all PDFs
+ * "in the wild".
+ * </p>
+ */
+public class ReducePDFSize {
+
+  /**
+   * Command line entry point: loads the input PDF, re-writes every stream object using
+   * {@link COSName#FLATE_DECODE}, and saves the result to the output path.
+   *
+   * @param args exactly two elements: {@code args[0]} is the input file, {@code args[1]} is the output file
+   * @throws IOException if loading, re-writing, saving, or closing the document fails
+   * @throws IllegalArgumentException if {@code args} does not contain exactly two elements
+   * @throws RuntimeException if the decoded bytes of a stream object cannot be read
+   */
+  public static void main(String[] args) throws IOException {
+    if (2 != args.length) {
+      throw new IllegalArgumentException("arg0 must be input file, arg1 must be output file");
+    }
+    String in = args[0];
+    String out = args[1];
+
+    // try-with-resources closes the document on every exit path, and a failure in close()
+    // can no longer hide an exception thrown while processing.
+    try (PDDocument doc = PDDocument.load(new File(in))) {
+      doc.setAllSecurityToBeRemoved(true);
+      for (COSObject cosObject : doc.getDocument().getObjects()) {
+        COSBase base = cosObject.getObject();
+        // if it's a stream: decode it, then re-write it using FLATE_DECODE
+        if (base instanceof COSStream) {
+          recompress(cosObject, (COSStream) base);
+        }
+      }
+      // NOTE(review): the return value is unused; this call is retained from the original code in
+      // case it has a side effect on the document before saving -- TODO confirm whether it is needed.
+      doc.getDocumentCatalog();
+      doc.save(out);
+    }
+  }
+
+  /**
+   * Replaces the content of a single stream with its decoded bytes, re-written using
+   * {@link COSName#FLATE_DECODE}.
+   *
+   * @param cosObject the object that owns the stream; used only to identify it in error messages
+   * @param stream the stream to re-write in place
+   * @throws IOException if writing the re-encoded bytes fails
+   * @throws RuntimeException if the decoded bytes of the stream cannot be read
+   */
+  private static void recompress(COSObject cosObject, COSStream stream) throws IOException {
+    byte[] bytes;
+    try {
+      bytes = new PDStream(stream).toByteArray();
+    } catch (IOException ex) {
+      // NOTE: original example code from PDFBox just logged & "continue;"d here, 'skipping' this stream.
+      // If this type of failure ever happens, we can (perhaps) consider (re)ignoring this type of failure?
+      //
+      // IIUC then that will leave the original (non-decoded / non-flated) stream in place?
+      throw new RuntimeException("can't serialize byte[] from: " +
+                                 cosObject.getObjectNumber() + " " +
+                                 cosObject.getGenerationNumber() + " obj: " +
+                                 ex.getMessage(), ex);
+    }
+    // Drop the old filter entry before asking for an output stream with the new filter.
+    stream.removeItem(COSName.FILTER);
+    // Close the output stream even if write() throws, so it is never leaked.
+    try (OutputStream streamOut = stream.createOutputStream(COSName.FLATE_DECODE)) {
+      streamOut.write(bytes);
+    }
+  }
+}