You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2017/03/31 19:14:51 UTC
lucene-solr:jira/solr-10290: SOLR-10298: new pdfbox based tool to
reduce PDF size (via FLATE_DECODE)
Repository: lucene-solr
Updated Branches:
refs/heads/jira/solr-10290 ec2cbb3ee -> d14977553
SOLR-10298: new pdfbox based tool to reduce PDF size (via FLATE_DECODE)
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d1497755
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d1497755
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d1497755
Branch: refs/heads/jira/solr-10290
Commit: d14977553f2597d84f907011a5c7275bfb79be77
Parents: ec2cbb3
Author: Chris Hostetter <ho...@apache.org>
Authored: Fri Mar 31 11:09:26 2017 -0700
Committer: Chris Hostetter <ho...@apache.org>
Committed: Fri Mar 31 12:14:26 2017 -0700
----------------------------------------------------------------------
solr/solr-ref-guide/build.xml | 25 +++++--
solr/solr-ref-guide/ivy.xml | 3 +
solr/solr-ref-guide/tools/ReducePDFSize.java | 90 +++++++++++++++++++++++
3 files changed, 112 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d1497755/solr/solr-ref-guide/build.xml
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/build.xml b/solr/solr-ref-guide/build.xml
index 47e87e4..cc21bc4 100644
--- a/solr/solr-ref-guide/build.xml
+++ b/solr/solr-ref-guide/build.xml
@@ -126,16 +126,19 @@
</target>
<!-- ====== PDF Build ======= -->
- <target name="build-pdf"
- depends="build-nav-data-files,resolve"
- description="Builds a PDF">
+ <target name="build-pdf" depends="-build-raw-pdf,-reduce-pdf-size" description="Builds a PDF"> <!-- aggregate target: the work is done by the two targets listed in depends; this one only reports the final file -->
+ <echo>Finished Building ${build.dir}/${pdf-filename}</echo>
+ </target>
+ <target name="-build-raw-pdf"
+ depends="build-nav-data-files,resolve">
+ <mkdir dir="${build.dir}/pdf-tmp"/>
<taskdef uri="antlib:org.asciidoctor.ant" resource="org/asciidoctor/ant/antlib.xml"
classpathref="tools-run-classpath"/>
<asciidoctor:convert
sourceDirectory="${build.content.dir}/pdf"
sourceDocumentName="SolrRefGuide-all.adoc"
baseDir="${build.content.dir}"
- outputDirectory="${build.dir}"
+ outputDirectory="${build.dir}/pdf-tmp"
backend="pdf"
extensions="adoc"
sourceHighlighter="coderay"
@@ -160,10 +163,20 @@
<attribute key="build-date" value="${DSTAMP}" />
<attribute key="build-year" value="${current.year}" />
</asciidoctor:convert>
- <move file="${build.dir}/SolrRefGuide-all.pdf" tofile="${build.dir}/${pdf-filename}" />
- <echo>Finished Building ${build.dir}/${pdf-filename}</echo>
+ <move file="${build.dir}/pdf-tmp/SolrRefGuide-all.pdf" tofile="${build.dir}/pdf-tmp/RAW-${pdf-filename}" />
+ </target>
+ <target name="-reduce-pdf-size" depends="build-init,build-tools-jar"> <!-- runs the ReducePDFSize tool in a forked JVM; failonerror="true" makes a failing run fail the build -->
+ <java classname="ReducePDFSize"
+ failonerror="true"
+ fork="true">
+ <classpath refid="tools-run-classpath"/>
+ <arg value="${build.dir}/pdf-tmp/RAW-${pdf-filename}"/> <!-- first argument: the raw PDF under pdf-tmp, read as the tool's input file -->
+ <arg value="${build.dir}/${pdf-filename}"/> <!-- second argument: the path the tool saves its output PDF to -->
+ </java>
+ </target>
+
+
<!-- ======= HTML Site Build =======
Builds site with Jekyll.
This (for now) assumes that Jekyll (http://jekyllrb.com) is installed locally. -->
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d1497755/solr/solr-ref-guide/ivy.xml
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/ivy.xml b/solr/solr-ref-guide/ivy.xml
index e59a845..adc182e 100644
--- a/solr/solr-ref-guide/ivy.xml
+++ b/solr/solr-ref-guide/ivy.xml
@@ -25,5 +25,8 @@
<dependency org="org.asciidoctor" name="asciidoctor-ant" rev="${/org.asciidoctor/asciidoctor-ant}" conf="compile" />
<dependency org="org.json" name="json" rev="${/org.json/json}" conf="compile" />
<dependency org="org.jsoup" name="jsoup" rev="${/org.jsoup/jsoup}" conf="compile" />
+ <dependency org="org.apache.pdfbox" name="pdfbox" rev="${/org.apache.pdfbox/pdfbox}" conf="compile"/>
+ <dependency org="org.slf4j" name="jcl-over-slf4j" rev="${/org.slf4j/jcl-over-slf4j}" conf="compile"/>
+ <dependency org="org.slf4j" name="slf4j-api" rev="${/org.slf4j/slf4j-api}" conf="compile"/>
</dependencies>
</ivy-module>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d1497755/solr/solr-ref-guide/tools/ReducePDFSize.java
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/tools/ReducePDFSize.java b/solr/solr-ref-guide/tools/ReducePDFSize.java
new file mode 100644
index 0000000..4506cae
--- /dev/null
+++ b/solr/solr-ref-guide/tools/ReducePDFSize.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.common.PDStream;
+
+/**
+ * A simple command line utility for reducing the size of the ref-guide PDF.
+ * <p>
+ * Currently this script focuses on using {@link COSName#FLATE_DECODE} to compress the (decoded) Objects
+ * in the source PDF, but other improvements may be possible in the future.
+ * </p>
+ * <p>
+ * This code is originally based on the <code>WriteDecodedDoc</code> example provided with <a href="https://pdfbox.apache.org/">Apache PDFBox</a>.
+ * </p>
+ * <p>
+ * <b>NOTE:</b> This class should <em>NOT</em> be considered a general purpose tool for reducing the size of
+ * <em>any</em> PDF.
+ * Decisions made in this code can and will be focused explicitly on serving the purpose of reducing the size of the
+ * Solr Reference Guide PDF, as originally produced by asciidoctor, and may not be generally useful for all PDFs
+ * "in the wild".
+ * </p>
+ */
+public class ReducePDFSize {
+
+  /**
+   * Loads the PDF at {@code args[0]}, re-encodes every stream object using
+   * {@link COSName#FLATE_DECODE}, and saves the result to {@code args[1]}.
+   * @param args exactly two entries: input PDF path, then output PDF path
+   * @throws IOException if loading the input, rewriting a stream, or saving the output fails
+   * @throws RuntimeException if the argument count is wrong or a stream cannot be decoded
+   */
+  public static void main(String[] args) throws IOException {
+    if (2 != args.length) {
+      throw new IllegalArgumentException("arg0 must be input file, arg1 must be output file");
+    }
+    // try-with-resources closes the document (and each rewritten stream) even on failure
+    try (PDDocument doc = PDDocument.load(new File(args[0]))) {
+      doc.setAllSecurityToBeRemoved(true);
+      for (COSObject cosObject : doc.getDocument().getObjects()) {
+        COSBase base = cosObject.getObject();
+        if (!(base instanceof COSStream)) {
+          continue; // only stream objects are re-encoded
+        }
+        COSStream stream = (COSStream) base;
+        byte[] bytes;
+        try {
+          bytes = new PDStream(stream).toByteArray();
+        } catch (IOException ex) {
+          // NOTE: original example code from PDFBox just logged & "continue;"d here, 'skipping' this stream.
+          // If this type of failure ever happens, we can (perhaps) consider (re)ignoring this type of failure?
+          // IIUC then that will leave the original (non-decoded / non-flated) stream in place?
+          throw new RuntimeException("can't serialize byte[] from: " +
+                                     cosObject.getObjectNumber() + " " +
+                                     cosObject.getGenerationNumber() + " obj: " +
+                                     ex.getMessage(), ex);
+        }
+        // drop the old filter entry, then write the decoded bytes back Flate-compressed
+        stream.removeItem(COSName.FILTER);
+        try (OutputStream streamOut = stream.createOutputStream(COSName.FLATE_DECODE)) {
+          streamOut.write(bytes);
+        }
+      }
+      // NOTE(review): return value is unused; presumably called for a side effect -- confirm before removing
+      doc.getDocumentCatalog();
+      doc.save(args[1]);
+    }
+  }
+}