You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@tika.apache.org by ma...@apache.org on 2016/07/07 06:39:15 UTC

[02/16] tika git commit: fix for TIKA-2021 contributed by Zarana Parekh

fix for TIKA-2021 contributed by Zarana Parekh


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/de84d71b
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/de84d71b
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/de84d71b

Branch: refs/heads/master
Commit: de84d71b145045792b8a3bd175634251623188dc
Parents: 48b27d2
Author: Zarana Parekh <za...@gmail.com>
Authored: Fri Jun 24 19:28:26 2016 -0700
Committer: Zarana Parekh <za...@gmail.com>
Committed: Fri Jun 24 19:28:26 2016 -0700

----------------------------------------------------------------------
 tika-bundle/pom.xml                             | 628 +++++++++----------
 .../tika/parser/ocr/TesseractOCRParser.java     |  26 +-
 2 files changed, 327 insertions(+), 327 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/de84d71b/tika-bundle/pom.xml
----------------------------------------------------------------------
diff --git a/tika-bundle/pom.xml b/tika-bundle/pom.xml
index 7fb5c8d..e94b43f 100644
--- a/tika-bundle/pom.xml
+++ b/tika-bundle/pom.xml
@@ -113,320 +113,320 @@
 
   <build>
   	<pluginManagement>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.felix</groupId>
-        <artifactId>maven-bundle-plugin</artifactId>
-        <extensions>true</extensions>
-        <configuration>
-          <instructions>
-            <_runsystempackages>com.sun.xml.bind.marshaller, com.sun.xml.internal.bind.marshaller</_runsystempackages>
-            <Bundle-Activator>
-              org.apache.tika.parser.internal.Activator
-            </Bundle-Activator>
-            <Embed-Dependency>
-              tika-parsers;inline=true,
-              commons-compress, xz, commons-codec, commons-csv,
-              commons-io, commons-exec, junrar,
-              pdfbox,pdfbox-tools,pdfbox-debugger,fontbox,jempbox,bcmail-jdk15on,bcprov-jdk15on,bcpkix-jdk15on,
-              poi,poi-scratchpad,poi-ooxml,poi-ooxml-schemas,
-              curvesapi,
-              xmlbeans,
-              jackcess,
-              commons-lang,
-              tagsoup,
-              asm,
-              juniversalchardet,
-              vorbis-java-core, vorbis-java-tika,
-              isoparser,
-              metadata-extractor, xmpcore, json-simple,
-              boilerpipe, rome, rome-utils, opennlp-tools, opennlp-maxent,
-              geoapi, sis-metadata, sis-netcdf, sis-utility,
-              sis-storage, apache-mime4j-core, apache-mime4j-dom,
-              jsr-275, jhighlight, java-libpst, jwnl,
-              netcdf4, grib, cdm, httpservices, jcip-annotations,
-              jmatio, guava
-            </Embed-Dependency>
-            <Embed-Transitive>true</Embed-Transitive>
-            <Bundle-DocURL>${project.url}</Bundle-DocURL>
-            <Export-Package>
-              !org.apache.tika.parser,
-              !org.apache.tika.parser.external,
-              org.apache.tika.parser.*,
-            </Export-Package>
-            <Import-Package>
-              !org.junit,
-              !org.junit.*,
-              !junit.*,
-              !org.apache.ctakes.*,
-              !org.apache.uima.*,
-              *,
-              org.apache.tika.fork,
-              android.util;resolution:=optional,
-              com.adobe.xmp;resolution:=optional,
-              com.adobe.xmp.properties;resolution:=optional,
-              com.google.protobuf;resolution:=optional,
-              com.ibm.icu.text;resolution:=optional,
-              com.sleepycat.je;resolution:=optional,
-              com.sun.javadoc;resolution:=optional,
-              com.sun.xml.bind.marshaller;resolution:=optional,
-              com.sun.xml.internal.bind.marshaller;resolution:=optional,
-              com.sun.msv.datatype;resolution:=optional,
-              com.sun.msv.datatype.xsd;resolution:=optional,
-              com.sun.tools.javadoc;resolution:=optional,
-              edu.wisc.ssec.mcidas;resolution:=optional,
-              edu.wisc.ssec.mcidas.adde;resolution:=optional,
-              javax.activation;resolution:=optional,
-              javax.annotation;resolution:=optional,
-              javax.mail;resolution:=optional,
-              javax.mail.internet;resolution:=optional,
-              javax.servlet.annotation;resolution:=optional,
-              javax.servlet;resolution:=optional,
-              javax.servlet.http;resolution:=optional,
-              javax.measure.converter;resolution:=optional,
-              javax.ws.rs.core;resolution:=optional,
-              net.sf.ehcache;resolution:=optional,
-              nu.xom;resolution:=optional,
-              opendap.dap.http;resolution:=optional,
-              opendap.dap;resolution:=optional,
-              opendap.dap.parser;resolution:=optional,
-              opennlp.maxent;resolution:=optional,
-              opennlp.tools.namefind;resolution:=optional,
-              net.didion.jwnl;resolution:=optional,
-              org.apache.cxf.jaxrs.client;resolution:=optional,
-              org.apache.cxf.jaxrs.ext.multipart;resolution:=optional,
-              org.apache.commons.exec;resolution:=optional,
-              org.apache.commons.io;resolution:=optional,
-              org.apache.commons.httpclient;resolution:=optional,
-              org.apache.commons.httpclient.auth;resolution:=optional,
-              org.apache.commons.httpclient.methods;resolution:=optional,
-              org.apache.commons.httpclient.params;resolution:=optional,
-              org.apache.commons.httpclient.protocol;resolution:=optional,
-              org.apache.commons.httpclient.util;resolution:=optional,
-              org.apache.commons.vfs2;resolution:=optional,
-              org.apache.commons.vfs2.provider;resolution:=optional,
-              org.apache.commons.vfs2.util;resolution:=optional,
-              org.apache.crimson.jaxp;resolution:=optional,
-              org.apache.jcp.xml.dsig.internal.dom;resolution:=optional,
-              org.apache.sis;resolution:=optional,
-              org.apache.sis.distance;resolution:=optional,
-              org.apache.sis.geometry;resolution:=optional,
-              org.apache.tools.ant;resolution:=optional,
-              org.apache.tools.ant.taskdefs;resolution:=optional,
-              org.apache.tools.ant.types;resolution:=optional,
-              org.apache.xerces.parsers;resolution:=optional,
-              org.apache.xerces.util;resolution:=optional,
-              org.apache.xerces.xni;resolution:=optional,
-              org.apache.xerces.xni.parser;resolution:=optional,
-              org.apache.xml.resolver;resolution:=optional,
-              org.apache.xml.resolver.tools;resolution:=optional,
-              org.apache.xml.security;resolution:=optional,
-              org.apache.xml.security.c14n;resolution:=optional,
-              org.apache.xml.security.utils;resolution:=optional,
-              org.apache.xmlbeans.impl.xpath.saxon;resolution:=optional,
-              org.apache.xmlbeans.impl.xquery.saxon;resolution:=optional,
-              org.bouncycastle.cert;resolution:=optional,
-              org.bouncycastle.cert.jcajce;resolution:=optional,
-              org.bouncycastle.cert.ocsp;resolution:=optional,
-              org.bouncycastle.cms.bc;resolution:=optional,
-              org.bouncycastle.operator;resolution:=optional,
-              org.bouncycastle.operator.bc;resolution:=optional,
-              org.bouncycastle.tsp;resolution:=optional,
-              org.cyberneko.html.xercesbridge;resolution:=optional,
-              org.etsi.uri.x01903.v14;resolution:=optional,
-              org.ibex.nestedvm;resolution:=optional,
-              org.gjt.xpp;resolution:=optional,
-              org.jaxen;resolution:=optional,
-              org.jaxen.dom4j;resolution:=optional,
-              org.jaxen.pattern;resolution:=optional,
-              org.jaxen.saxpath;resolution:=optional,
-              org.jdom;resolution:=optional,
-              org.jdom.input;resolution:=optional,
-              org.jdom.output;resolution:=optional,
-              org.jdom2;resolution:=optional,
-              org.jdom2.input;resolution:=optional,
-              org.jdom2.input.sax;resolution:=optional,
-              org.jdom2.output;resolution:=optional,
-              org.jdom2.filter;resolution:=optional,
-              org.json.simple;resolution:=optional,
-              org.json;resolution:=optional,
-              org.openxmlformats.schemas.officeDocument.x2006.math;resolution:=optional,
-              org.openxmlformats.schemas.schemaLibrary.x2006.main;resolution:=optional,
-              org.osgi.framework;resolution:=optional,
-              org.quartz;resolution:=optional,
-              org.quartz.impl;resolution:=optional,
-              org.slf4j;resolution:=optional,
-              org.sqlite;resolution:=optional,
-              org.w3c.dom;resolution:=optional,
-              org.relaxng.datatype;resolution:=optional,
-              org.xml.sax;resolution:=optional,
-              org.xml.sax.ext;resolution:=optional,
-              org.xml.sax.helpers;resolution:=optional,
-              org.xmlpull.v1;resolution:=optional,
-              com.microsoft.schemas.office.powerpoint;resolution:=optional,
-              com.microsoft.schemas.office.word;resolution:=optional,              sun.misc;resolution:=optional,
-              ucar.units;resolution:=optional,
-              ucar.httpservices;resolution:=optional,
-              ucar.nc2.util;resolution:=optional,
-              ucar.nc2.util.cache;resolution:=optional,
-              ucar.nc2.dataset;resolution:=optional,
-              ucar.nc2;resolution:=optional,
-              ucar.nc2.constants;resolution:=optional,
-              ucar.nc2.dt;resolution:=optional,
-              ucar.nc2.dt.grid;resolution:=optional,
-              ucar.nc2.ft;resolution:=optional,
-              ucar.nc2.iosp;resolution:=optional,
-              ucar.nc2.iosp.hdf4;resolution:=optional,
-              ucar.nc2.ncml;resolution:=optional,
-              ucar.nc2.stream;resolution:=optional,
-              ucar.nc2.time;resolution:=optional,
-              ucar.nc2.units;resolution:=optional,
-              ucar.nc2.wmo;resolution:=optional,
-              ucar.nc2.write;resolution:=optional,
-              ucar.ma2;resolution:=optional,
-              ucar.grib;resolution:=optional,
-              ucar.grib.grib1;resolution:=optional,
-              ucar.grib.grib2;resolution:=optional,
-              ucar.grid;resolution:=optional,
-              ucar.unidata.geoloc;resolution:=optional,
-              ucar.unidata.geoloc.projection;resolution:=optional,
-              ucar.unidata.geoloc.projection.proj4;resolution:=optional,
-              ucar.unidata.geoloc.projection.sat;resolution:=optional,
-              ucar.unidata.io;resolution:=optional,
-              ucar.unidata.util;resolution:=optional,
-              com.jmatio.io;resolution:=optional,
-              com.google.gson;resolution:=optional,
-              visad;resolution:=optional,
-              visad.data;resolution:=optional,
-              visad.data.vis5d;resolution:=optional,
-              visad.jmet;resolution:=optional,
-              visad.util;resolution:=optional,
-              colorspace;resolution:=optional,
-              com.sun.jna;resolution:=optional,
-              com.sun.jna.ptr;resolution:=optional,
-              icc;resolution:=optional,
-              jj2000.j2k.codestream;resolution:=optional,
-              jj2000.j2k.codestream.reader;resolution:=optional,
-              jj2000.j2k.decoder;resolution:=optional,
-              jj2000.j2k.entropy.decoder;resolution:=optional,
-              jj2000.j2k.fileformat.reader;resolution:=optional,
-              jj2000.j2k.image;resolution:=optional,
-              jj2000.j2k.image.invcomptransf;resolution:=optional,
-              jj2000.j2k.image.output;resolution:=optional,
-              jj2000.j2k.io;resolution:=optional,
-              jj2000.j2k.quantization.dequantizer;resolution:=optional,
-              jj2000.j2k.roi;resolution:=optional,
-              jj2000.j2k.util;resolution:=optional,
-              jj2000.j2k.wavelet.synthesis;resolution:=optional,
-              org.itadaki.bzip2;resolution:=optional,
-              org.jsoup;resolution:=optional,
-              org.jsoup.nodes;resolution:=optional,
-              org.jsoup.select;resolution:=optional,
-              thredds.featurecollection;resolution:=optional,
-              thredds.filesystem;resolution:=optional,
-              thredds.inventory;resolution:=optional,
-              thredds.inventory.filter;resolution:=optional,
-              thredds.inventory.partition;resolution:=optional,
-              com.beust.jcommander;resolution:=optional,
-              com.google.common.base;resolution:=optional,
-              com.google.common.math;resolution:=optional,
-              org.apache.http;resolution:=optional,
-              org.apache.http.client.utils;resolution:=optional,
-              org.joda.time;resolution:=optional,
-              org.joda.time.chrono;resolution:=optional,
-              org.joda.time.field;resolution:=optional,
-              org.joda.time.format;resolution:=optional,
-              sun.reflect.generics.reflectiveObjects;resolution:=optional,
-              org.apache.http.auth;resolution:=optional,
-              org.apache.http.client;resolution:=optional,
-              org.apache.http.client.entity;resolution:=optional,
-              org.apache.http.client.methods;resolution:=optional,
-              org.apache.http.conn;resolution:=optional,
-              org.apache.http.conn.scheme;resolution:=optional,
-              org.apache.http.cookie;resolution:=optional,
-              org.apache.http.entity;resolution:=optional,
-              org.apache.http.impl.client;resolution:=optional,
-              org.apache.http.impl.conn;resolution:=optional,
-              org.apache.http.message;resolution:=optional,
-              org.apache.http.params;resolution:=optional,
-              org.apache.http.protocol;resolution:=optional,
-              org.apache.http.util;resolution:=optional
-            </Import-Package>
-          </instructions>
-        </configuration>
-      </plugin>
-      <!-- TIKA-763: Workaround to avoid including LGPL classes -->
-      <plugin>
-        <artifactId>maven-dependency-plugin</artifactId>
-        <executions>
-          <execution>
-            <phase>prepare-package</phase>
-            <goals>
-              <goal>unpack-dependencies</goal>
-            </goals>
-            <configuration>
-              <includeArtifactIds>netcdf</includeArtifactIds>
-              <excludes>
-                ucar/nc2/iosp/fysat/Fysat*.class,
-                ucar/nc2/dataset/transform/VOceanSG1*class,
-                ucar/unidata/geoloc/vertical/OceanSG*.class,
-                META-INF/**,CHANGES,README
-              </excludes>
-              <outputDirectory>
-                ${project.build.directory}/classes
-              </outputDirectory>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-
-      <!-- The Tika Bundle has no java code of its own, so no need to do -->
-      <!--  any forbidden API checking against it (it gets confused...) -->
-      <plugin>
-        <groupId>de.thetaphi</groupId>
-        <artifactId>forbiddenapis</artifactId>
-        <configuration>
-          <skip>true</skip>
-        </configuration>
-      </plugin>
-
-      <plugin>
-        <artifactId>maven-assembly-plugin</artifactId>
-        <executions>
-          <execution>
-            <phase>pre-integration-test</phase>
-            <goals>
-              <goal>single</goal>
-            </goals>
-            <configuration>
-              <descriptor>test-bundles.xml</descriptor>
-              <finalName>test</finalName>
-              <attach>false</attach>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-
-      <plugin>
-        <artifactId>maven-failsafe-plugin</artifactId>
-        <version>2.10</version>
-        <executions>
-          <execution>
-            <goals>
-              <goal>integration-test</goal>
-              <goal>verify</goal>
-            </goals>
-          </execution>
-        </executions>
-        <configuration>
-          <systemPropertyVariables>
-            <org.ops4j.pax.logging.DefaultServiceLog.level>
-              WARN
-            </org.ops4j.pax.logging.DefaultServiceLog.level>
-          </systemPropertyVariables>
-        </configuration>
-      </plugin>
-    </plugins>
+	    <plugins>
+	      <plugin>
+	        <groupId>org.apache.felix</groupId>
+	        <artifactId>maven-bundle-plugin</artifactId>
+	        <extensions>true</extensions>
+	        <configuration>
+	          <instructions>
+	            <_runsystempackages>com.sun.xml.bind.marshaller, com.sun.xml.internal.bind.marshaller</_runsystempackages>
+	            <Bundle-Activator>
+	              org.apache.tika.parser.internal.Activator
+	            </Bundle-Activator>
+	            <Embed-Dependency>
+	              tika-parsers;inline=true,
+	              commons-compress, xz, commons-codec, commons-csv,
+	              commons-io, commons-exec, junrar,
+	              pdfbox,pdfbox-tools,pdfbox-debugger,fontbox,jempbox,bcmail-jdk15on,bcprov-jdk15on,bcpkix-jdk15on,
+	              poi,poi-scratchpad,poi-ooxml,poi-ooxml-schemas,
+	              curvesapi,
+	              xmlbeans,
+	              jackcess,
+	              commons-lang,
+	              tagsoup,
+	              asm,
+	              juniversalchardet,
+	              vorbis-java-core, vorbis-java-tika,
+	              isoparser,
+	              metadata-extractor, xmpcore, json-simple,
+	              boilerpipe, rome, rome-utils, opennlp-tools, opennlp-maxent,
+	              geoapi, sis-metadata, sis-netcdf, sis-utility,
+	              sis-storage, apache-mime4j-core, apache-mime4j-dom,
+	              jsr-275, jhighlight, java-libpst, jwnl,
+	              netcdf4, grib, cdm, httpservices, jcip-annotations,
+	              jmatio, guava
+	            </Embed-Dependency>
+	            <Embed-Transitive>true</Embed-Transitive>
+	            <Bundle-DocURL>${project.url}</Bundle-DocURL>
+	            <Export-Package>
+	              !org.apache.tika.parser,
+	              !org.apache.tika.parser.external,
+	              org.apache.tika.parser.*,
+	            </Export-Package>
+	            <Import-Package>
+	              !org.junit,
+	              !org.junit.*,
+	              !junit.*,
+	              !org.apache.ctakes.*,
+	              !org.apache.uima.*,
+	              *,
+	              org.apache.tika.fork,
+	              android.util;resolution:=optional,
+	              com.adobe.xmp;resolution:=optional,
+	              com.adobe.xmp.properties;resolution:=optional,
+	              com.google.protobuf;resolution:=optional,
+	              com.ibm.icu.text;resolution:=optional,
+	              com.sleepycat.je;resolution:=optional,
+	              com.sun.javadoc;resolution:=optional,
+	              com.sun.xml.bind.marshaller;resolution:=optional,
+	              com.sun.xml.internal.bind.marshaller;resolution:=optional,
+	              com.sun.msv.datatype;resolution:=optional,
+	              com.sun.msv.datatype.xsd;resolution:=optional,
+	              com.sun.tools.javadoc;resolution:=optional,
+	              edu.wisc.ssec.mcidas;resolution:=optional,
+	              edu.wisc.ssec.mcidas.adde;resolution:=optional,
+	              javax.activation;resolution:=optional,
+	              javax.annotation;resolution:=optional,
+	              javax.mail;resolution:=optional,
+	              javax.mail.internet;resolution:=optional,
+	              javax.servlet.annotation;resolution:=optional,
+	              javax.servlet;resolution:=optional,
+	              javax.servlet.http;resolution:=optional,
+	              javax.measure.converter;resolution:=optional,
+	              javax.ws.rs.core;resolution:=optional,
+	              net.sf.ehcache;resolution:=optional,
+	              nu.xom;resolution:=optional,
+	              opendap.dap.http;resolution:=optional,
+	              opendap.dap;resolution:=optional,
+	              opendap.dap.parser;resolution:=optional,
+	              opennlp.maxent;resolution:=optional,
+	              opennlp.tools.namefind;resolution:=optional,
+	              net.didion.jwnl;resolution:=optional,
+	              org.apache.cxf.jaxrs.client;resolution:=optional,
+	              org.apache.cxf.jaxrs.ext.multipart;resolution:=optional,
+	              org.apache.commons.exec;resolution:=optional,
+	              org.apache.commons.io;resolution:=optional,
+	              org.apache.commons.httpclient;resolution:=optional,
+	              org.apache.commons.httpclient.auth;resolution:=optional,
+	              org.apache.commons.httpclient.methods;resolution:=optional,
+	              org.apache.commons.httpclient.params;resolution:=optional,
+	              org.apache.commons.httpclient.protocol;resolution:=optional,
+	              org.apache.commons.httpclient.util;resolution:=optional,
+	              org.apache.commons.vfs2;resolution:=optional,
+	              org.apache.commons.vfs2.provider;resolution:=optional,
+	              org.apache.commons.vfs2.util;resolution:=optional,
+	              org.apache.crimson.jaxp;resolution:=optional,
+	              org.apache.jcp.xml.dsig.internal.dom;resolution:=optional,
+	              org.apache.sis;resolution:=optional,
+	              org.apache.sis.distance;resolution:=optional,
+	              org.apache.sis.geometry;resolution:=optional,
+	              org.apache.tools.ant;resolution:=optional,
+	              org.apache.tools.ant.taskdefs;resolution:=optional,
+	              org.apache.tools.ant.types;resolution:=optional,
+	              org.apache.xerces.parsers;resolution:=optional,
+	              org.apache.xerces.util;resolution:=optional,
+	              org.apache.xerces.xni;resolution:=optional,
+	              org.apache.xerces.xni.parser;resolution:=optional,
+	              org.apache.xml.resolver;resolution:=optional,
+	              org.apache.xml.resolver.tools;resolution:=optional,
+	              org.apache.xml.security;resolution:=optional,
+	              org.apache.xml.security.c14n;resolution:=optional,
+	              org.apache.xml.security.utils;resolution:=optional,
+	              org.apache.xmlbeans.impl.xpath.saxon;resolution:=optional,
+	              org.apache.xmlbeans.impl.xquery.saxon;resolution:=optional,
+	              org.bouncycastle.cert;resolution:=optional,
+	              org.bouncycastle.cert.jcajce;resolution:=optional,
+	              org.bouncycastle.cert.ocsp;resolution:=optional,
+	              org.bouncycastle.cms.bc;resolution:=optional,
+	              org.bouncycastle.operator;resolution:=optional,
+	              org.bouncycastle.operator.bc;resolution:=optional,
+	              org.bouncycastle.tsp;resolution:=optional,
+	              org.cyberneko.html.xercesbridge;resolution:=optional,
+	              org.etsi.uri.x01903.v14;resolution:=optional,
+	              org.ibex.nestedvm;resolution:=optional,
+	              org.gjt.xpp;resolution:=optional,
+	              org.jaxen;resolution:=optional,
+	              org.jaxen.dom4j;resolution:=optional,
+	              org.jaxen.pattern;resolution:=optional,
+	              org.jaxen.saxpath;resolution:=optional,
+	              org.jdom;resolution:=optional,
+	              org.jdom.input;resolution:=optional,
+	              org.jdom.output;resolution:=optional,
+	              org.jdom2;resolution:=optional,
+	              org.jdom2.input;resolution:=optional,
+	              org.jdom2.input.sax;resolution:=optional,
+	              org.jdom2.output;resolution:=optional,
+	              org.jdom2.filter;resolution:=optional,
+	              org.json.simple;resolution:=optional,
+	              org.json;resolution:=optional,
+	              org.openxmlformats.schemas.officeDocument.x2006.math;resolution:=optional,
+	              org.openxmlformats.schemas.schemaLibrary.x2006.main;resolution:=optional,
+	              org.osgi.framework;resolution:=optional,
+	              org.quartz;resolution:=optional,
+	              org.quartz.impl;resolution:=optional,
+	              org.slf4j;resolution:=optional,
+	              org.sqlite;resolution:=optional,
+	              org.w3c.dom;resolution:=optional,
+	              org.relaxng.datatype;resolution:=optional,
+	              org.xml.sax;resolution:=optional,
+	              org.xml.sax.ext;resolution:=optional,
+	              org.xml.sax.helpers;resolution:=optional,
+	              org.xmlpull.v1;resolution:=optional,
+	              com.microsoft.schemas.office.powerpoint;resolution:=optional,
+	              com.microsoft.schemas.office.word;resolution:=optional,              sun.misc;resolution:=optional,
+	              ucar.units;resolution:=optional,
+	              ucar.httpservices;resolution:=optional,
+	              ucar.nc2.util;resolution:=optional,
+	              ucar.nc2.util.cache;resolution:=optional,
+	              ucar.nc2.dataset;resolution:=optional,
+	              ucar.nc2;resolution:=optional,
+	              ucar.nc2.constants;resolution:=optional,
+	              ucar.nc2.dt;resolution:=optional,
+	              ucar.nc2.dt.grid;resolution:=optional,
+	              ucar.nc2.ft;resolution:=optional,
+	              ucar.nc2.iosp;resolution:=optional,
+	              ucar.nc2.iosp.hdf4;resolution:=optional,
+	              ucar.nc2.ncml;resolution:=optional,
+	              ucar.nc2.stream;resolution:=optional,
+	              ucar.nc2.time;resolution:=optional,
+	              ucar.nc2.units;resolution:=optional,
+	              ucar.nc2.wmo;resolution:=optional,
+	              ucar.nc2.write;resolution:=optional,
+	              ucar.ma2;resolution:=optional,
+	              ucar.grib;resolution:=optional,
+	              ucar.grib.grib1;resolution:=optional,
+	              ucar.grib.grib2;resolution:=optional,
+	              ucar.grid;resolution:=optional,
+	              ucar.unidata.geoloc;resolution:=optional,
+	              ucar.unidata.geoloc.projection;resolution:=optional,
+	              ucar.unidata.geoloc.projection.proj4;resolution:=optional,
+	              ucar.unidata.geoloc.projection.sat;resolution:=optional,
+	              ucar.unidata.io;resolution:=optional,
+	              ucar.unidata.util;resolution:=optional,
+	              com.jmatio.io;resolution:=optional,
+	              com.google.gson;resolution:=optional,
+	              visad;resolution:=optional,
+	              visad.data;resolution:=optional,
+	              visad.data.vis5d;resolution:=optional,
+	              visad.jmet;resolution:=optional,
+	              visad.util;resolution:=optional,
+	              colorspace;resolution:=optional,
+	              com.sun.jna;resolution:=optional,
+	              com.sun.jna.ptr;resolution:=optional,
+	              icc;resolution:=optional,
+	              jj2000.j2k.codestream;resolution:=optional,
+	              jj2000.j2k.codestream.reader;resolution:=optional,
+	              jj2000.j2k.decoder;resolution:=optional,
+	              jj2000.j2k.entropy.decoder;resolution:=optional,
+	              jj2000.j2k.fileformat.reader;resolution:=optional,
+	              jj2000.j2k.image;resolution:=optional,
+	              jj2000.j2k.image.invcomptransf;resolution:=optional,
+	              jj2000.j2k.image.output;resolution:=optional,
+	              jj2000.j2k.io;resolution:=optional,
+	              jj2000.j2k.quantization.dequantizer;resolution:=optional,
+	              jj2000.j2k.roi;resolution:=optional,
+	              jj2000.j2k.util;resolution:=optional,
+	              jj2000.j2k.wavelet.synthesis;resolution:=optional,
+	              org.itadaki.bzip2;resolution:=optional,
+	              org.jsoup;resolution:=optional,
+	              org.jsoup.nodes;resolution:=optional,
+	              org.jsoup.select;resolution:=optional,
+	              thredds.featurecollection;resolution:=optional,
+	              thredds.filesystem;resolution:=optional,
+	              thredds.inventory;resolution:=optional,
+	              thredds.inventory.filter;resolution:=optional,
+	              thredds.inventory.partition;resolution:=optional,
+	              com.beust.jcommander;resolution:=optional,
+	              com.google.common.base;resolution:=optional,
+	              com.google.common.math;resolution:=optional,
+	              org.apache.http;resolution:=optional,
+	              org.apache.http.client.utils;resolution:=optional,
+	              org.joda.time;resolution:=optional,
+	              org.joda.time.chrono;resolution:=optional,
+	              org.joda.time.field;resolution:=optional,
+	              org.joda.time.format;resolution:=optional,
+	              sun.reflect.generics.reflectiveObjects;resolution:=optional,
+	              org.apache.http.auth;resolution:=optional,
+	              org.apache.http.client;resolution:=optional,
+	              org.apache.http.client.entity;resolution:=optional,
+	              org.apache.http.client.methods;resolution:=optional,
+	              org.apache.http.conn;resolution:=optional,
+	              org.apache.http.conn.scheme;resolution:=optional,
+	              org.apache.http.cookie;resolution:=optional,
+	              org.apache.http.entity;resolution:=optional,
+	              org.apache.http.impl.client;resolution:=optional,
+	              org.apache.http.impl.conn;resolution:=optional,
+	              org.apache.http.message;resolution:=optional,
+	              org.apache.http.params;resolution:=optional,
+	              org.apache.http.protocol;resolution:=optional,
+	              org.apache.http.util;resolution:=optional
+	            </Import-Package>
+	          </instructions>
+	        </configuration>
+	      </plugin>
+	      <!-- TIKA-763: Workaround to avoid including LGPL classes -->
+	      <plugin>
+	        <artifactId>maven-dependency-plugin</artifactId>
+	        <executions>
+	          <execution>
+	            <phase>prepare-package</phase>
+	            <goals>
+	              <goal>unpack-dependencies</goal>
+	            </goals>
+	            <configuration>
+	              <includeArtifactIds>netcdf</includeArtifactIds>
+	              <excludes>
+	                ucar/nc2/iosp/fysat/Fysat*.class,
+	                ucar/nc2/dataset/transform/VOceanSG1*class,
+	                ucar/unidata/geoloc/vertical/OceanSG*.class,
+	                META-INF/**,CHANGES,README
+	              </excludes>
+	              <outputDirectory>
+	                ${project.build.directory}/classes
+	              </outputDirectory>
+	            </configuration>
+	          </execution>
+	        </executions>
+	      </plugin>
+	
+	      <!-- The Tika Bundle has no java code of its own, so no need to do -->
+	      <!--  any forbidden API checking against it (it gets confused...) -->
+	      <plugin>
+	        <groupId>de.thetaphi</groupId>
+	        <artifactId>forbiddenapis</artifactId>
+	        <configuration>
+	          <skip>true</skip>
+	        </configuration>
+	      </plugin>
+	
+	      <plugin>
+	        <artifactId>maven-assembly-plugin</artifactId>
+	        <executions>
+	          <execution>
+	            <phase>pre-integration-test</phase>
+	            <goals>
+	              <goal>single</goal>
+	            </goals>
+	            <configuration>
+	              <descriptor>test-bundles.xml</descriptor>
+	              <finalName>test</finalName>
+	              <attach>false</attach>
+	            </configuration>
+	          </execution>
+	        </executions>
+	      </plugin>
+	
+	      <plugin>
+	        <artifactId>maven-failsafe-plugin</artifactId>
+	        <version>2.10</version>
+	        <executions>
+	          <execution>
+	            <goals>
+	              <goal>integration-test</goal>
+	              <goal>verify</goal>
+	            </goals>
+	          </execution>
+	        </executions>
+	        <configuration>
+	          <systemPropertyVariables>
+	            <org.ops4j.pax.logging.DefaultServiceLog.level>
+	              WARN
+	            </org.ops4j.pax.logging.DefaultServiceLog.level>
+	          </systemPropertyVariables>
+	        </configuration>
+	      </plugin>
+	    </plugins>
     </pluginManagement>
   </build>
 

http://git-wip-us.apache.org/repos/asf/tika/blob/de84d71b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 1280aec..dae4a64 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -160,7 +160,7 @@ public class TesseractOCRParser extends AbstractParser {
         
     	boolean hasPython = false;
     	
-    	try {
+		try {
 			Process proc = Runtime.getRuntime().exec("python -h");
 			BufferedReader stdInput = new BufferedReader(new InputStreamReader(proc.getInputStream()));
 			if(stdInput.read() != -1) {
@@ -169,7 +169,7 @@ public class TesseractOCRParser extends AbstractParser {
 		} catch (IOException e) {
 			e.printStackTrace();
 		} 
-    	
+
 		return hasPython;	
     }
     
@@ -283,22 +283,22 @@ public class TesseractOCRParser extends AbstractParser {
         // determine the angle of rotation required to make the text horizontal
         CommandLine cmdLine = CommandLine.parse(cmd);
         if(hasPython()) {
-        	try {
-        		executor.execute(cmdLine);
-        		angle = outputStream.toString().trim();
-            } catch(Exception e) {	
-        		e.printStackTrace();
-        	}
+			try {
+				executor.execute(cmdLine);
+				angle = outputStream.toString().trim();
+			} catch(Exception e) {	
+				e.printStackTrace();
+			}
         }
               
         // process the image - parameter values can be set in TesseractOCRConfig.properties
     	String line = "convert -density " + config.getDensity() + " -depth " + config.getDepth() + " -colorspace " + config.getColorspace() +  " -filter " + config.getFilter() + " -resize " + config.getResize() + "% -rotate "+ angle + " " + streamingObject.getAbsolutePath() + " " + streamingObject.getAbsolutePath();    	
         cmdLine = CommandLine.parse(line);
-        try {
-    		executor.execute(cmdLine);
-        } catch(Exception e) {	
-    		e.printStackTrace();
-    	} 
+		try {
+			executor.execute(cmdLine);
+		} catch(Exception e) {	
+			e.printStackTrace();
+		} 
        
         tmp.close();
     }