You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by Apache Wiki <wi...@apache.org> on 2015/03/08 00:30:08 UTC
[Tika Wiki] Update of "IntegratingTikaWithExtractingRequestHandler" by JinghaoCui
Dear Wiki user,
You have subscribed to a wiki page or wiki category on "Tika Wiki" for change notification.
The "IntegratingTikaWithExtractingRequestHandler" page has been changed by JinghaoCui:
https://wiki.apache.org/tika/IntegratingTikaWithExtractingRequestHandler
New page:
= Integrating Tika With ExtractingRequestHandler =
'''0. Because the online Apache repositories do not currently host Tika 1.8-SNAPSHOT, we have to use the local Maven repository.'''
1. Check out tika-trunk:
$ svn co https://svn.apache.org/repos/asf/tika/trunk/ tika-trunk
2. Build Tika
$ cd tika-trunk
$ mvn install
4. Download tika-parsers dependencies:
$ cd tika-parsers
$ mvn dependency:copy-dependencies
5. Check out the lucene_solr_4_10 branch:
$ svn checkout http://svn.apache.org/repos/asf/lucene/dev/branches/lucene_solr_4_10 lucene_solr_4_10
6. Modify lucene_solr_4_10/lucene/ivy-settings.xml by uncommenting lines 45-52:
<!-- Resolver that reads artifacts and POMs from the local Maven 2 repository rooted at ${local-maven2-dir}. -->
<filesystem name="local-maven-2" m2compatible="true" local="true">
  <artifact
      pattern="${local-maven2-dir}/[organisation]/[module]/[revision]/[module]-[revision].[ext]" />
  <ivy
      pattern="${local-maven2-dir}/[organisation]/[module]/[revision]/[module]-[revision].pom" />
</filesystem>
7. Replace lucene_solr_4_10/solr/contrib/extraction/ivy.xml with the following ivy.xml:
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<ivy-module version="2.0">
  <info organisation="org.apache.solr" module="extraction"/>

  <!-- Both configurations are non-transitive and map to the "master" configuration
       of each dependency, so every JAR that is needed is listed explicitly below. -->
  <configurations defaultconfmapping="compile->master;test->master">
    <conf name="compile" transitive="false"/>
    <conf name="test" transitive="false"/>
  </configurations>

  <dependencies>
    <!-- Tika artifacts (1.8-SNAPSHOT build) -->
    <dependency org="org.apache.tika" name="tika-core" rev="1.8-SNAPSHOT" conf="compile"/>
    <dependency org="org.apache.tika" name="tika-parsers" rev="1.8-SNAPSHOT" conf="compile"/>
    <dependency org="org.apache.tika" name="tika-xmp" rev="1.8-SNAPSHOT" conf="compile"/>

    <!-- Libraries required by Tika; background:
         http://tika.apache.org/1.3/gettingstarted.html#Using_Tika_as_a_Maven_dependency -->
    <!-- On every Tika upgrade, bump the versions below and add any newly required libraries,
         leaving out slf4j-api, commons-codec, commons-logging, commons-httpclient,
         geronimo-stax-api_1.0_spec, jcip-annotations, xml-apis and asm.
         WARNING: do not add netcdf / unidataCommon (partially LGPL code). -->
    <dependency org="org.gagravarr" name="vorbis-java-tika" rev="0.6" conf="compile"/>
    <dependency org="org.gagravarr" name="vorbis-java-core" rev="0.6" conf="compile"/>
    <dependency org="org.apache.james" name="apache-mime4j-core" rev="0.7.2" conf="compile"/>
    <dependency org="org.apache.james" name="apache-mime4j-dom" rev="0.7.2" conf="compile"/>
    <dependency org="org.apache.commons" name="commons-compress" rev="1.9" conf="compile"/>
    <dependency org="org.apache.pdfbox" name="pdfbox" rev="1.8.8" conf="compile"/>
    <dependency org="org.apache.pdfbox" name="fontbox" rev="1.8.8" conf="compile"/>
    <dependency org="org.apache.pdfbox" name="jempbox" rev="1.8.8" conf="compile"/>
    <dependency org="org.bouncycastle" name="bcmail-jdk15" rev="1.45" conf="compile"/>
    <dependency org="org.bouncycastle" name="bcprov-jdk15" rev="1.45" conf="compile"/>
    <dependency org="org.apache.poi" name="poi" rev="3.11" conf="compile"/>
    <dependency org="org.apache.poi" name="poi-scratchpad" rev="3.11" conf="compile"/>
    <dependency org="org.apache.poi" name="poi-ooxml" rev="3.11" conf="compile"/>
    <dependency org="org.apache.poi" name="poi-ooxml-schemas" rev="3.11" conf="compile"/>
    <dependency org="org.apache.xmlbeans" name="xmlbeans" rev="2.6.0" conf="compile"/>
    <!-- revision supplied by a build property rather than hard-coded -->
    <dependency org="dom4j" name="dom4j" rev="${/dom4j/dom4j}" conf="compile"/>
    <dependency org="org.ccil.cowan.tagsoup" name="tagsoup" rev="1.2.1" conf="compile"/>
    <dependency org="com.googlecode.mp4parser" name="isoparser" rev="1.0.2" conf="compile"/>
    <dependency org="org.aspectj" name="aspectjrt" rev="1.8.0" conf="compile"/>
    <dependency org="com.drewnoakes" name="metadata-extractor" rev="2.6.2" conf="compile"/>
    <dependency org="de.l3s.boilerpipe" name="boilerpipe" rev="1.1.0" conf="compile"/>
    <dependency org="rome" name="rome" rev="1.0" conf="compile"/>
    <dependency org="jdom" name="jdom" rev="1.0" conf="compile"/>
    <dependency org="com.googlecode.juniversalchardet" name="juniversalchardet" rev="1.0.3" conf="compile"/>
    <dependency org="org.tukaani" name="xz" rev="1.5" conf="compile"/>
    <dependency org="com.adobe.xmp" name="xmpcore" rev="5.1.2" conf="compile"/>
    <dependency org="com.uwyn" name="jhighlight" rev="1.0" conf="compile"/>

    <!-- Remaining ExtractingRequestHandler dependencies (revisions supplied by build properties) -->
    <dependency org="com.ibm.icu" name="icu4j" rev="${/com.ibm.icu/icu4j}" conf="compile"/>
    <dependency org="xerces" name="xercesImpl" rev="${/xerces/xercesImpl}" conf="compile"/>
    <dependency org="org.slf4j" name="jcl-over-slf4j" rev="${/org.slf4j/jcl-over-slf4j}" conf="test"/>

    <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
  </dependencies>
</ivy-module>
8. Compile Solr
Change the working directory to lucene_solr_4_10/solr/
$ ant compile
9. Generate new sha1 files for the jars
$ ant jar-checksums
10. Done. Enjoy.