You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2016/03/02 06:42:21 UTC
[12/20] tika git commit: fix for TIKA-1876 contributed by manalishah
fix for TIKA-1876 contributed by manalishah
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/7ebe007e
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/7ebe007e
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/7ebe007e
Branch: refs/heads/master
Commit: 7ebe007ec03088449f67619ef1e6cb564178b14b
Parents: a13369b
Author: manali <ma...@gmail.com>
Authored: Fri Feb 26 18:36:02 2016 -0800
Committer: manali <ma...@gmail.com>
Committed: Fri Feb 26 18:36:02 2016 -0800
----------------------------------------------------------------------
CHANGES.txt | 2 ++
.../src/main/java/org/apache/tika/mime/MimeType.java | 1 -
.../org/apache/tika/mime/tika-mimetypes.xml | 13 ++++++-------
.../tika/parser/microsoft/ooxml/XWPFListManager.java | 4 ++++
.../org/apache/tika/parser/ner/NERecogniser.java | 2 --
.../apache/tika/server/RichTextContentHandler.java | 15 +++++++++++++--
.../apache/tika/server/resource/TikaResource.java | 2 +-
.../tika/server/resource/UnpackerResource.java | 2 +-
8 files changed, 27 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/7ebe007e/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index bb30540..0ffc69f 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -9,6 +9,8 @@ Release 1.13 - ???
* Upgrade to Jackson 2.7.1 (TIKA-1869).
+ * RichTextContentHandler moved from the Server package to Core (TIKA-1870).
+
Release 1.12 - 01/24/2016
* Support for iFrames and element link extraction is provided in
http://git-wip-us.apache.org/repos/asf/tika/blob/7ebe007e/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeType.java b/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
index fc520cf..b4d651e 100644
--- a/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
+++ b/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
@@ -270,7 +270,6 @@ public final class MimeType implements Comparable<MimeType>, Serializable {
}
}
-
void addMagic(Magic magic) {
if (magic == null) {
return;
http://git-wip-us.apache.org/repos/asf/tika/blob/7ebe007e/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 5bb30fc..95f41e6 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -38,12 +38,6 @@
-->
<mime-info>
- <mime-type type="application/dicom">
- <_comment>DICOM medical imaging data</_comment>
- <magic priority="50">
- <match value="DICM" type="string" offset="128"/>
- </magic>
- </mime-type>
<mime-type type="application/activemessage"/>
<mime-type type="application/andrew-inset">
<glob pattern="*.ez"/>
@@ -118,7 +112,12 @@
<mime-type type="application/dec-dx"/>
<mime-type type="application/dialog-info+xml"/>
-
+ <mime-type type="application/dicom">
+ <_comment>DICOM medical imaging data</_comment>
+ <magic priority="50">
+ <match value="DICM" type="string" offset="128"/>
+ </magic>
+ </mime-type>
<mime-type type="application/dita+xml">
<sub-class-of type="application/xml"/>
http://git-wip-us.apache.org/repos/asf/tika/blob/7ebe007e/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFListManager.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFListManager.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFListManager.java
index 5654378..a938c2f 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFListManager.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFListManager.java
@@ -57,6 +57,10 @@ public class XWPFListManager extends AbstractListManager {
* @return the formatted number or an empty string if something went wrong
*/
public String getFormattedNumber(final XWPFParagraph paragraph) {
+ if (numbering == null) {
+ return "";
+ }
+
int currNumId = paragraph.getNumID().intValue();
XWPFNum xwpfNum = numbering.getNum(paragraph.getNumID());
if (xwpfNum == null) {
http://git-wip-us.apache.org/repos/asf/tika/blob/7ebe007e/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java
index 3bebff2..c4693eb 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java
@@ -36,8 +36,6 @@ public interface NERecogniser {
String DATE = "DATE";
String PERCENT = "PERCENT";
String MONEY = "MONEY";
- String FACILITY = "FACILITY";
- String GPE = "GPE";
/**
* checks if this Named Entity recogniser is available for service
http://git-wip-us.apache.org/repos/asf/tika/blob/7ebe007e/tika-server/src/main/java/org/apache/tika/server/RichTextContentHandler.java
----------------------------------------------------------------------
diff --git a/tika-server/src/main/java/org/apache/tika/server/RichTextContentHandler.java b/tika-server/src/main/java/org/apache/tika/server/RichTextContentHandler.java
index 81095a7..8fcc4d5 100644
--- a/tika-server/src/main/java/org/apache/tika/server/RichTextContentHandler.java
+++ b/tika-server/src/main/java/org/apache/tika/server/RichTextContentHandler.java
@@ -15,15 +15,26 @@
* limitations under the License.
*/
-package org.apache.tika.server;
+package org.apache.tika.sax;
import java.io.Writer;
-import org.apache.tika.sax.WriteOutContentHandler;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
+/**
+ * Content handler for rich text. It extracts the {@code alt} attribute
+ * of XHTML {@code <img/>} tags and the {@code name} attribute of XHTML
+ * {@code <a/>} tags into the output.
+ */
public class RichTextContentHandler extends WriteOutContentHandler {
+
+ /**
+ * Creates a content handler that writes XHTML body character events to
+ * the given writer.
+ *
+ * @param writer writer that receives the XHTML body character events
+ */
public RichTextContentHandler(Writer writer) {
super(writer);
}
http://git-wip-us.apache.org/repos/asf/tika/blob/7ebe007e/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
----------------------------------------------------------------------
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
index d74ef74..566203a 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
@@ -72,7 +72,7 @@ import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ExpandedTitleContentHandler;
-import org.apache.tika.server.RichTextContentHandler;
+import org.apache.tika.sax.RichTextContentHandler;
import org.apache.tika.server.TikaServerParseException;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
http://git-wip-us.apache.org/repos/asf/tika/blob/7ebe007e/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
----------------------------------------------------------------------
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
index cf3a0e9..8ee516e 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
@@ -58,7 +58,7 @@ import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.apache.tika.sax.BodyContentHandler;
-import org.apache.tika.server.RichTextContentHandler;
+import org.apache.tika.sax.RichTextContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;