You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2016/03/02 06:42:21 UTC

[12/20] tika git commit: fix for TIKA-1876 contributed by manalishah

fix for TIKA-1876 contributed by manalishah


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/7ebe007e
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/7ebe007e
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/7ebe007e

Branch: refs/heads/master
Commit: 7ebe007ec03088449f67619ef1e6cb564178b14b
Parents: a13369b
Author: manali <ma...@gmail.com>
Authored: Fri Feb 26 18:36:02 2016 -0800
Committer: manali <ma...@gmail.com>
Committed: Fri Feb 26 18:36:02 2016 -0800

----------------------------------------------------------------------
 CHANGES.txt                                          |  2 ++
 .../src/main/java/org/apache/tika/mime/MimeType.java |  1 -
 .../org/apache/tika/mime/tika-mimetypes.xml          | 13 ++++++-------
 .../tika/parser/microsoft/ooxml/XWPFListManager.java |  4 ++++
 .../org/apache/tika/parser/ner/NERecogniser.java     |  2 --
 .../apache/tika/server/RichTextContentHandler.java   | 15 +++++++++++++--
 .../apache/tika/server/resource/TikaResource.java    |  2 +-
 .../tika/server/resource/UnpackerResource.java       |  2 +-
 8 files changed, 27 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/7ebe007e/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index bb30540..0ffc69f 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -9,6 +9,8 @@ Release 1.13 - ???
 
   * Upgrade to Jackson 2.7.1 (TIKA-1869).
 
+  * RichTextContentHandler moved from the Server package to Core (TIKA-1870).
+
 Release 1.12 - 01/24/2016
 
   * Support for iFrames and element link extraction is provided in

http://git-wip-us.apache.org/repos/asf/tika/blob/7ebe007e/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeType.java b/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
index fc520cf..b4d651e 100644
--- a/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
+++ b/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
@@ -270,7 +270,6 @@ public final class MimeType implements Comparable<MimeType>, Serializable {
         }
     }
 
-
     void addMagic(Magic magic) {
         if (magic == null) {
             return;

http://git-wip-us.apache.org/repos/asf/tika/blob/7ebe007e/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 5bb30fc..95f41e6 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -38,12 +38,6 @@
 -->
 <mime-info>
 
-  <mime-type type="application/dicom">
-    <_comment>DICOM medical imaging data</_comment>
-    <magic priority="50">
-      <match value="DICM" type="string" offset="128"/>
-    </magic>
-  </mime-type>
   <mime-type type="application/activemessage"/>
   <mime-type type="application/andrew-inset">
     <glob pattern="*.ez"/>
@@ -118,7 +112,12 @@
   <mime-type type="application/dec-dx"/>
   <mime-type type="application/dialog-info+xml"/>
 
-
+  <mime-type type="application/dicom">
+    <_comment>DICOM medical imaging data</_comment>
+    <magic priority="50">
+      <match value="DICM" type="string" offset="128"/>
+    </magic>
+  </mime-type>
 
   <mime-type type="application/dita+xml">
     <sub-class-of type="application/xml"/>

http://git-wip-us.apache.org/repos/asf/tika/blob/7ebe007e/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFListManager.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFListManager.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFListManager.java
index 5654378..a938c2f 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFListManager.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFListManager.java
@@ -57,6 +57,10 @@ public class XWPFListManager extends AbstractListManager {
      * @return the formatted number or an empty string if something went wrong
      */
     public String getFormattedNumber(final XWPFParagraph paragraph) {
+        if (numbering == null) {
+            return "";
+        }
+
         int currNumId = paragraph.getNumID().intValue();
         XWPFNum xwpfNum = numbering.getNum(paragraph.getNumID());
         if (xwpfNum == null) {

http://git-wip-us.apache.org/repos/asf/tika/blob/7ebe007e/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java
index 3bebff2..c4693eb 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java
@@ -36,8 +36,6 @@ public interface NERecogniser {
     String DATE = "DATE";
     String PERCENT = "PERCENT";
     String MONEY = "MONEY";
-    String FACILITY = "FACILITY";
-    String GPE = "GPE";
 
     /**
      * checks if this Named Entity recogniser is available for service

http://git-wip-us.apache.org/repos/asf/tika/blob/7ebe007e/tika-server/src/main/java/org/apache/tika/server/RichTextContentHandler.java
----------------------------------------------------------------------
diff --git a/tika-server/src/main/java/org/apache/tika/server/RichTextContentHandler.java b/tika-server/src/main/java/org/apache/tika/server/RichTextContentHandler.java
index 81095a7..8fcc4d5 100644
--- a/tika-server/src/main/java/org/apache/tika/server/RichTextContentHandler.java
+++ b/tika-server/src/main/java/org/apache/tika/server/RichTextContentHandler.java
@@ -15,15 +15,26 @@
  * limitations under the License.
  */
 
-package org.apache.tika.server;
+package org.apache.tika.sax;
 
 import java.io.Writer;
 
-import org.apache.tika.sax.WriteOutContentHandler;
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 
+/**
+ * Content handler for Rich Text. It extracts the {@code alt} attribute
+ * of XHTML &lt;img/&gt; tags and the {@code name} attribute of XHTML
+ * &lt;a/&gt; tags into the output.
+ */
 public class RichTextContentHandler extends WriteOutContentHandler {
+
+    /**
+     * Creates a content handler that writes XHTML body character events to
+     * the given writer.
+     *
+     * @param writer writer that receives the XHTML body character events
+     */
     public RichTextContentHandler(Writer writer) {
         super(writer);
     }

http://git-wip-us.apache.org/repos/asf/tika/blob/7ebe007e/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
----------------------------------------------------------------------
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
index d74ef74..566203a 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
@@ -72,7 +72,7 @@ import org.apache.tika.parser.ocr.TesseractOCRConfig;
 import org.apache.tika.parser.pdf.PDFParserConfig;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.ExpandedTitleContentHandler;
-import org.apache.tika.server.RichTextContentHandler;
+import org.apache.tika.sax.RichTextContentHandler;
 import org.apache.tika.server.TikaServerParseException;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;

http://git-wip-us.apache.org/repos/asf/tika/blob/7ebe007e/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
----------------------------------------------------------------------
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
index cf3a0e9..8ee516e 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
@@ -58,7 +58,7 @@ import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.microsoft.OfficeParser;
 import org.apache.tika.sax.BodyContentHandler;
-import org.apache.tika.server.RichTextContentHandler;
+import org.apache.tika.sax.RichTextContentHandler;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;