You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/07/08 18:21:35 UTC

[1/2] tika git commit: TIKA-2030 - add handling for <text:s/> element to ODT parser. Thanks to David Pilato for opening this issue.

Repository: tika
Updated Branches:
  refs/heads/2.x 573527bbc -> cdfacdb41


TIKA-2030 - add handling for <text:s/> element to ODT parser. Thanks to David Pilato for opening this issue.


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/87e1e23b
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/87e1e23b
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/87e1e23b

Branch: refs/heads/2.x
Commit: 87e1e23b46dc68b24288b778e040a3aa55e05628
Parents: 2a7e52e
Author: tballison <ta...@mitre.org>
Authored: Fri Jul 8 14:21:16 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Fri Jul 8 14:21:16 2016 -0400

----------------------------------------------------------------------
 .../parser/odf/OpenDocumentContentParser.java   |   5 +++++
 .../test-documents/testOpenOffice2.odt          | Bin 26448 -> 27554 bytes
 2 files changed, 5 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/87e1e23b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
index a32d406..a149dd8 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
@@ -68,6 +68,9 @@ public class OpenDocumentContentParser extends AbstractParser {
 
     private static final class OpenDocumentElementMappingContentHandler extends
             ElementMappingContentHandler {
+
+        private static final char[] SPACE = new char[]{ ' '};
+
         private final ContentHandler handler;
         private final BitSet textNodeStack = new BitSet();
         private int nodeDepth = 0;
@@ -283,6 +286,8 @@ public class OpenDocumentContentParser extends AbstractParser {
                     startList(attrs.getValue(TEXT_NS, "style-name"));
                 } else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) {
                     startSpan(attrs.getValue(TEXT_NS, "style-name"));
+                } else if (TEXT_NS.equals(namespaceURI) && "s".equals(localName)) {
+                    handler.characters(SPACE, 0, 1);
                 } else {
                     super.startElement(namespaceURI, localName, qName, attrs);
                 }

http://git-wip-us.apache.org/repos/asf/tika/blob/87e1e23b/tika-test-resources/src/test/resources/test-documents/testOpenOffice2.odt
----------------------------------------------------------------------
diff --git a/tika-test-resources/src/test/resources/test-documents/testOpenOffice2.odt b/tika-test-resources/src/test/resources/test-documents/testOpenOffice2.odt
index bc31925..f6c72b6 100644
Binary files a/tika-test-resources/src/test/resources/test-documents/testOpenOffice2.odt and b/tika-test-resources/src/test/resources/test-documents/testOpenOffice2.odt differ


[2/2] tika git commit: Merge remote-tracking branch 'origin/2.x' into 2.x

Posted by ta...@apache.org.
Merge remote-tracking branch 'origin/2.x' into 2.x


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/cdfacdb4
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/cdfacdb4
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/cdfacdb4

Branch: refs/heads/2.x
Commit: cdfacdb4186a7a113f0c518df1ed94bcc16ff252
Parents: 87e1e23 573527b
Author: tballison <ta...@mitre.org>
Authored: Fri Jul 8 14:21:30 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Fri Jul 8 14:21:30 2016 -0400

----------------------------------------------------------------------
 .../apache/tika/parser/geo/topic/GeoParser.java | 43 +++++++++++---------
 .../tika/parser/geo/topic/GeoParserConfig.java  |  4 +-
 .../apache/tika/parser/geo/topic/GeoTag.java    | 33 +++++++--------
 .../parser/geo/topic/NameEntityExtractor.java   | 11 ++---
 4 files changed, 48 insertions(+), 43 deletions(-)
----------------------------------------------------------------------