You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2016/09/13 19:45:05 UTC
[1/2] tika git commit: TIKA-2064 Test Stata DTA files from Michael Stepner, plus detection unit test
Repository: tika
Updated Branches:
refs/heads/master 3c0abc8eb -> 9130bbc1f
TIKA-2064 Test Stata DTA files from Michael Stepner, plus detection unit test
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/2222fe0c
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/2222fe0c
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/2222fe0c
Branch: refs/heads/master
Commit: 2222fe0ce2e1db633bdcf49bd7b24941374f2033
Parents: 3c0abc8
Author: Nick Burch <ni...@gagravarr.org>
Authored: Tue Sep 13 20:41:41 2016 +0100
Committer: Nick Burch <ni...@gagravarr.org>
Committed: Tue Sep 13 20:41:41 2016 +0100
----------------------------------------------------------------------
.../java/org/apache/tika/mime/TestMimeTypes.java | 10 ++++++++++
.../test/resources/test-documents/testStataDTA.dta | Bin 0 -> 1207 bytes
.../test/resources/test-documents/testStataDTA.txt | 15 +++++++++++++++
3 files changed, 25 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/2222fe0c/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index d35a716..ba47434 100644
--- a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -1071,6 +1071,16 @@ public class TestMimeTypes {
assertTypeByData("application/x-endnote-refer", "testEndNoteImportFile.enw");
}
+ @Test
+ public void testStataDTA() throws Exception {
+ // Filename only gives base type
+ assertTypeByName("application/x-stata-dta", "testStataDTA.dta");
+ // With data too, can get specific version
+ assertTypeByData("application/x-stata-dta; version=13", "testStataDTA.dta");
+ // Name + data gets specific version as well
+ assertType("application/x-stata-dta; version=13", "testStataDTA.dta");
+ }
+
private void assertText(byte[] prefix) throws IOException {
assertMagic("text/plain", prefix);
}
http://git-wip-us.apache.org/repos/asf/tika/blob/2222fe0c/tika-parsers/src/test/resources/test-documents/testStataDTA.dta
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/resources/test-documents/testStataDTA.dta b/tika-parsers/src/test/resources/test-documents/testStataDTA.dta
new file mode 100644
index 0000000..92dd695
Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/testStataDTA.dta differ
http://git-wip-us.apache.org/repos/asf/tika/blob/2222fe0c/tika-parsers/src/test/resources/test-documents/testStataDTA.txt
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/resources/test-documents/testStataDTA.txt b/tika-parsers/src/test/resources/test-documents/testStataDTA.txt
new file mode 100644
index 0000000..7270623
--- /dev/null
+++ b/tika-parsers/src/test/resources/test-documents/testStataDTA.txt
@@ -0,0 +1,15 @@
+testStataDTA.dta was created on Stata 13.1 running on Mac OS X, from:
+---------------------------------------------------------------------
+clear all
+set obs 3
+
+gen byte integers=_n
+gen double reals = sqrt(_n)
+
+gen fruits = ""
+replace fruits = "apple" in 1
+replace fruits = "banana" in 2
+replace fruits = "cantaloupe" in 3
+
+save stata_test_data.dta
+---------------------------------------------------------------------
[2/2] tika git commit: Changelog update
Posted by ni...@apache.org.
Changelog update
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/9130bbc1
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/9130bbc1
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/9130bbc1
Branch: refs/heads/master
Commit: 9130bbc1fa6d69419b2ad294917260d6b1cced08
Parents: 2222fe0
Author: Nick Burch <ni...@gagravarr.org>
Authored: Tue Sep 13 20:42:13 2016 +0100
Committer: Nick Burch <ni...@gagravarr.org>
Committed: Tue Sep 13 20:42:13 2016 +0100
----------------------------------------------------------------------
CHANGES.txt | 1 +
1 file changed, 1 insertion(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/9130bbc1/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 20a10a7..4f271ac 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -38,6 +38,7 @@ Release 1.14 - ???
* Windows Media Metafile (TIKA-2004)
* iCal and vCalendar (TIKA-2006)
* MBOX (TIKA-2042)
+ * Stata DTA (TIKA-2064)
* Upgrade to PDFBox 2.0.2 (TIKA-1996).