You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/12/14 16:51:41 UTC
[tika] branch branch_1x updated (7696e38 -> 4c9e38e)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a change to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git.
from 7696e38 TIKA-2792 -- revert mp4 parser based on large scale regression test results
add 8c88966 TIKA-2798 -- improve reporting for attachment diffs
add 4c9e38e TIKA-2791 -- add tags/structure to tika-eval
No new revisions were added by this update.
Summary of changes:
.../sax/AbstractRecursiveParserWrapperHandler.java | 4 +
.../tika/sax/RecursiveParserWrapperHandler.java | 1 +
tika-eval/pom.xml | 6 +-
.../org/apache/tika/eval/AbstractProfiler.java | 142 +++++++++++++++++----
.../java/org/apache/tika/eval/ExtractComparer.java | 30 ++++-
.../java/org/apache/tika/eval/ExtractProfiler.java | 27 +++-
.../tika/eval/batch/ExtractComparerBuilder.java | 2 +
.../tika/eval/batch/ExtractProfilerBuilder.java | 1 +
.../main/java/org/apache/tika/eval/db/Cols.java | 22 +++-
.../org/apache/tika/eval/io/ExtractReader.java | 71 +++++++----
.../apache/tika/eval/util/ContentTagParser.java | 89 +++++++++++++
.../org/apache/tika/eval/util/ContentTags.java | 63 +++++++++
.../src/main/resources/comparison-reports.xml | 40 +++++-
.../org/apache/tika/eval/SimpleComparerTest.java | 120 ++++++++++++-----
.../resources/test-dirs/extractsA/file15_tags.json | 41 ++++++
.../test-dirs/extractsA/file16_badTags.json | 41 ++++++
.../test-dirs/extractsA/file17_tagsOutOfOrder.json | 41 ++++++
.../resources/test-dirs/extractsB/file15_tags.html | 31 +++++
.../test-dirs/extractsB/file16_badTags.html | 31 +++++
19 files changed, 707 insertions(+), 96 deletions(-)
create mode 100644 tika-eval/src/main/java/org/apache/tika/eval/util/ContentTagParser.java
create mode 100644 tika-eval/src/main/java/org/apache/tika/eval/util/ContentTags.java
create mode 100644 tika-eval/src/test/resources/test-dirs/extractsA/file15_tags.json
create mode 100644 tika-eval/src/test/resources/test-dirs/extractsA/file16_badTags.json
create mode 100644 tika-eval/src/test/resources/test-dirs/extractsA/file17_tagsOutOfOrder.json
create mode 100644 tika-eval/src/test/resources/test-dirs/extractsB/file15_tags.html
create mode 100644 tika-eval/src/test/resources/test-dirs/extractsB/file16_badTags.html