You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2019/04/11 21:09:01 UTC
[tika] branch master updated: TIKA-2852 -- add reports for missing
files/attachments by mime
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new 5156751 TIKA-2852 -- add reports for missing files/attachments by mime
5156751 is described below
commit 5156751a572f2abbea186987e70e0652ab695454
Author: TALLISON <ta...@apache.org>
AuthorDate: Thu Apr 11 17:08:47 2019 -0400
TIKA-2852 -- add reports for missing files/attachments by mime
---
.../src/main/resources/comparison-reports.xml | 90 ++++++++++++++++++++++
1 file changed, 90 insertions(+)
diff --git a/tika-eval/src/main/resources/comparison-reports.xml b/tika-eval/src/main/resources/comparison-reports.xml
index 9896832..c0f96aa 100644
--- a/tika-eval/src/main/resources/comparison-reports.xml
+++ b/tika-eval/src/main/resources/comparison-reports.xml
@@ -1459,6 +1459,96 @@
</sql>
</report>
+ <report reportName="All files missing in B by Mime"
+ reportFilename="attachments/all_files_missing_in_B_by_mime.xlsx"
+ format="xlsx"
+ includeSql="true">
+ <!-- Per mime_string (from mimes via pa.mime_id): count of profiles_a rows with no profiles_b row sharing the same id, largest count first. -->
+ <sql>
+ select mime_string, count(1) as cnt
+ from profiles_a pa
+ left join profiles_b pb on pa.id=pb.id
+ join mimes m on pa.mime_id=m.mime_id
+ where pb.id is null
+ group by mime_string
+ order by cnt desc
+ </sql>
+ </report>
+ <report reportName="Container files missing in B by Mime"
+ reportFilename="attachments/container_files_missing_in_B_by_mime.xlsx"
+ format="xlsx"
+ includeSql="true">
+ <!-- Per mime_string: count of profiles_a rows with is_embedded=false that have no profiles_b row sharing the same id, largest count first. -->
+ <sql>
+ select mime_string, count(1) as cnt
+ from profiles_a pa
+ left join profiles_b pb on pa.id=pb.id
+ join mimes m on pa.mime_id=m.mime_id
+ where pb.id is null and pa.is_embedded=false
+ group by mime_string
+ order by cnt desc
+ </sql>
+ </report>
+ <report reportName="Embedded files missing in B by Mime"
+ reportFilename="attachments/embedded_files_missing_in_B_by_mime.xlsx"
+ format="xlsx"
+ includeSql="true">
+ <!-- Per mime_string: count of profiles_a rows with is_embedded=true that have no profiles_b row sharing the same id, largest count first. -->
+ <sql>
+ select mime_string, count(1) as cnt
+ from profiles_a pa
+ left join profiles_b pb on pa.id=pb.id
+ join mimes m on pa.mime_id=m.mime_id
+ where pb.id is null and pa.is_embedded=true
+ group by mime_string
+ order by cnt desc
+ </sql>
+ </report>
+ <report reportName="All files missing in A by Mime"
+ reportFilename="attachments/all_files_missing_in_A_by_mime.xlsx"
+ format="xlsx"
+ includeSql="true">
+ <!-- Per mime_string (from mimes via pb.mime_id): count of profiles_b rows with no profiles_a row sharing the same id, largest count first. -->
+ <sql>
+ select mime_string, count(1) as cnt
+ from profiles_b pb
+ left join profiles_a pa on pb.id=pa.id
+ join mimes m on pb.mime_id=m.mime_id
+ where pa.id is null
+ group by mime_string
+ order by cnt desc
+ </sql>
+ </report>
+ <report reportName="Container files missing in A by Mime"
+ reportFilename="attachments/container_files_missing_in_A_by_mime.xlsx"
+ format="xlsx"
+ includeSql="true">
+ <!-- Per mime_string: count of profiles_b rows with is_embedded=false that have no profiles_a row sharing the same id, largest count first. -->
+ <sql>
+ select mime_string, count(1) as cnt
+ from profiles_b pb
+ left join profiles_a pa on pb.id=pa.id
+ join mimes m on pb.mime_id=m.mime_id
+ where pa.id is null and pb.is_embedded=false
+ group by mime_string
+ order by cnt desc
+ </sql>
+ </report>
+ <report reportName="Embedded files missing in A by Mime"
+ reportFilename="attachments/embedded_files_missing_in_A_by_mime.xlsx"
+ format="xlsx"
+ includeSql="true">
+ <!-- Per mime_string: count of profiles_b rows with is_embedded=true that have no profiles_a row sharing the same id, largest count first. -->
+ <sql>
+ select mime_string, count(1) as cnt
+ from profiles_b pb
+ left join profiles_a pa on pb.id=pa.id
+ join mimes m on pb.mime_id=m.mime_id
+ where pa.id is null and pb.is_embedded=true
+ group by mime_string
+ order by cnt desc
+ </sql>
+ </report>
<!-- metadata values -->
<report reportName="Metadata Value Diffs"
reportFilename="metadata/metadata_value_count_diffs.xlsx"