You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2019/05/07 14:30:32 UTC

[tika] branch master updated: TIKA-2863 -- add reports that compare parse times per mime type to tika-eval

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new 2584893  TIKA-2863 -- add reports that compare parse times per mime type to tika-eval
2584893 is described below

commit 2584893eb8c7c8dd3bf363567b0bdc23a94dd23f
Author: TALLISON <ta...@apache.org>
AuthorDate: Tue May 7 10:30:16 2019 -0400

    TIKA-2863 -- add reports that compare parse times per mime type to tika-eval
---
 .../src/main/resources/comparison-reports.xml      | 58 ++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/tika-eval/src/main/resources/comparison-reports.xml b/tika-eval/src/main/resources/comparison-reports.xml
index c0f96aa..d3e2329 100644
--- a/tika-eval/src/main/resources/comparison-reports.xml
+++ b/tika-eval/src/main/resources/comparison-reports.xml
@@ -665,6 +665,49 @@
             group by mime_id_a, mime_id_b
             );
         </sql>
+        <!-- Drop parse_time_compared if it already exists, so that the create
+             statement that follows cannot collide with it. -->
+        <sql>drop table if exists parse_time_compared;</sql>
+        <!-- Holds one row per (mime_id_a, mime_id_b) pair: the summed parse times
+             for A and B plus the derived prcnt_increase figure. -->
+        <sql>
+            create table parse_time_compared (
+                mime_id_a integer, mime_id_b integer,
+                total_a bigint, total_b bigint,
+                prcnt_increase double
+            );
+        </sql>
+            <!-- Seed one zeroed row for each distinct (mime_id_a, mime_id_b) pair found on
+                 profiles_a/profiles_b rows that share an id; the mimes joins drop unknown mime ids. -->
+            <sql>
+                insert into parse_time_compared (mime_id_a, mime_id_b, total_a, total_b, prcnt_increase)
+                select mime_a.mime_id, mime_b.mime_id, 0, 0, 0.0
+                from profiles_a prof_a inner join profiles_b prof_b on prof_b.id = prof_a.id
+                inner join mimes mime_a on mime_a.mime_id = prof_a.mime_id
+                inner join mimes mime_b on mime_b.mime_id = prof_b.mime_id
+                group by mime_a.mime_id, mime_b.mime_id
+            </sql>
+        <!-- Fills total_a on every row: profiles_a.elapsed_time_millis summed over the id-matched
+             rows of that mime pair. No GROUP BY needed: the WHERE already pins the subquery to one pair. -->
+        <sql>
+            update parse_time_compared ptc set total_a = (
+                select sum(pa.elapsed_time_millis) from profiles_a pa
+                inner join profiles_b pb on pb.id = pa.id
+                where pa.mime_id = ptc.mime_id_a and pb.mime_id = ptc.mime_id_b)
+        </sql>
+        <!-- Fills total_b on every row: profiles_b.elapsed_time_millis summed over the id-matched
+             rows of that mime pair. No GROUP BY needed: the WHERE already pins the subquery to one pair. -->
+        <sql>
+            update parse_time_compared ptc set total_b = (
+                select sum(pb.elapsed_time_millis) from profiles_b pb
+                inner join profiles_a pa on pa.id = pb.id
+                where pa.mime_id = ptc.mime_id_a and pb.mime_id = ptc.mime_id_b)
+        </sql>
+        <!-- Every row: prcnt_increase = 100 * (total_b - total_a) / total_a; left NULL unless total_a > 0. -->
+        <sql>
+            update parse_time_compared ptc set prcnt_increase = case when total_a > 0
+            then 100.0 * cast(total_b - total_a as double) / cast(total_a as double) end
+        </sql>
     </before>
 
     <!-- MIMES -->
@@ -1666,6 +1709,21 @@
             limit 20000
         </sql>
     </report>
+
+    <!-- Per mime-type pair: summed parse millis for A and B, rows with the largest prcnt_increase first. -->
+    <report reportName="Parse Time (Millis) Compared"
+            reportFilename="content/parse_time_millis_compared.xlsx"
+            format="xlsx" includeSql="true">
+        <sql>
+            select mime_a.mime_string as mime_string_a,
+                   mime_b.mime_string as mime_string_b,
+                   ptc.total_a, ptc.total_b, ptc.prcnt_increase
+            from parse_time_compared ptc
+            inner join mimes mime_a on mime_a.mime_id = ptc.mime_id_a
+            inner join mimes mime_b on mime_b.mime_id = ptc.mime_id_b
+            order by ptc.prcnt_increase desc
+        </sql>
+    </report>
     <after>
         <sql>drop table if exists md5_multiples_tmp_a</sql>
         <sql>drop table if exists md5_multiples_tmp_b</sql>