You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hop.apache.org by ha...@apache.org on 2022/07/06 13:10:27 UTC

[hop] branch master updated: HOP-3939: fix empty string returns null instead of hash

This is an automated email from the ASF dual-hosted git repository.

hansva pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hop.git


The following commit(s) were added to refs/heads/master by this push:
     new 35a4d76c7a HOP-3939: fix empty string returns null instead of hash
     new c25c03d58d Merge pull request #1580 from hansva/master
35a4d76c7a is described below

commit 35a4d76c7a342167a2c80d64b1fdd42c7fd25554
Author: Hans Van Akelyen <ha...@gmail.com>
AuthorDate: Wed Jul 6 14:30:32 2022 +0200

    HOP-3939: fix empty string returns null instead of hash
---
 .../transforms/0040-text-file-input-utf-bom.hpl    |   4 +-
 integration-tests/transforms/0043-checksum.hpl     | 198 +++++++++++++++++++++
 .../transforms/datasets/data-set-filename.csv      |   6 +
 .../transforms/main-0040-text-file-input.hwf       |   8 +-
 ...-text-file-input.hwf => main-0043-checksum.hwf} |  40 ++---
 .../metadata/dataset/golden-checksum.json          |  32 ++++
 .../metadata/unit-test/0043-checksum UNIT.json     |  38 ++++
 .../hop/pipeline/transforms/checksum/CheckSum.java |  10 +-
 8 files changed, 298 insertions(+), 38 deletions(-)

diff --git a/integration-tests/transforms/0040-text-file-input-utf-bom.hpl b/integration-tests/transforms/0040-text-file-input-utf-bom.hpl
index a76aaf3fc8..7351185ea8 100644
--- a/integration-tests/transforms/0040-text-file-input-utf-bom.hpl
+++ b/integration-tests/transforms/0040-text-file-input-utf-bom.hpl
@@ -34,7 +34,7 @@ limitations under the License.
     <created_date>2021/12/21 14:38:28.717</created_date>
     <modified_user>-</modified_user>
     <modified_date>2021/12/21 14:38:28.717</modified_date>
-    <key_for_session_key/>
+    <key_for_session_key>H4sIAAAAAAAA/wMAAAAAAAAAAAA=</key_for_session_key>
     <is_key_private>N</is_key_private>
   </info>
   <notepads>
@@ -97,7 +97,7 @@ limitations under the License.
     <rownum>N</rownum>
     <rownumByFile>N</rownumByFile>
     <rownum_field/>
-    <format>DOS</format>
+    <format>mixed</format>
     <encoding>UTF-8</encoding>
     <length>Characters</length>
     <add_to_result_filenames>Y</add_to_result_filenames>
diff --git a/integration-tests/transforms/0043-checksum.hpl b/integration-tests/transforms/0043-checksum.hpl
new file mode 100644
index 0000000000..9f8ade1542
--- /dev/null
+++ b/integration-tests/transforms/0043-checksum.hpl
@@ -0,0 +1,198 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+-->
+<pipeline>
+  <info>
+    <name>0043-checksum</name>
+    <name_sync_with_filename>Y</name_sync_with_filename>
+    <description/>
+    <extended_description/>
+    <pipeline_version/>
+    <pipeline_type>Normal</pipeline_type>
+    <parameters>
+    </parameters>
+    <capture_transform_performance>N</capture_transform_performance>
+    <transform_performance_capturing_delay>1000</transform_performance_capturing_delay>
+    <transform_performance_capturing_size_limit>100</transform_performance_capturing_size_limit>
+    <created_user>-</created_user>
+    <created_date>2022/04/29 07:47:12.357</created_date>
+    <modified_user>-</modified_user>
+    <modified_date>2022/04/29 07:47:12.357</modified_date>
+    <key_for_session_key>H4sIAAAAAAAA/wMAAAAAAAAAAAA=</key_for_session_key>
+    <is_key_private>N</is_key_private>
+  </info>
+  <notepads>
+  </notepads>
+  <order>
+    <hop>
+      <from>Data grid</from>
+      <to>Replace in string</to>
+      <enabled>Y</enabled>
+    </hop>
+    <hop>
+      <from>Replace in string</from>
+      <to>Add a checksum</to>
+      <enabled>Y</enabled>
+    </hop>
+    <hop>
+      <from>Add a checksum</from>
+      <to>verify</to>
+      <enabled>Y</enabled>
+    </hop>
+  </order>
+  <transform>
+    <name>Data grid</name>
+    <type>DataGrid</type>
+    <description/>
+    <distribute>Y</distribute>
+    <custom_distribution/>
+    <copies>1</copies>
+    <partitioning>
+      <method>none</method>
+      <schema_name/>
+    </partitioning>
+    <fields>
+      <field>
+        <currency/>
+        <decimal/>
+        <set_empty_string>N</set_empty_string>
+        <format/>
+        <group/>
+        <length>-1</length>
+        <name>row</name>
+        <precision>-1</precision>
+        <type>Integer</type>
+      </field>
+      <field>
+        <currency/>
+        <decimal/>
+        <set_empty_string>N</set_empty_string>
+        <format/>
+        <group/>
+        <length>-1</length>
+        <name>value</name>
+        <precision>-1</precision>
+        <type>String</type>
+      </field>
+    </fields>
+    <data>
+      <line>
+        <item>1</item>
+        <item>null</item>
+      </line>
+      <line>
+        <item>2</item>
+        <item/>
+      </line>
+      <line>
+        <item>3</item>
+        <item>String one</item>
+      </line>
+      <line>
+        <item>4</item>
+        <item>String two</item>
+      </line>
+      <line>
+        <item>5</item>
+        <item>String three</item>
+      </line>
+    </data>
+    <attributes/>
+    <GUI>
+      <xloc>112</xloc>
+      <yloc>80</yloc>
+    </GUI>
+  </transform>
+  <transform>
+    <name>Replace in string</name>
+    <type>ReplaceString</type>
+    <description/>
+    <distribute>Y</distribute>
+    <custom_distribution/>
+    <copies>1</copies>
+    <partitioning>
+      <method>none</method>
+      <schema_name/>
+    </partitioning>
+    <fields>
+      <field>
+        <in_stream_name>value</in_stream_name>
+        <out_stream_name/>
+        <use_regex>no</use_regex>
+        <replace_string>null</replace_string>
+        <replace_by_string/>
+        <set_empty_string>Y</set_empty_string>
+        <replace_field_by_string/>
+        <whole_word>no</whole_word>
+        <case_sensitive>no</case_sensitive>
+        <is_unicode>no</is_unicode>
+      </field>
+    </fields>
+    <attributes/>
+    <GUI>
+      <xloc>261</xloc>
+      <yloc>80</yloc>
+    </GUI>
+  </transform>
+  <transform>
+    <name>Add a checksum</name>
+    <type>CheckSum</type>
+    <description/>
+    <distribute>Y</distribute>
+    <custom_distribution/>
+    <copies>1</copies>
+    <partitioning>
+      <method>none</method>
+      <schema_name/>
+    </partitioning>
+    <checksumtype>SHA-256</checksumtype>
+    <fields>
+      <field>
+        <name>value</name>
+      </field>
+    </fields>
+    <resultfieldName>checksum</resultfieldName>
+    <resultType>string</resultType>
+    <attributes/>
+    <GUI>
+      <xloc>410</xloc>
+      <yloc>80</yloc>
+    </GUI>
+  </transform>
+  <transform>
+    <name>verify</name>
+    <type>Dummy</type>
+    <description/>
+    <distribute>Y</distribute>
+    <custom_distribution/>
+    <copies>1</copies>
+    <partitioning>
+      <method>none</method>
+      <schema_name/>
+    </partitioning>
+    <attributes/>
+    <GUI>
+      <xloc>544</xloc>
+      <yloc>80</yloc>
+    </GUI>
+  </transform>
+  <transform_error_handling>
+  </transform_error_handling>
+  <attributes/>
+</pipeline>
diff --git a/integration-tests/transforms/datasets/data-set-filename.csv b/integration-tests/transforms/datasets/data-set-filename.csv
new file mode 100644
index 0000000000..3c1de166df
--- /dev/null
+++ b/integration-tests/transforms/datasets/data-set-filename.csv
@@ -0,0 +1,6 @@
+row,value,checksum
+1,,227-176-196-66-152-252-28-20-154-251-244-200-153-111-185-36-39-174-65-228-100-155-147-76-164-149-153-27-120-82-184-85
+2,,
+3,String one,196-100-68-36-145-62-9-250-247-10-137-92-53-18-82-198-218-31-215-29-97-177-51-6-226-233-72-214-236-101-11-157
+4,String two,216-22-234-217-166-108-105-92-228-110-19-239-5-97-228-35-67-159-183-25-193-189-43-50-41-151-20-27-36-133-234-26
+5,String three,159-239-11-199-219-83-12-30-213-150-105-174-130-217-190-5-167-85-19-137-136-160-56-86-51-229-196-113-16-13-129-178
diff --git a/integration-tests/transforms/main-0040-text-file-input.hwf b/integration-tests/transforms/main-0040-text-file-input.hwf
index 5559200692..4c68f4100c 100644
--- a/integration-tests/transforms/main-0040-text-file-input.hwf
+++ b/integration-tests/transforms/main-0040-text-file-input.hwf
@@ -49,13 +49,13 @@ limitations under the License.
       <attributes_hac/>
     </action>
     <action>
-      <name>Run Apache Tika Tests</name>
+      <name>Run text-file-input test</name>
       <description/>
       <type>RunPipelineTests</type>
       <attributes/>
       <test_names>
         <test_name>
-          <name>0037-apache-tika UNIT</name>
+          <name>0040-text-file-input-utf-bom UNIT</name>
         </test_name>
       </test_names>
       <parallel>N</parallel>
@@ -67,7 +67,7 @@ limitations under the License.
   <hops>
     <hop>
       <from>Start</from>
-      <to>Run Apache Tika Tests</to>
+      <to>Run text-file-input test</to>
       <enabled>Y</enabled>
       <evaluation>Y</evaluation>
       <unconditional>Y</unconditional>
@@ -78,7 +78,7 @@ limitations under the License.
       <note>Test reading CSV files with the CSV Input transform </note>
       <xloc>128</xloc>
       <yloc>208</yloc>
-      <width>279</width>
+      <width>284</width>
       <heigth>26</heigth>
       <fontname>Inter</fontname>
       <fontsize>11</fontsize>
diff --git a/integration-tests/transforms/main-0040-text-file-input.hwf b/integration-tests/transforms/main-0043-checksum.hwf
similarity index 63%
copy from integration-tests/transforms/main-0040-text-file-input.hwf
copy to integration-tests/transforms/main-0043-checksum.hwf
index 5559200692..1c8e056475 100644
--- a/integration-tests/transforms/main-0040-text-file-input.hwf
+++ b/integration-tests/transforms/main-0043-checksum.hwf
@@ -18,15 +18,15 @@ limitations under the License.
 
 -->
 <workflow>
-  <name>main-0040-text-file-input</name>
+  <name>main-0043-checksum</name>
   <name_sync_with_filename>Y</name_sync_with_filename>
   <description/>
   <extended_description/>
   <workflow_version/>
   <created_user>-</created_user>
-  <created_date>2021/05/31 11:58:41.121</created_date>
+  <created_date>2022/04/29 10:42:16.470</created_date>
   <modified_user>-</modified_user>
-  <modified_date>2021/05/31 11:58:41.121</modified_date>
+  <modified_date>2022/04/29 10:42:16.470</modified_date>
   <parameters>
     </parameters>
   <actions>
@@ -44,56 +44,36 @@ limitations under the License.
       <weekDay>1</weekDay>
       <DayOfMonth>1</DayOfMonth>
       <parallel>N</parallel>
-      <xloc>128</xloc>
-      <yloc>112</yloc>
+      <xloc>50</xloc>
+      <yloc>50</yloc>
       <attributes_hac/>
     </action>
     <action>
-      <name>Run Apache Tika Tests</name>
+      <name>Run Checksum Unit Test</name>
       <description/>
       <type>RunPipelineTests</type>
       <attributes/>
       <test_names>
         <test_name>
-          <name>0037-apache-tika UNIT</name>
+          <name>0043-checksum UNIT</name>
         </test_name>
       </test_names>
       <parallel>N</parallel>
-      <xloc>416</xloc>
-      <yloc>112</yloc>
+      <xloc>192</xloc>
+      <yloc>48</yloc>
       <attributes_hac/>
     </action>
   </actions>
   <hops>
     <hop>
       <from>Start</from>
-      <to>Run Apache Tika Tests</to>
+      <to>Run Checksum Unit Test</to>
       <enabled>Y</enabled>
       <evaluation>Y</evaluation>
       <unconditional>Y</unconditional>
     </hop>
   </hops>
   <notepads>
-    <notepad>
-      <note>Test reading CSV files with the CSV Input transform </note>
-      <xloc>128</xloc>
-      <yloc>208</yloc>
-      <width>279</width>
-      <heigth>26</heigth>
-      <fontname>Inter</fontname>
-      <fontsize>11</fontsize>
-      <fontbold>N</fontbold>
-      <fontitalic>N</fontitalic>
-      <fontcolorred>14</fontcolorred>
-      <fontcolorgreen>58</fontcolorgreen>
-      <fontcolorblue>90</fontcolorblue>
-      <backgroundcolorred>201</backgroundcolorred>
-      <backgroundcolorgreen>232</backgroundcolorgreen>
-      <backgroundcolorblue>251</backgroundcolorblue>
-      <bordercolorred>14</bordercolorred>
-      <bordercolorgreen>58</bordercolorgreen>
-      <bordercolorblue>90</bordercolorblue>
-    </notepad>
   </notepads>
   <attributes/>
 </workflow>
diff --git a/integration-tests/transforms/metadata/dataset/golden-checksum.json b/integration-tests/transforms/metadata/dataset/golden-checksum.json
new file mode 100644
index 0000000000..231bc72c60
--- /dev/null
+++ b/integration-tests/transforms/metadata/dataset/golden-checksum.json
@@ -0,0 +1,32 @@
+{
+  "base_filename": "data-set-filename.csv",
+  "name": "golden-checksum",
+  "description": "",
+  "dataset_fields": [
+    {
+      "field_comment": "",
+      "field_length": -1,
+      "field_type": 5,
+      "field_precision": 0,
+      "field_format": "####0;-####0",
+      "field_name": "row"
+    },
+    {
+      "field_comment": "",
+      "field_length": -1,
+      "field_type": 2,
+      "field_precision": -1,
+      "field_format": "",
+      "field_name": "value"
+    },
+    {
+      "field_comment": "",
+      "field_length": -1,
+      "field_type": 2,
+      "field_precision": -1,
+      "field_format": "",
+      "field_name": "checksum"
+    }
+  ],
+  "folder_name": ""
+}
\ No newline at end of file
diff --git a/integration-tests/transforms/metadata/unit-test/0043-checksum UNIT.json b/integration-tests/transforms/metadata/unit-test/0043-checksum UNIT.json
new file mode 100644
index 0000000000..22dd6946fe
--- /dev/null
+++ b/integration-tests/transforms/metadata/unit-test/0043-checksum UNIT.json	
@@ -0,0 +1,38 @@
+{
+  "variableValues": [],
+  "database_replacements": [],
+  "autoOpening": true,
+  "basePath": "",
+  "golden_data_sets": [
+    {
+      "field_mappings": [
+        {
+          "transform_field": "row",
+          "data_set_field": "row"
+        },
+        {
+          "transform_field": "value",
+          "data_set_field": "value"
+        },
+        {
+          "transform_field": "checksum",
+          "data_set_field": "checksum"
+        }
+      ],
+      "field_order": [
+        "row",
+        "value",
+        "checksum"
+      ],
+      "transform_name": "verify",
+      "data_set_name": "golden-checksum"
+    }
+  ],
+  "input_data_sets": [],
+  "name": "0043-checksum UNIT",
+  "description": "",
+  "trans_test_tweaks": [],
+  "persist_filename": "",
+  "pipeline_filename": "./0043-checksum.hpl",
+  "test_type": "UNIT_TEST"
+}
\ No newline at end of file
diff --git a/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java b/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java
index 17c2e77666..3e77b05fc2 100644
--- a/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java
+++ b/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java
@@ -160,6 +160,7 @@ public class CheckSum extends BaseTransform<CheckSumMeta, CheckSumData> {
   private byte[] createCheckSum(Object[] r) throws Exception {
 
     ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    boolean valueAdded = false;
 
     // Loop through fields
     for (int i = 0; i < data.fieldnr; i++) {
@@ -167,18 +168,20 @@ public class CheckSum extends BaseTransform<CheckSumMeta, CheckSumData> {
       if (valueMeta.isBinary()) {
         byte[] bytes = getInputRowMeta().getBinary(r, data.fieldnrs[i]);
         if (bytes != null) {
+          valueAdded = true;
           baos.write(bytes);
         }
       } else {
         Object value = valueMeta.getNativeDataType(r[data.fieldnrs[i]]);
         if (value != null) {
+          valueAdded = true;
           baos.write(value.toString().getBytes());
         }    
       }
     }
 
     // Return null when all input values are null.
-    if (baos.size() == 0) {
+    if (!valueAdded) {
       return null;
     }
     
@@ -209,6 +212,7 @@ public class CheckSum extends BaseTransform<CheckSumMeta, CheckSumData> {
     byte[] byteArray;
 
     ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    boolean valueAdded = false;
 
     // Loop through fields
     for (int i = 0; i < data.fieldnr; i++) {
@@ -217,18 +221,20 @@ public class CheckSum extends BaseTransform<CheckSumMeta, CheckSumData> {
       if (valueMeta.isBinary()) {
         byte[] bytes = getInputRowMeta().getBinary(r, data.fieldnrs[i]);
         if (bytes != null) {
+          valueAdded = true;
           baos.write(bytes);
         }
       } else {
         Object value = valueMeta.getNativeDataType(r[data.fieldnrs[i]]);
         if (value != null) {
+          valueAdded= true;
           baos.write(value.toString().getBytes());
         }
       }
     }
 
     // Return null when all input values are null.
-    if (baos.size() == 0) {
+    if (!valueAdded) {
       return null;
     }