You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text version.
Posted to commits@hop.apache.org by ha...@apache.org on 2022/06/06 20:39:57 UTC

[hop] branch master updated: HOP-3939 Support for checksum transformation for null values

This is an automated email from the ASF dual-hosted git repository.

hansva pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hop.git


The following commit(s) were added to refs/heads/master by this push:
     new 99aa62831e HOP-3939 Support for checksum transformation for null values
     new 75ded0efb6 Merge pull request #1526 from nadment/HOP-3939
99aa62831e is described below

commit 99aa62831e8c0b79de6a5bb53e51480b32e498ee
Author: Nicolas Adment <na...@gmail.com>
AuthorDate: Mon Jun 6 15:24:28 2022 +0200

    HOP-3939 Support for checksum transformation for null values
---
 integration-tests/transforms/0015-add-checksum.hpl | 164 +++++++++++++++++----
 .../transforms/datasets/golden-add-checksum.csv    |   3 +
 .../metadata/unit-test/0015-add-checksum UNIT.json |  25 ++--
 .../hop/pipeline/transforms/checksum/CheckSum.java |  55 ++++---
 .../checksum/messages/messages_fr_FR.properties    |   5 -
 5 files changed, 189 insertions(+), 63 deletions(-)

diff --git a/integration-tests/transforms/0015-add-checksum.hpl b/integration-tests/transforms/0015-add-checksum.hpl
index 1a77061ba0..6ebd3cab73 100644
--- a/integration-tests/transforms/0015-add-checksum.hpl
+++ b/integration-tests/transforms/0015-add-checksum.hpl
@@ -50,21 +50,11 @@ limitations under the License.
       <to>mod</to>
       <enabled>Y</enabled>
     </hop>
-    <hop>
-      <from>mod</from>
-      <to>CRC32</to>
-      <enabled>Y</enabled>
-    </hop>
     <hop>
       <from>CRC32</from>
       <to>addler32</to>
       <enabled>Y</enabled>
     </hop>
-    <hop>
-      <from>addler32</from>
-      <to>md5</to>
-      <enabled>Y</enabled>
-    </hop>
     <hop>
       <from>md5</from>
       <to>sha1</to>
@@ -80,12 +70,56 @@ limitations under the License.
       <to>sha384</to>
       <enabled>Y</enabled>
     </hop>
+    <hop>
+      <from>addler32</from>
+      <to>md5</to>
+      <enabled>Y</enabled>
+    </hop>
+    <hop>
+      <from>Special cases null values</from>
+      <to>Append streams</to>
+      <enabled>Y</enabled>
+    </hop>
+    <hop>
+      <from>mod</from>
+      <to>Append streams</to>
+      <enabled>Y</enabled>
+    </hop>
+    <hop>
+      <from>Append streams</from>
+      <to>CRC32</to>
+      <enabled>Y</enabled>
+    </hop>
     <hop>
       <from>sha384</from>
       <to>sha512</to>
       <enabled>Y</enabled>
     </hop>
+    <hop>
+      <from>sha512</from>
+      <to>Dummy (do nothing)</to>
+      <enabled>Y</enabled>
+    </hop>
   </order>
+  <transform>
+    <name>Append streams</name>
+    <type>Append</type>
+    <description/>
+    <distribute>Y</distribute>
+    <custom_distribution/>
+    <copies>1</copies>
+    <partitioning>
+      <method>none</method>
+      <schema_name/>
+    </partitioning>
+    <head_name>Special cases null values</head_name>
+    <tail_name>mod</tail_name>
+    <attributes/>
+    <GUI>
+      <xloc>208</xloc>
+      <yloc>352</yloc>
+    </GUI>
+  </transform>
   <transform>
     <name>CRC32</name>
     <type>CheckSum</type>
@@ -110,8 +144,8 @@ limitations under the License.
     <resultType>hexadecimal</resultType>
     <attributes/>
     <GUI>
-      <xloc>96</xloc>
-      <yloc>176</yloc>
+      <xloc>304</xloc>
+      <yloc>352</yloc>
     </GUI>
   </transform>
   <transform>
@@ -133,9 +167,64 @@ limitations under the License.
     <limit>100</limit>
     <row_time_field>now</row_time_field>
     <attributes/>
+    <GUI>
+      <xloc>208</xloc>
+      <yloc>96</yloc>
+    </GUI>
+  </transform>
+  <transform>
+    <name>Special cases null values</name>
+    <type>DataGrid</type>
+    <description/>
+    <distribute>Y</distribute>
+    <custom_distribution/>
+    <copies>1</copies>
+    <partitioning>
+      <method>none</method>
+      <schema_name/>
+    </partitioning>
+    <fields>
+      <field>
+        <currency/>
+        <decimal/>
+        <set_empty_string>N</set_empty_string>
+        <format/>
+        <group/>
+        <length>-1</length>
+        <name>id</name>
+        <precision>-1</precision>
+        <type>Integer</type>
+      </field>
+      <field>
+        <currency/>
+        <decimal/>
+        <set_empty_string>N</set_empty_string>
+        <format/>
+        <group/>
+        <length>-1</length>
+        <name>mod</name>
+        <precision>-1</precision>
+        <type>Integer</type>
+      </field>
+    </fields>
+    <data>
+      <line>
+        <item/>
+        <item>2</item>
+      </line>
+      <line>
+        <item>1</item>
+        <item/>
+      </line>
+      <line>
+        <item/>
+        <item/>
+      </line>
+    </data>
+    <attributes/>
     <GUI>
       <xloc>96</xloc>
-      <yloc>80</yloc>
+      <yloc>352</yloc>
     </GUI>
   </transform>
   <transform>
@@ -162,8 +251,8 @@ limitations under the License.
     <resultType>hexadecimal</resultType>
     <attributes/>
     <GUI>
-      <xloc>192</xloc>
-      <yloc>176</yloc>
+      <xloc>400</xloc>
+      <yloc>352</yloc>
     </GUI>
   </transform>
   <transform>
@@ -186,8 +275,8 @@ limitations under the License.
     <valuename>id</valuename>
     <attributes/>
     <GUI>
-      <xloc>240</xloc>
-      <yloc>80</yloc>
+      <xloc>208</xloc>
+      <yloc>176</yloc>
     </GUI>
   </transform>
   <transform>
@@ -214,8 +303,8 @@ limitations under the License.
     <resultType>hexadecimal</resultType>
     <attributes/>
     <GUI>
-      <xloc>288</xloc>
-      <yloc>176</yloc>
+      <xloc>496</xloc>
+      <yloc>352</yloc>
     </GUI>
   </transform>
   <transform>
@@ -251,8 +340,8 @@ limitations under the License.
     </calculation>
     <attributes/>
     <GUI>
-      <xloc>368</xloc>
-      <yloc>80</yloc>
+      <xloc>208</xloc>
+      <yloc>256</yloc>
     </GUI>
   </transform>
   <transform>
@@ -279,8 +368,8 @@ limitations under the License.
     <resultType>hexadecimal</resultType>
     <attributes/>
     <GUI>
-      <xloc>384</xloc>
-      <yloc>176</yloc>
+      <xloc>592</xloc>
+      <yloc>352</yloc>
     </GUI>
   </transform>
   <transform>
@@ -307,8 +396,8 @@ limitations under the License.
     <resultType>hexadecimal</resultType>
     <attributes/>
     <GUI>
-      <xloc>480</xloc>
-      <yloc>176</yloc>
+      <xloc>688</xloc>
+      <yloc>352</yloc>
     </GUI>
   </transform>
   <transform>
@@ -335,8 +424,8 @@ limitations under the License.
     <resultType>hexadecimal</resultType>
     <attributes/>
     <GUI>
-      <xloc>576</xloc>
-      <yloc>176</yloc>
+      <xloc>784</xloc>
+      <yloc>352</yloc>
     </GUI>
   </transform>
   <transform>
@@ -363,8 +452,25 @@ limitations under the License.
     <resultType>hexadecimal</resultType>
     <attributes/>
     <GUI>
-      <xloc>672</xloc>
-      <yloc>176</yloc>
+      <xloc>880</xloc>
+      <yloc>352</yloc>
+    </GUI>
+  </transform>
+  <transform>
+    <name>Dummy (do nothing)</name>
+    <type>Dummy</type>
+    <description/>
+    <distribute>Y</distribute>
+    <custom_distribution/>
+    <copies>1</copies>
+    <partitioning>
+      <method>none</method>
+      <schema_name/>
+    </partitioning>
+    <attributes/>
+    <GUI>
+      <xloc>992</xloc>
+      <yloc>352</yloc>
     </GUI>
   </transform>
   <transform_error_handling>
diff --git a/integration-tests/transforms/datasets/golden-add-checksum.csv b/integration-tests/transforms/datasets/golden-add-checksum.csv
index 97ff339e17..ff09221d1e 100644
--- a/integration-tests/transforms/datasets/golden-add-checksum.csv
+++ b/integration-tests/transforms/datasets/golden-add-checksum.csv
@@ -1,4 +1,7 @@
 id,mod,crc32,addler32,md5,sha1,sha256,sha384,sha512
+,2,450215437,3342387,c81e728d9d4c2f636f067f89cc14862c,da4b9237bacccdf19c0760cab7aec4a8359010b0,d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35,d063457705d66d6f016e4cdd747db3af8d70ebfd36badd63de6c8ca4a9d8bfb5d874e7fbd750aa804dcaddae7eeef51e,40b244112641dd78dd4f93b6c9190dd46e0099194d5a44257b7efad6ef9ff4683da1eda0244448cb343aa688f5d3efd7314dafe580ac0bcbf115aeca9e8dc114
+1,,2212294583,3276850,c4ca4238a0b923820dcc509a6f75849b,356a192b7913b04c54574d18c28d46e6395428ab,6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b,47f05d367b0c32e438fb63e6cf4a5f35c2aa2f90dc7543f8a41a0f95ce8a40a313ab5cf36134a2068c4c969cb50db776,4dff4ea340f0a823f15d3f4f01ab62eae0e5da579ccb851f8db9dfe84c58b2b37b89903a740e1ee172da793a6e79d560e5f7f9bd058a12a280433ed6fa46510a
+,,,,,,,,
 1,1,3596227959,9764963,6512bd43d9caa6e02c990b0a82652dca,17ba0791499db908433b80f37c5fbc89b870084b,4fc82b26aecb47d2868c4efbe3581732a3e7cbcc6c2efb32062c08170a05eeb8,9b20aa6472eef4fd186d231637b1c1d55a5a434cc9130d6afcaaf486253a20c23a4eaeea419594c17f46bc53c7cee12e,74a49c698dbd3c12e36b0b287447d833f74f3937ff132ebff7054baa18623c35a705bb18b82e2ac0384b5127db97016e63609f712bc90e3506cfbea97599f46f
 2,2,1685985038,9961573,b6d767d2f8ed5d21a44b0e5886680cb9,12c6fc06c99a462375eeb3f43dfd832b08ca9e17,785f3ec7eb32f30b90cd0fcf3657d388b5ff4297f2f9716ff66e9b69c05ddd09,1ba40d8a5dcd0f2f0071687f3253f59780a582305a0cee1a49a56a4736dce4fc8af88372c79393a3a569aeda0c15959d,6ad275d26c200e81534d9996183c8748ddfabc7b0a011a90f46301626d709923474703cacab0ff8b67cd846b6cb55b23a39b03fbdfb5218eec3373cf7010a166
 3,0,2473281379,9961572,34173cb38f07f89ddbebc2ac9128303f,22d200f8670dbdb3e253a90eee5098477c95c23d,624b60c58c9d8bfb6ff1886c2fd605d2adeb6ea4da576068201b6c6958ce93f4,32f5039553078543bf8748756a64c8b02338afbc1ee3c70dde5988760c3b8833e0e3c830fea5b65f08cb803842eb6ed6,1ccbff33e55627a50beca8cf5c89f77c3165dcb3218171308423f250f0bb0be9700bbfdd92d35dfa2e579110266a40194d707b50e7d27b6f09b81fbbf80231a3
diff --git a/integration-tests/transforms/metadata/unit-test/0015-add-checksum UNIT.json b/integration-tests/transforms/metadata/unit-test/0015-add-checksum UNIT.json
index 33cde6ed29..4f2c932843 100644
--- a/integration-tests/transforms/metadata/unit-test/0015-add-checksum UNIT.json	
+++ b/integration-tests/transforms/metadata/unit-test/0015-add-checksum UNIT.json	
@@ -7,25 +7,25 @@
     {
       "field_mappings": [
         {
-          "transform_field": "addler32",
-          "data_set_field": "addler32"
+          "transform_field": "id",
+          "data_set_field": "id"
+        },
+        {
+          "transform_field": "mod",
+          "data_set_field": "mod"
         },
         {
           "transform_field": "crc32",
           "data_set_field": "crc32"
         },
         {
-          "transform_field": "id",
-          "data_set_field": "id"
+          "transform_field": "addler32",
+          "data_set_field": "addler32"
         },
         {
           "transform_field": "md5",
           "data_set_field": "md5"
         },
-        {
-          "transform_field": "mod",
-          "data_set_field": "mod"
-        },
         {
           "transform_field": "sha1",
           "data_set_field": "sha1"
@@ -37,12 +37,17 @@
         {
           "transform_field": "sha384",
           "data_set_field": "sha384"
+        },
+        {
+          "transform_field": "sha512",
+          "data_set_field": "sha512"
         }
       ],
       "field_order": [
-        "id"
+        "id",
+        "mod"
       ],
-      "transform_name": "sha512",
+      "transform_name": "Dummy (do nothing)",
       "data_set_name": "golden-add-checksum"
     }
   ],
diff --git a/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java b/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java
index 6614e99d7f..17c2e77666 100644
--- a/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java
+++ b/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java
@@ -19,6 +19,7 @@ package org.apache.hop.pipeline.transforms.checksum;
 
 import org.apache.commons.codec.binary.Hex;
 import org.apache.hop.core.exception.HopException;
+import org.apache.hop.core.row.IValueMeta;
 import org.apache.hop.core.row.RowDataUtil;
 import org.apache.hop.core.util.Utils;
 import org.apache.hop.i18n.BaseMessages;
@@ -108,7 +109,6 @@ public class CheckSum extends BaseTransform<CheckSumMeta, CheckSumData> {
         outputRowData = RowDataUtil.addValueData(r, data.nrInfields, checksum);
       } else {
         // get checksum
-
         byte[] o = createCheckSum(r);
 
         switch (meta.getResultType()) {
@@ -116,12 +116,13 @@ public class CheckSum extends BaseTransform<CheckSumMeta, CheckSumData> {
             outputRowData = RowDataUtil.addValueData(r, data.nrInfields, o);
             break;
           case HEXADECIMAL:
-            String hex = new String(Hex.encodeHex(o));
+            String hex = (o == null) ? null : Hex.encodeHexString(o);
             outputRowData = RowDataUtil.addValueData(r, data.nrInfields, hex);
             break;
           case STRING:
           default:
-            outputRowData = RowDataUtil.addValueData(r, data.nrInfields, getStringFromBytes(o));
+            String str = (o == null) ? null : getStringFromBytes(o);
+            outputRowData = RowDataUtil.addValueData(r, data.nrInfields, str);
             break;
         }
       }
@@ -162,18 +163,25 @@ public class CheckSum extends BaseTransform<CheckSumMeta, CheckSumData> {
 
     // Loop through fields
     for (int i = 0; i < data.fieldnr; i++) {
-      if (getInputRowMeta().getValueMeta(data.fieldnrs[i]).isBinary()) {
-        baos.write(getInputRowMeta().getBinary(r, data.fieldnrs[i]));
+      IValueMeta valueMeta = getInputRowMeta().getValueMeta(data.fieldnrs[i]);
+      if (valueMeta.isBinary()) {
+        byte[] bytes = getInputRowMeta().getBinary(r, data.fieldnrs[i]);
+        if (bytes != null) {
+          baos.write(bytes);
+        }
       } else {
-        baos.write(
-            getInputRowMeta()
-                .getValueMeta(data.fieldnrs[i])
-                .getNativeDataType(r[data.fieldnrs[i]])
-                .toString()
-                .getBytes());
+        Object value = valueMeta.getNativeDataType(r[data.fieldnrs[i]]);
+        if (value != null) {
+          baos.write(value.toString().getBytes());
+        }    
       }
     }
 
+    // Return null when all input values are null.
+    if (baos.size() == 0) {
+      return null;
+    }
+    
     // Updates the digest using the specified array of bytes
     data.digest.update(baos.toByteArray());
 
@@ -204,17 +212,26 @@ public class CheckSum extends BaseTransform<CheckSumMeta, CheckSumData> {
 
     // Loop through fields
     for (int i = 0; i < data.fieldnr; i++) {
-      if (getInputRowMeta().getValueMeta(data.fieldnrs[i]).isBinary()) {
-        baos.write(getInputRowMeta().getBinary(r, data.fieldnrs[i]));
+      IValueMeta valueMeta = getInputRowMeta().getValueMeta(data.fieldnrs[i]);
+
+      if (valueMeta.isBinary()) {
+        byte[] bytes = getInputRowMeta().getBinary(r, data.fieldnrs[i]);
+        if (bytes != null) {
+          baos.write(bytes);
+        }
       } else {
-        baos.write(
-            getInputRowMeta()
-                .getValueMeta(data.fieldnrs[i])
-                .getNativeDataType(r[data.fieldnrs[i]])
-                .toString()
-                .getBytes());
+        Object value = valueMeta.getNativeDataType(r[data.fieldnrs[i]]);
+        if (value != null) {
+          baos.write(value.toString().getBytes());
+        }
       }
     }
+
+    // Return null when all input values are null.
+    if (baos.size() == 0) {
+      return null;
+    }
+    
     byteArray = baos.toByteArray();
 
     if (meta.getCheckSumType() == CheckSumMeta.CheckSumType.CRC32) {
diff --git a/plugins/transforms/checksum/src/main/resources/org/apache/hop/pipeline/transforms/checksum/messages/messages_fr_FR.properties b/plugins/transforms/checksum/src/main/resources/org/apache/hop/pipeline/transforms/checksum/messages/messages_fr_FR.properties
index e0e30dbc1e..50518ff478 100644
--- a/plugins/transforms/checksum/src/main/resources/org/apache/hop/pipeline/transforms/checksum/messages/messages_fr_FR.properties
+++ b/plugins/transforms/checksum/src/main/resources/org/apache/hop/pipeline/transforms/checksum/messages/messages_fr_FR.properties
@@ -36,11 +36,6 @@ CheckSumMeta.CheckResult.TransformRecevingData=La transformation est connect\u00
 CheckSumMeta.ResultType.Binary=Octets
 CheckSumMeta.ResultType.Hexadecimal=Hexad\u00E9cimal
 CheckSumMeta.ResultType.String=Cha\u00EEne de caract\u00E8res
-CheckSumMeta.Type.ADLER32=Adler 32
-CheckSumMeta.Type.CRC32=CRC 32
-CheckSumMeta.Type.MD5=MD5
-CheckSumMeta.Type.SHA1=SHA 1
-CheckSumMeta.Type.SHA256=SHA 256
 CheckSumDialog.Fieldname.Column=Champ
 CheckSumDialog.Fields.Label=Champs utilis\u00E9s dans le calcul
 CheckSumDialog.Result.Label=Champ r\u00E9sultat