You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hop.apache.org by ha...@apache.org on 2022/06/06 20:39:57 UTC
[hop] branch master updated: HOP-3939 Support for checksum transformation for null values
This is an automated email from the ASF dual-hosted git repository.
hansva pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hop.git
The following commit(s) were added to refs/heads/master by this push:
new 99aa62831e HOP-3939 Support for checksum transformation for null values
new 75ded0efb6 Merge pull request #1526 from nadment/HOP-3939
99aa62831e is described below
commit 99aa62831e8c0b79de6a5bb53e51480b32e498ee
Author: Nicolas Adment <na...@gmail.com>
AuthorDate: Mon Jun 6 15:24:28 2022 +0200
HOP-3939 Support for checksum transformation for null values
---
integration-tests/transforms/0015-add-checksum.hpl | 164 +++++++++++++++++----
.../transforms/datasets/golden-add-checksum.csv | 3 +
.../metadata/unit-test/0015-add-checksum UNIT.json | 25 ++--
.../hop/pipeline/transforms/checksum/CheckSum.java | 55 ++++---
.../checksum/messages/messages_fr_FR.properties | 5 -
5 files changed, 189 insertions(+), 63 deletions(-)
diff --git a/integration-tests/transforms/0015-add-checksum.hpl b/integration-tests/transforms/0015-add-checksum.hpl
index 1a77061ba0..6ebd3cab73 100644
--- a/integration-tests/transforms/0015-add-checksum.hpl
+++ b/integration-tests/transforms/0015-add-checksum.hpl
@@ -50,21 +50,11 @@ limitations under the License.
<to>mod</to>
<enabled>Y</enabled>
</hop>
- <hop>
- <from>mod</from>
- <to>CRC32</to>
- <enabled>Y</enabled>
- </hop>
<hop>
<from>CRC32</from>
<to>addler32</to>
<enabled>Y</enabled>
</hop>
- <hop>
- <from>addler32</from>
- <to>md5</to>
- <enabled>Y</enabled>
- </hop>
<hop>
<from>md5</from>
<to>sha1</to>
@@ -80,12 +70,56 @@ limitations under the License.
<to>sha384</to>
<enabled>Y</enabled>
</hop>
+ <hop>
+ <from>addler32</from>
+ <to>md5</to>
+ <enabled>Y</enabled>
+ </hop>
+ <hop>
+ <from>Special cases null values</from>
+ <to>Append streams</to>
+ <enabled>Y</enabled>
+ </hop>
+ <hop>
+ <from>mod</from>
+ <to>Append streams</to>
+ <enabled>Y</enabled>
+ </hop>
+ <hop>
+ <from>Append streams</from>
+ <to>CRC32</to>
+ <enabled>Y</enabled>
+ </hop>
<hop>
<from>sha384</from>
<to>sha512</to>
<enabled>Y</enabled>
</hop>
+ <hop>
+ <from>sha512</from>
+ <to>Dummy (do nothing)</to>
+ <enabled>Y</enabled>
+ </hop>
</order>
+ <transform>
+ <name>Append streams</name>
+ <type>Append</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <head_name>Special cases null values</head_name>
+ <tail_name>mod</tail_name>
+ <attributes/>
+ <GUI>
+ <xloc>208</xloc>
+ <yloc>352</yloc>
+ </GUI>
+ </transform>
<transform>
<name>CRC32</name>
<type>CheckSum</type>
@@ -110,8 +144,8 @@ limitations under the License.
<resultType>hexadecimal</resultType>
<attributes/>
<GUI>
- <xloc>96</xloc>
- <yloc>176</yloc>
+ <xloc>304</xloc>
+ <yloc>352</yloc>
</GUI>
</transform>
<transform>
@@ -133,9 +167,64 @@ limitations under the License.
<limit>100</limit>
<row_time_field>now</row_time_field>
<attributes/>
+ <GUI>
+ <xloc>208</xloc>
+ <yloc>96</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>Special cases null values</name>
+ <type>DataGrid</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <fields>
+ <field>
+ <currency/>
+ <decimal/>
+ <set_empty_string>N</set_empty_string>
+ <format/>
+ <group/>
+ <length>-1</length>
+ <name>id</name>
+ <precision>-1</precision>
+ <type>Integer</type>
+ </field>
+ <field>
+ <currency/>
+ <decimal/>
+ <set_empty_string>N</set_empty_string>
+ <format/>
+ <group/>
+ <length>-1</length>
+ <name>mod</name>
+ <precision>-1</precision>
+ <type>Integer</type>
+ </field>
+ </fields>
+ <data>
+ <line>
+ <item/>
+ <item>2</item>
+ </line>
+ <line>
+ <item>1</item>
+ <item/>
+ </line>
+ <line>
+ <item/>
+ <item/>
+ </line>
+ </data>
+ <attributes/>
<GUI>
<xloc>96</xloc>
- <yloc>80</yloc>
+ <yloc>352</yloc>
</GUI>
</transform>
<transform>
@@ -162,8 +251,8 @@ limitations under the License.
<resultType>hexadecimal</resultType>
<attributes/>
<GUI>
- <xloc>192</xloc>
- <yloc>176</yloc>
+ <xloc>400</xloc>
+ <yloc>352</yloc>
</GUI>
</transform>
<transform>
@@ -186,8 +275,8 @@ limitations under the License.
<valuename>id</valuename>
<attributes/>
<GUI>
- <xloc>240</xloc>
- <yloc>80</yloc>
+ <xloc>208</xloc>
+ <yloc>176</yloc>
</GUI>
</transform>
<transform>
@@ -214,8 +303,8 @@ limitations under the License.
<resultType>hexadecimal</resultType>
<attributes/>
<GUI>
- <xloc>288</xloc>
- <yloc>176</yloc>
+ <xloc>496</xloc>
+ <yloc>352</yloc>
</GUI>
</transform>
<transform>
@@ -251,8 +340,8 @@ limitations under the License.
</calculation>
<attributes/>
<GUI>
- <xloc>368</xloc>
- <yloc>80</yloc>
+ <xloc>208</xloc>
+ <yloc>256</yloc>
</GUI>
</transform>
<transform>
@@ -279,8 +368,8 @@ limitations under the License.
<resultType>hexadecimal</resultType>
<attributes/>
<GUI>
- <xloc>384</xloc>
- <yloc>176</yloc>
+ <xloc>592</xloc>
+ <yloc>352</yloc>
</GUI>
</transform>
<transform>
@@ -307,8 +396,8 @@ limitations under the License.
<resultType>hexadecimal</resultType>
<attributes/>
<GUI>
- <xloc>480</xloc>
- <yloc>176</yloc>
+ <xloc>688</xloc>
+ <yloc>352</yloc>
</GUI>
</transform>
<transform>
@@ -335,8 +424,8 @@ limitations under the License.
<resultType>hexadecimal</resultType>
<attributes/>
<GUI>
- <xloc>576</xloc>
- <yloc>176</yloc>
+ <xloc>784</xloc>
+ <yloc>352</yloc>
</GUI>
</transform>
<transform>
@@ -363,8 +452,25 @@ limitations under the License.
<resultType>hexadecimal</resultType>
<attributes/>
<GUI>
- <xloc>672</xloc>
- <yloc>176</yloc>
+ <xloc>880</xloc>
+ <yloc>352</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>Dummy (do nothing)</name>
+ <type>Dummy</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <attributes/>
+ <GUI>
+ <xloc>992</xloc>
+ <yloc>352</yloc>
</GUI>
</transform>
<transform_error_handling>
diff --git a/integration-tests/transforms/datasets/golden-add-checksum.csv b/integration-tests/transforms/datasets/golden-add-checksum.csv
index 97ff339e17..ff09221d1e 100644
--- a/integration-tests/transforms/datasets/golden-add-checksum.csv
+++ b/integration-tests/transforms/datasets/golden-add-checksum.csv
@@ -1,4 +1,7 @@
id,mod,crc32,addler32,md5,sha1,sha256,sha384,sha512
+,2,450215437,3342387,c81e728d9d4c2f636f067f89cc14862c,da4b9237bacccdf19c0760cab7aec4a8359010b0,d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35,d063457705d66d6f016e4cdd747db3af8d70ebfd36badd63de6c8ca4a9d8bfb5d874e7fbd750aa804dcaddae7eeef51e,40b244112641dd78dd4f93b6c9190dd46e0099194d5a44257b7efad6ef9ff4683da1eda0244448cb343aa688f5d3efd7314dafe580ac0bcbf115aeca9e8dc114
+1,,2212294583,3276850,c4ca4238a0b923820dcc509a6f75849b,356a192b7913b04c54574d18c28d46e6395428ab,6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b,47f05d367b0c32e438fb63e6cf4a5f35c2aa2f90dc7543f8a41a0f95ce8a40a313ab5cf36134a2068c4c969cb50db776,4dff4ea340f0a823f15d3f4f01ab62eae0e5da579ccb851f8db9dfe84c58b2b37b89903a740e1ee172da793a6e79d560e5f7f9bd058a12a280433ed6fa46510a
+,,,,,,,,
1,1,3596227959,9764963,6512bd43d9caa6e02c990b0a82652dca,17ba0791499db908433b80f37c5fbc89b870084b,4fc82b26aecb47d2868c4efbe3581732a3e7cbcc6c2efb32062c08170a05eeb8,9b20aa6472eef4fd186d231637b1c1d55a5a434cc9130d6afcaaf486253a20c23a4eaeea419594c17f46bc53c7cee12e,74a49c698dbd3c12e36b0b287447d833f74f3937ff132ebff7054baa18623c35a705bb18b82e2ac0384b5127db97016e63609f712bc90e3506cfbea97599f46f
2,2,1685985038,9961573,b6d767d2f8ed5d21a44b0e5886680cb9,12c6fc06c99a462375eeb3f43dfd832b08ca9e17,785f3ec7eb32f30b90cd0fcf3657d388b5ff4297f2f9716ff66e9b69c05ddd09,1ba40d8a5dcd0f2f0071687f3253f59780a582305a0cee1a49a56a4736dce4fc8af88372c79393a3a569aeda0c15959d,6ad275d26c200e81534d9996183c8748ddfabc7b0a011a90f46301626d709923474703cacab0ff8b67cd846b6cb55b23a39b03fbdfb5218eec3373cf7010a166
3,0,2473281379,9961572,34173cb38f07f89ddbebc2ac9128303f,22d200f8670dbdb3e253a90eee5098477c95c23d,624b60c58c9d8bfb6ff1886c2fd605d2adeb6ea4da576068201b6c6958ce93f4,32f5039553078543bf8748756a64c8b02338afbc1ee3c70dde5988760c3b8833e0e3c830fea5b65f08cb803842eb6ed6,1ccbff33e55627a50beca8cf5c89f77c3165dcb3218171308423f250f0bb0be9700bbfdd92d35dfa2e579110266a40194d707b50e7d27b6f09b81fbbf80231a3
diff --git a/integration-tests/transforms/metadata/unit-test/0015-add-checksum UNIT.json b/integration-tests/transforms/metadata/unit-test/0015-add-checksum UNIT.json
index 33cde6ed29..4f2c932843 100644
--- a/integration-tests/transforms/metadata/unit-test/0015-add-checksum UNIT.json
+++ b/integration-tests/transforms/metadata/unit-test/0015-add-checksum UNIT.json
@@ -7,25 +7,25 @@
{
"field_mappings": [
{
- "transform_field": "addler32",
- "data_set_field": "addler32"
+ "transform_field": "id",
+ "data_set_field": "id"
+ },
+ {
+ "transform_field": "mod",
+ "data_set_field": "mod"
},
{
"transform_field": "crc32",
"data_set_field": "crc32"
},
{
- "transform_field": "id",
- "data_set_field": "id"
+ "transform_field": "addler32",
+ "data_set_field": "addler32"
},
{
"transform_field": "md5",
"data_set_field": "md5"
},
- {
- "transform_field": "mod",
- "data_set_field": "mod"
- },
{
"transform_field": "sha1",
"data_set_field": "sha1"
@@ -37,12 +37,17 @@
{
"transform_field": "sha384",
"data_set_field": "sha384"
+ },
+ {
+ "transform_field": "sha512",
+ "data_set_field": "sha512"
}
],
"field_order": [
- "id"
+ "id",
+ "mod"
],
- "transform_name": "sha512",
+ "transform_name": "Dummy (do nothing)",
"data_set_name": "golden-add-checksum"
}
],
diff --git a/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java b/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java
index 6614e99d7f..17c2e77666 100644
--- a/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java
+++ b/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java
@@ -19,6 +19,7 @@ package org.apache.hop.pipeline.transforms.checksum;
import org.apache.commons.codec.binary.Hex;
import org.apache.hop.core.exception.HopException;
+import org.apache.hop.core.row.IValueMeta;
import org.apache.hop.core.row.RowDataUtil;
import org.apache.hop.core.util.Utils;
import org.apache.hop.i18n.BaseMessages;
@@ -108,7 +109,6 @@ public class CheckSum extends BaseTransform<CheckSumMeta, CheckSumData> {
outputRowData = RowDataUtil.addValueData(r, data.nrInfields, checksum);
} else {
// get checksum
-
byte[] o = createCheckSum(r);
switch (meta.getResultType()) {
@@ -116,12 +116,13 @@ public class CheckSum extends BaseTransform<CheckSumMeta, CheckSumData> {
outputRowData = RowDataUtil.addValueData(r, data.nrInfields, o);
break;
case HEXADECIMAL:
- String hex = new String(Hex.encodeHex(o));
+ String hex = (o == null) ? null : Hex.encodeHexString(o);
outputRowData = RowDataUtil.addValueData(r, data.nrInfields, hex);
break;
case STRING:
default:
- outputRowData = RowDataUtil.addValueData(r, data.nrInfields, getStringFromBytes(o));
+ String str = (o == null) ? null : getStringFromBytes(o);
+ outputRowData = RowDataUtil.addValueData(r, data.nrInfields, str);
break;
}
}
@@ -162,18 +163,25 @@ public class CheckSum extends BaseTransform<CheckSumMeta, CheckSumData> {
// Loop through fields
for (int i = 0; i < data.fieldnr; i++) {
- if (getInputRowMeta().getValueMeta(data.fieldnrs[i]).isBinary()) {
- baos.write(getInputRowMeta().getBinary(r, data.fieldnrs[i]));
+ IValueMeta valueMeta = getInputRowMeta().getValueMeta(data.fieldnrs[i]);
+ if (valueMeta.isBinary()) {
+ byte[] bytes = getInputRowMeta().getBinary(r, data.fieldnrs[i]);
+ if (bytes != null) {
+ baos.write(bytes);
+ }
} else {
- baos.write(
- getInputRowMeta()
- .getValueMeta(data.fieldnrs[i])
- .getNativeDataType(r[data.fieldnrs[i]])
- .toString()
- .getBytes());
+ Object value = valueMeta.getNativeDataType(r[data.fieldnrs[i]]);
+ if (value != null) {
+ baos.write(value.toString().getBytes());
+ }
}
}
+ // Return null when all input values are null.
+ if (baos.size() == 0) {
+ return null;
+ }
+
// Updates the digest using the specified array of bytes
data.digest.update(baos.toByteArray());
@@ -204,17 +212,26 @@ public class CheckSum extends BaseTransform<CheckSumMeta, CheckSumData> {
// Loop through fields
for (int i = 0; i < data.fieldnr; i++) {
- if (getInputRowMeta().getValueMeta(data.fieldnrs[i]).isBinary()) {
- baos.write(getInputRowMeta().getBinary(r, data.fieldnrs[i]));
+ IValueMeta valueMeta = getInputRowMeta().getValueMeta(data.fieldnrs[i]);
+
+ if (valueMeta.isBinary()) {
+ byte[] bytes = getInputRowMeta().getBinary(r, data.fieldnrs[i]);
+ if (bytes != null) {
+ baos.write(bytes);
+ }
} else {
- baos.write(
- getInputRowMeta()
- .getValueMeta(data.fieldnrs[i])
- .getNativeDataType(r[data.fieldnrs[i]])
- .toString()
- .getBytes());
+ Object value = valueMeta.getNativeDataType(r[data.fieldnrs[i]]);
+ if (value != null) {
+ baos.write(value.toString().getBytes());
+ }
}
}
+
+ // Return null when all input values are null.
+ if (baos.size() == 0) {
+ return null;
+ }
+
byteArray = baos.toByteArray();
if (meta.getCheckSumType() == CheckSumMeta.CheckSumType.CRC32) {
diff --git a/plugins/transforms/checksum/src/main/resources/org/apache/hop/pipeline/transforms/checksum/messages/messages_fr_FR.properties b/plugins/transforms/checksum/src/main/resources/org/apache/hop/pipeline/transforms/checksum/messages/messages_fr_FR.properties
index e0e30dbc1e..50518ff478 100644
--- a/plugins/transforms/checksum/src/main/resources/org/apache/hop/pipeline/transforms/checksum/messages/messages_fr_FR.properties
+++ b/plugins/transforms/checksum/src/main/resources/org/apache/hop/pipeline/transforms/checksum/messages/messages_fr_FR.properties
@@ -36,11 +36,6 @@ CheckSumMeta.CheckResult.TransformRecevingData=La transformation est connect\u00
CheckSumMeta.ResultType.Binary=Octets
CheckSumMeta.ResultType.Hexadecimal=Hexad\u00E9cimal
CheckSumMeta.ResultType.String=Cha\u00EEne de caract\u00E8res
-CheckSumMeta.Type.ADLER32=Adler 32
-CheckSumMeta.Type.CRC32=CRC 32
-CheckSumMeta.Type.MD5=MD5
-CheckSumMeta.Type.SHA1=SHA 1
-CheckSumMeta.Type.SHA256=SHA 256
CheckSumDialog.Fieldname.Column=Champ
CheckSumDialog.Fields.Label=Champs utilis\u00E9s dans le calcul
CheckSumDialog.Result.Label=Champ r\u00E9sultat