You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hop.apache.org by ha...@apache.org on 2022/07/06 13:10:27 UTC
[hop] branch master updated: HOP-3939: fix empty string returns null instead of hash
This is an automated email from the ASF dual-hosted git repository.
hansva pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hop.git
The following commit(s) were added to refs/heads/master by this push:
new 35a4d76c7a HOP-3939: fix empty string returns null instead of hash
new c25c03d58d Merge pull request #1580 from hansva/master
35a4d76c7a is described below
commit 35a4d76c7a342167a2c80d64b1fdd42c7fd25554
Author: Hans Van Akelyen <ha...@gmail.com>
AuthorDate: Wed Jul 6 14:30:32 2022 +0200
HOP-3939: fix empty string returns null instead of hash
---
.../transforms/0040-text-file-input-utf-bom.hpl | 4 +-
integration-tests/transforms/0043-checksum.hpl | 198 +++++++++++++++++++++
.../transforms/datasets/data-set-filename.csv | 6 +
.../transforms/main-0040-text-file-input.hwf | 8 +-
...-text-file-input.hwf => main-0043-checksum.hwf} | 40 ++---
.../metadata/dataset/golden-checksum.json | 32 ++++
.../metadata/unit-test/0043-checksum UNIT.json | 38 ++++
.../hop/pipeline/transforms/checksum/CheckSum.java | 10 +-
8 files changed, 298 insertions(+), 38 deletions(-)
diff --git a/integration-tests/transforms/0040-text-file-input-utf-bom.hpl b/integration-tests/transforms/0040-text-file-input-utf-bom.hpl
index a76aaf3fc8..7351185ea8 100644
--- a/integration-tests/transforms/0040-text-file-input-utf-bom.hpl
+++ b/integration-tests/transforms/0040-text-file-input-utf-bom.hpl
@@ -34,7 +34,7 @@ limitations under the License.
<created_date>2021/12/21 14:38:28.717</created_date>
<modified_user>-</modified_user>
<modified_date>2021/12/21 14:38:28.717</modified_date>
- <key_for_session_key/>
+ <key_for_session_key>H4sIAAAAAAAA/wMAAAAAAAAAAAA=</key_for_session_key>
<is_key_private>N</is_key_private>
</info>
<notepads>
@@ -97,7 +97,7 @@ limitations under the License.
<rownum>N</rownum>
<rownumByFile>N</rownumByFile>
<rownum_field/>
- <format>DOS</format>
+ <format>mixed</format>
<encoding>UTF-8</encoding>
<length>Characters</length>
<add_to_result_filenames>Y</add_to_result_filenames>
diff --git a/integration-tests/transforms/0043-checksum.hpl b/integration-tests/transforms/0043-checksum.hpl
new file mode 100644
index 0000000000..9f8ade1542
--- /dev/null
+++ b/integration-tests/transforms/0043-checksum.hpl
@@ -0,0 +1,198 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+-->
+<pipeline>
+ <info>
+ <name>0043-checksum</name>
+ <name_sync_with_filename>Y</name_sync_with_filename>
+ <description/>
+ <extended_description/>
+ <pipeline_version/>
+ <pipeline_type>Normal</pipeline_type>
+ <parameters>
+ </parameters>
+ <capture_transform_performance>N</capture_transform_performance>
+ <transform_performance_capturing_delay>1000</transform_performance_capturing_delay>
+ <transform_performance_capturing_size_limit>100</transform_performance_capturing_size_limit>
+ <created_user>-</created_user>
+ <created_date>2022/04/29 07:47:12.357</created_date>
+ <modified_user>-</modified_user>
+ <modified_date>2022/04/29 07:47:12.357</modified_date>
+ <key_for_session_key>H4sIAAAAAAAA/wMAAAAAAAAAAAA=</key_for_session_key>
+ <is_key_private>N</is_key_private>
+ </info>
+ <notepads>
+ </notepads>
+ <order>
+ <hop>
+ <from>Data grid</from>
+ <to>Replace in string</to>
+ <enabled>Y</enabled>
+ </hop>
+ <hop>
+ <from>Replace in string</from>
+ <to>Add a checksum</to>
+ <enabled>Y</enabled>
+ </hop>
+ <hop>
+ <from>Add a checksum</from>
+ <to>verify</to>
+ <enabled>Y</enabled>
+ </hop>
+ </order>
+ <transform>
+ <name>Data grid</name>
+ <type>DataGrid</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <fields>
+ <field>
+ <currency/>
+ <decimal/>
+ <set_empty_string>N</set_empty_string>
+ <format/>
+ <group/>
+ <length>-1</length>
+ <name>row</name>
+ <precision>-1</precision>
+ <type>Integer</type>
+ </field>
+ <field>
+ <currency/>
+ <decimal/>
+ <set_empty_string>N</set_empty_string>
+ <format/>
+ <group/>
+ <length>-1</length>
+ <name>value</name>
+ <precision>-1</precision>
+ <type>String</type>
+ </field>
+ </fields>
+ <data>
+ <line>
+ <item>1</item>
+ <item>null</item>
+ </line>
+ <line>
+ <item>2</item>
+ <item/>
+ </line>
+ <line>
+ <item>3</item>
+ <item>String one</item>
+ </line>
+ <line>
+ <item>4</item>
+ <item>String two</item>
+ </line>
+ <line>
+ <item>5</item>
+ <item>String three</item>
+ </line>
+ </data>
+ <attributes/>
+ <GUI>
+ <xloc>112</xloc>
+ <yloc>80</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>Replace in string</name>
+ <type>ReplaceString</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <fields>
+ <field>
+ <in_stream_name>value</in_stream_name>
+ <out_stream_name/>
+ <use_regex>no</use_regex>
+ <replace_string>null</replace_string>
+ <replace_by_string/>
+ <set_empty_string>Y</set_empty_string>
+ <replace_field_by_string/>
+ <whole_word>no</whole_word>
+ <case_sensitive>no</case_sensitive>
+ <is_unicode>no</is_unicode>
+ </field>
+ </fields>
+ <attributes/>
+ <GUI>
+ <xloc>261</xloc>
+ <yloc>80</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>Add a checksum</name>
+ <type>CheckSum</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <checksumtype>SHA-256</checksumtype>
+ <fields>
+ <field>
+ <name>value</name>
+ </field>
+ </fields>
+ <resultfieldName>checksum</resultfieldName>
+ <resultType>string</resultType>
+ <attributes/>
+ <GUI>
+ <xloc>410</xloc>
+ <yloc>80</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>verify</name>
+ <type>Dummy</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <attributes/>
+ <GUI>
+ <xloc>544</xloc>
+ <yloc>80</yloc>
+ </GUI>
+ </transform>
+ <transform_error_handling>
+ </transform_error_handling>
+ <attributes/>
+</pipeline>
diff --git a/integration-tests/transforms/datasets/data-set-filename.csv b/integration-tests/transforms/datasets/data-set-filename.csv
new file mode 100644
index 0000000000..3c1de166df
--- /dev/null
+++ b/integration-tests/transforms/datasets/data-set-filename.csv
@@ -0,0 +1,6 @@
+row,value,checksum
+1,,227-176-196-66-152-252-28-20-154-251-244-200-153-111-185-36-39-174-65-228-100-155-147-76-164-149-153-27-120-82-184-85
+2,,
+3,String one,196-100-68-36-145-62-9-250-247-10-137-92-53-18-82-198-218-31-215-29-97-177-51-6-226-233-72-214-236-101-11-157
+4,String two,216-22-234-217-166-108-105-92-228-110-19-239-5-97-228-35-67-159-183-25-193-189-43-50-41-151-20-27-36-133-234-26
+5,String three,159-239-11-199-219-83-12-30-213-150-105-174-130-217-190-5-167-85-19-137-136-160-56-86-51-229-196-113-16-13-129-178
diff --git a/integration-tests/transforms/main-0040-text-file-input.hwf b/integration-tests/transforms/main-0040-text-file-input.hwf
index 5559200692..4c68f4100c 100644
--- a/integration-tests/transforms/main-0040-text-file-input.hwf
+++ b/integration-tests/transforms/main-0040-text-file-input.hwf
@@ -49,13 +49,13 @@ limitations under the License.
<attributes_hac/>
</action>
<action>
- <name>Run Apache Tika Tests</name>
+ <name>Run text-file-input test</name>
<description/>
<type>RunPipelineTests</type>
<attributes/>
<test_names>
<test_name>
- <name>0037-apache-tika UNIT</name>
+ <name>0040-text-file-input-utf-bom UNIT</name>
</test_name>
</test_names>
<parallel>N</parallel>
@@ -67,7 +67,7 @@ limitations under the License.
<hops>
<hop>
<from>Start</from>
- <to>Run Apache Tika Tests</to>
+ <to>Run text-file-input test</to>
<enabled>Y</enabled>
<evaluation>Y</evaluation>
<unconditional>Y</unconditional>
@@ -78,7 +78,7 @@ limitations under the License.
<note>Test reading CSV files with the CSV Input transform </note>
<xloc>128</xloc>
<yloc>208</yloc>
- <width>279</width>
+ <width>284</width>
<heigth>26</heigth>
<fontname>Inter</fontname>
<fontsize>11</fontsize>
diff --git a/integration-tests/transforms/main-0040-text-file-input.hwf b/integration-tests/transforms/main-0043-checksum.hwf
similarity index 63%
copy from integration-tests/transforms/main-0040-text-file-input.hwf
copy to integration-tests/transforms/main-0043-checksum.hwf
index 5559200692..1c8e056475 100644
--- a/integration-tests/transforms/main-0040-text-file-input.hwf
+++ b/integration-tests/transforms/main-0043-checksum.hwf
@@ -18,15 +18,15 @@ limitations under the License.
-->
<workflow>
- <name>main-0040-text-file-input</name>
+ <name>main-0043-checksum</name>
<name_sync_with_filename>Y</name_sync_with_filename>
<description/>
<extended_description/>
<workflow_version/>
<created_user>-</created_user>
- <created_date>2021/05/31 11:58:41.121</created_date>
+ <created_date>2022/04/29 10:42:16.470</created_date>
<modified_user>-</modified_user>
- <modified_date>2021/05/31 11:58:41.121</modified_date>
+ <modified_date>2022/04/29 10:42:16.470</modified_date>
<parameters>
</parameters>
<actions>
@@ -44,56 +44,36 @@ limitations under the License.
<weekDay>1</weekDay>
<DayOfMonth>1</DayOfMonth>
<parallel>N</parallel>
- <xloc>128</xloc>
- <yloc>112</yloc>
+ <xloc>50</xloc>
+ <yloc>50</yloc>
<attributes_hac/>
</action>
<action>
- <name>Run Apache Tika Tests</name>
+ <name>Run Checksum Unit Test</name>
<description/>
<type>RunPipelineTests</type>
<attributes/>
<test_names>
<test_name>
- <name>0037-apache-tika UNIT</name>
+ <name>0043-checksum UNIT</name>
</test_name>
</test_names>
<parallel>N</parallel>
- <xloc>416</xloc>
- <yloc>112</yloc>
+ <xloc>192</xloc>
+ <yloc>48</yloc>
<attributes_hac/>
</action>
</actions>
<hops>
<hop>
<from>Start</from>
- <to>Run Apache Tika Tests</to>
+ <to>Run Checksum Unit Test</to>
<enabled>Y</enabled>
<evaluation>Y</evaluation>
<unconditional>Y</unconditional>
</hop>
</hops>
<notepads>
- <notepad>
- <note>Test reading CSV files with the CSV Input transform </note>
- <xloc>128</xloc>
- <yloc>208</yloc>
- <width>279</width>
- <heigth>26</heigth>
- <fontname>Inter</fontname>
- <fontsize>11</fontsize>
- <fontbold>N</fontbold>
- <fontitalic>N</fontitalic>
- <fontcolorred>14</fontcolorred>
- <fontcolorgreen>58</fontcolorgreen>
- <fontcolorblue>90</fontcolorblue>
- <backgroundcolorred>201</backgroundcolorred>
- <backgroundcolorgreen>232</backgroundcolorgreen>
- <backgroundcolorblue>251</backgroundcolorblue>
- <bordercolorred>14</bordercolorred>
- <bordercolorgreen>58</bordercolorgreen>
- <bordercolorblue>90</bordercolorblue>
- </notepad>
</notepads>
<attributes/>
</workflow>
diff --git a/integration-tests/transforms/metadata/dataset/golden-checksum.json b/integration-tests/transforms/metadata/dataset/golden-checksum.json
new file mode 100644
index 0000000000..231bc72c60
--- /dev/null
+++ b/integration-tests/transforms/metadata/dataset/golden-checksum.json
@@ -0,0 +1,32 @@
+{
+ "base_filename": "data-set-filename.csv",
+ "name": "golden-checksum",
+ "description": "",
+ "dataset_fields": [
+ {
+ "field_comment": "",
+ "field_length": -1,
+ "field_type": 5,
+ "field_precision": 0,
+ "field_format": "####0;-####0",
+ "field_name": "row"
+ },
+ {
+ "field_comment": "",
+ "field_length": -1,
+ "field_type": 2,
+ "field_precision": -1,
+ "field_format": "",
+ "field_name": "value"
+ },
+ {
+ "field_comment": "",
+ "field_length": -1,
+ "field_type": 2,
+ "field_precision": -1,
+ "field_format": "",
+ "field_name": "checksum"
+ }
+ ],
+ "folder_name": ""
+}
\ No newline at end of file
diff --git a/integration-tests/transforms/metadata/unit-test/0043-checksum UNIT.json b/integration-tests/transforms/metadata/unit-test/0043-checksum UNIT.json
new file mode 100644
index 0000000000..22dd6946fe
--- /dev/null
+++ b/integration-tests/transforms/metadata/unit-test/0043-checksum UNIT.json
@@ -0,0 +1,38 @@
+{
+ "variableValues": [],
+ "database_replacements": [],
+ "autoOpening": true,
+ "basePath": "",
+ "golden_data_sets": [
+ {
+ "field_mappings": [
+ {
+ "transform_field": "row",
+ "data_set_field": "row"
+ },
+ {
+ "transform_field": "value",
+ "data_set_field": "value"
+ },
+ {
+ "transform_field": "checksum",
+ "data_set_field": "checksum"
+ }
+ ],
+ "field_order": [
+ "row",
+ "value",
+ "checksum"
+ ],
+ "transform_name": "verify",
+ "data_set_name": "golden-checksum"
+ }
+ ],
+ "input_data_sets": [],
+ "name": "0043-checksum UNIT",
+ "description": "",
+ "trans_test_tweaks": [],
+ "persist_filename": "",
+ "pipeline_filename": "./0043-checksum.hpl",
+ "test_type": "UNIT_TEST"
+}
\ No newline at end of file
diff --git a/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java b/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java
index 17c2e77666..3e77b05fc2 100644
--- a/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java
+++ b/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java
@@ -160,6 +160,7 @@ public class CheckSum extends BaseTransform<CheckSumMeta, CheckSumData> {
private byte[] createCheckSum(Object[] r) throws Exception {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ boolean valueAdded = false;
// Loop through fields
for (int i = 0; i < data.fieldnr; i++) {
@@ -167,18 +168,20 @@ public class CheckSum extends BaseTransform<CheckSumMeta, CheckSumData> {
if (valueMeta.isBinary()) {
byte[] bytes = getInputRowMeta().getBinary(r, data.fieldnrs[i]);
if (bytes != null) {
+ valueAdded = true;
baos.write(bytes);
}
} else {
Object value = valueMeta.getNativeDataType(r[data.fieldnrs[i]]);
if (value != null) {
+ valueAdded = true;
baos.write(value.toString().getBytes());
}
}
}
// Return null when all input values are null.
- if (baos.size() == 0) {
+ if (!valueAdded) {
return null;
}
@@ -209,6 +212,7 @@ public class CheckSum extends BaseTransform<CheckSumMeta, CheckSumData> {
byte[] byteArray;
ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ boolean valueAdded = false;
// Loop through fields
for (int i = 0; i < data.fieldnr; i++) {
@@ -217,18 +221,20 @@ public class CheckSum extends BaseTransform<CheckSumMeta, CheckSumData> {
if (valueMeta.isBinary()) {
byte[] bytes = getInputRowMeta().getBinary(r, data.fieldnrs[i]);
if (bytes != null) {
+ valueAdded = true;
baos.write(bytes);
}
} else {
Object value = valueMeta.getNativeDataType(r[data.fieldnrs[i]]);
if (value != null) {
+ valueAdded= true;
baos.write(value.toString().getBytes());
}
}
}
// Return null when all input values are null.
- if (baos.size() == 0) {
+ if (!valueAdded) {
return null;
}