You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hop.apache.org by ha...@apache.org on 2022/06/03 06:25:18 UTC
[hop] branch master updated: HOP-3968 [Memory]GroupBy: add an aggregation option to concatenate distinct values
This is an automated email from the ASF dual-hosted git repository.
hansva pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hop.git
The following commit(s) were added to refs/heads/master by this push:
new 3ba20577e1 HOP-3968 [Memory]GroupBy: add an aggregation option to concatenate distinct values
new dc59a9d939 Merge pull request #1515 from nadment/HOP-3968
3ba20577e1 is described below
commit 3ba20577e12cb1e27cc2912a8b431e287410e20a
Author: Nicolas Adment <na...@gmail.com>
AuthorDate: Wed Jun 1 21:07:12 2022 +0200
HOP-3968 [Memory]GroupBy: add an aggregation option to concatenate
distinct values
---
.../ROOT/pages/pipeline/transforms/groupby.adoc | 3 +-
.../transforms/0006-groupby-concat.hpl | 366 +++++++++++++++++++++
.../transforms/datasets/golden-groupby-concat.csv | 7 +
integration-tests/transforms/main-0006-groupby.hwf | 3 +
.../metadata/dataset/golden-groupby-concat.json | 48 +++
.../unit-test/0006-groupby-concat UNIT.json | 48 +++
.../pipeline/transforms/groupby/Aggregation.java | 8 +-
.../hop/pipeline/transforms/groupby/GroupBy.java | 23 +-
.../pipeline/transforms/groupby/GroupByMeta.java | 1 +
.../groupby/messages/messages_en_US.properties | 1 +
.../transforms/memgroupby/MemoryGroupBy.java | 21 ++
.../transforms/memgroupby/MemoryGroupByMeta.java | 9 +-
.../memgroupby/messages/messages_en_US.properties | 1 +
13 files changed, 531 insertions(+), 8 deletions(-)
diff --git a/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/groupby.adoc b/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/groupby.adoc
index e8a9a5ef19..6d11ed7ef0 100644
--- a/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/groupby.adoc
+++ b/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/groupby.adoc
@@ -76,5 +76,6 @@ Here are the available aggregation methods :
- Number of rows (without field argument)
- Standard deviation (sample)
- Percentile (nearest-rank method)
-- Concatenate string separated by by new line (CRLF)
+- Concatenate string separated by new line (CRLF)
+- Concatenate distinct values separated by <Value>: specify the separator in the Value column (This supports xref::variables.adoc#_hexadecimal_values[hexadecimals])
|===
\ No newline at end of file
diff --git a/integration-tests/transforms/0006-groupby-concat.hpl b/integration-tests/transforms/0006-groupby-concat.hpl
new file mode 100644
index 0000000000..6ee0271f9a
--- /dev/null
+++ b/integration-tests/transforms/0006-groupby-concat.hpl
@@ -0,0 +1,366 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+-->
+<pipeline>
+ <info>
+ <name>0006-groupby-concat</name>
+ <name_sync_with_filename>Y</name_sync_with_filename>
+ <description/>
+ <extended_description/>
+ <pipeline_version/>
+ <pipeline_type>Normal</pipeline_type>
+ <parameters>
+ </parameters>
+ <capture_transform_performance>N</capture_transform_performance>
+ <transform_performance_capturing_delay>1000</transform_performance_capturing_delay>
+ <transform_performance_capturing_size_limit>100</transform_performance_capturing_size_limit>
+ <created_user>-</created_user>
+ <created_date>2022/05/31 22:01:25.492</created_date>
+ <modified_user>-</modified_user>
+ <modified_date>2022/05/31 22:01:25.492</modified_date>
+ <key_for_session_key>H4sIAAAAAAAAAAMAAAAAAAAAAAA=</key_for_session_key>
+ <is_key_private>N</is_key_private>
+ </info>
+ <notepads>
+ </notepads>
+ <order>
+ <hop>
+ <from>Data grid</from>
+ <to>Memory group by</to>
+ <enabled>Y</enabled>
+ </hop>
+ <hop>
+ <from>Data grid</from>
+ <to>Sort rows</to>
+ <enabled>Y</enabled>
+ </hop>
+ <hop>
+ <from>Sort rows</from>
+ <to>Group by</to>
+ <enabled>Y</enabled>
+ </hop>
+ <hop>
+ <from>Memory group by</from>
+ <to>Sort rows result</to>
+ <enabled>Y</enabled>
+ </hop>
+ <hop>
+ <from>Group by</from>
+ <to>Sort rows result</to>
+ <enabled>Y</enabled>
+ </hop>
+ </order>
+ <transform>
+ <name>Data grid</name>
+ <type>DataGrid</type>
+ <description/>
+ <distribute>N</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <fields>
+ <field>
+ <set_empty_string>N</set_empty_string>
+ <length>-1</length>
+ <name>KEY1</name>
+ <precision>-1</precision>
+ <type>String</type>
+ </field>
+ <field>
+ <set_empty_string>N</set_empty_string>
+ <length>-1</length>
+ <name>COLOR</name>
+ <precision>-1</precision>
+ <type>String</type>
+ </field>
+ <field>
+ <set_empty_string>N</set_empty_string>
+ <length>-1</length>
+ <name>NUMBER</name>
+ <precision>-1</precision>
+ <type>Integer</type>
+ </field>
+ </fields>
+ <data>
+ <line>
+ <item>A</item>
+ <item>BLUE</item>
+ <item>1</item>
+ </line>
+ <line>
+ <item>A</item>
+ <item>RED</item>
+ <item>1</item>
+ </line>
+ <line>
+ <item>B</item>
+ <item>YELLOW</item>
+ <item>3</item>
+ </line>
+ <line>
+ <item>B</item>
+ <item>RED</item>
+ <item>4</item>
+ </line>
+ <line>
+ <item>B</item>
+ <item>GREEN</item>
+ <item>3</item>
+ </line>
+ <line>
+ <item>B</item>
+ <item>YELLOW</item>
+ <item>10</item>
+ </line>
+ <line>
+ <item>B</item>
+ <item>GREEN</item>
+ <item>1</item>
+ </line>
+ <line>
+ <item>C</item>
+ <item>BLUE</item>
+ <item>2</item>
+ </line>
+ <line>
+ <item>C</item>
+ <item>YELLOW</item>
+ <item>10</item>
+ </line>
+ <line>
+ <item>C</item>
+ <item>YELLOW</item>
+ <item>3</item>
+ </line>
+ <line>
+ <item>A</item>
+ <item>BLUE</item>
+ <item>2</item>
+ </line>
+ <line>
+ <item>A</item>
+ <item>BLUE</item>
+ <item>1</item>
+ </line>
+ <line>
+ <item>A</item>
+ <item>RED</item>
+ <item>3</item>
+ </line>
+ <line>
+ <item>B</item>
+ <item>RED</item>
+ <item>9</item>
+ </line>
+ <line>
+ <item>B</item>
+ <item>RED</item>
+ <item>10</item>
+ </line>
+ <line>
+ <item>B</item>
+ <item>YELLOW</item>
+ <item>1</item>
+ </line>
+ </data>
+ <attributes/>
+ <GUI>
+ <xloc>128</xloc>
+ <yloc>96</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>Group by</name>
+ <type>GroupBy</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <add_linenr>N</add_linenr>
+ <ignore_aggregate>N</ignore_aggregate>
+ <fields>
+ <field>
+ <aggregate>COLORS</aggregate>
+ <subject>COLOR</subject>
+ <type>CONCAT_STRING</type>
+ <valuefield>|</valuefield>
+ </field>
+ <field>
+ <aggregate>DISTINCT_COLORS</aggregate>
+ <subject>COLOR</subject>
+ <type>CONCAT_DISTINCT</type>
+ <valuefield>|</valuefield>
+ </field>
+ <field>
+ <aggregate>NUMBERS</aggregate>
+ <subject>NUMBER</subject>
+ <type>CONCAT_STRING</type>
+ <valuefield>;</valuefield>
+ </field>
+ <field>
+ <aggregate>DISTINCT_NUMBERS</aggregate>
+ <subject>NUMBER</subject>
+ <type>CONCAT_DISTINCT</type>
+ <valuefield>;</valuefield>
+ </field>
+ </fields>
+ <give_back_row>N</give_back_row>
+ <directory>${java.io.tmpdir}</directory>
+ <group>
+ <field>
+ <name>KEY1</name>
+ </field>
+ </group>
+ <linenr_fieldname/>
+ <all_rows>N</all_rows>
+ <prefix>grp</prefix>
+ <attributes/>
+ <GUI>
+ <xloc>256</xloc>
+ <yloc>208</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>Memory group by</name>
+ <type>MemoryGroupBy</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <give_back_row>N</give_back_row>
+ <group>
+ <field>
+ <name>KEY1</name>
+ </field>
+ </group>
+ <fields>
+ <field>
+ <aggregate>COLORS</aggregate>
+ <subject>COLOR</subject>
+ <type>CONCAT_STRING</type>
+ <valuefield>|</valuefield>
+ </field>
+ <field>
+ <aggregate>DISTINCT_COLORS</aggregate>
+ <subject>COLOR</subject>
+ <type>CONCAT_DISTINCT</type>
+ <valuefield>|</valuefield>
+ </field>
+ <field>
+ <aggregate>NUMBERS</aggregate>
+ <subject>NUMBER</subject>
+ <type>CONCAT_STRING</type>
+ <valuefield>;</valuefield>
+ </field>
+ <field>
+ <aggregate>DISTINCT_NUMBERS</aggregate>
+ <subject>NUMBER</subject>
+ <type>CONCAT_DISTINCT</type>
+ <valuefield>;</valuefield>
+ </field>
+ </fields>
+ <attributes/>
+ <GUI>
+ <xloc>256</xloc>
+ <yloc>96</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>Sort rows</name>
+ <type>SortRows</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <directory>${java.io.tmpdir}</directory>
+ <prefix>out</prefix>
+ <sort_size>1000000</sort_size>
+ <free_memory/>
+ <compress>N</compress>
+ <compress_variable/>
+ <unique_rows>N</unique_rows>
+ <fields>
+ <field>
+ <name>KEY1</name>
+ <ascending>Y</ascending>
+ <case_sensitive>N</case_sensitive>
+ <collator_enabled>N</collator_enabled>
+ <collator_strength>0</collator_strength>
+ <presorted>N</presorted>
+ </field>
+ </fields>
+ <attributes/>
+ <GUI>
+ <xloc>128</xloc>
+ <yloc>208</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>Sort rows result</name>
+ <type>SortRows</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <directory>${java.io.tmpdir}</directory>
+ <prefix>out</prefix>
+ <sort_size>1000000</sort_size>
+ <free_memory/>
+ <compress>N</compress>
+ <compress_variable/>
+ <unique_rows>N</unique_rows>
+ <fields>
+ <field>
+ <name>KEY1</name>
+ <ascending>Y</ascending>
+ <case_sensitive>N</case_sensitive>
+ <collator_enabled>N</collator_enabled>
+ <collator_strength>0</collator_strength>
+ <presorted>N</presorted>
+ </field>
+ </fields>
+ <attributes/>
+ <GUI>
+ <xloc>384</xloc>
+ <yloc>144</yloc>
+ </GUI>
+ </transform>
+ <transform_error_handling>
+ </transform_error_handling>
+ <attributes/>
+</pipeline>
diff --git a/integration-tests/transforms/datasets/golden-groupby-concat.csv b/integration-tests/transforms/datasets/golden-groupby-concat.csv
new file mode 100644
index 0000000000..1373204f84
--- /dev/null
+++ b/integration-tests/transforms/datasets/golden-groupby-concat.csv
@@ -0,0 +1,7 @@
+KEY1,COLORS,DISTINCT_COLORS,NUMBERS,DISTINCT_NUMBERS
+A,BLUE|RED|BLUE|BLUE|RED,BLUE|RED,1;1;2;1;3,1;2;3
+A,BLUE|RED|BLUE|BLUE|RED,BLUE|RED,1;1;2;1;3,1;2;3
+B,YELLOW|RED|GREEN|YELLOW|GREEN|RED|RED|YELLOW,GREEN|RED|YELLOW,3;4;3;10;1;9;10;1,1;3;4;9;10
+B,YELLOW|RED|GREEN|YELLOW|GREEN|RED|RED|YELLOW,GREEN|RED|YELLOW,3;4;3;10;1;9;10;1,1;3;4;9;10
+C,BLUE|YELLOW|YELLOW,BLUE|YELLOW,2;10;3,2;3;10
+C,BLUE|YELLOW|YELLOW,BLUE|YELLOW,2;10;3,2;3;10
diff --git a/integration-tests/transforms/main-0006-groupby.hwf b/integration-tests/transforms/main-0006-groupby.hwf
index f458c25713..df618e90df 100644
--- a/integration-tests/transforms/main-0006-groupby.hwf
+++ b/integration-tests/transforms/main-0006-groupby.hwf
@@ -63,6 +63,9 @@ limitations under the License.
<test_name>
<name>0006-groupby-all-rows UNIT</name>
</test_name>
+ <test_name>
+ <name>0006-groupby-concat UNIT</name>
+ </test_name>
</test_names>
<parallel>N</parallel>
<xloc>432</xloc>
diff --git a/integration-tests/transforms/metadata/dataset/golden-groupby-concat.json b/integration-tests/transforms/metadata/dataset/golden-groupby-concat.json
new file mode 100644
index 0000000000..447a62b891
--- /dev/null
+++ b/integration-tests/transforms/metadata/dataset/golden-groupby-concat.json
@@ -0,0 +1,48 @@
+{
+ "base_filename": "golden-groupby-concat.csv",
+ "name": "golden-groupby-concat",
+ "description": "",
+ "dataset_fields": [
+ {
+ "field_comment": "",
+ "field_length": -1,
+ "field_type": 2,
+ "field_precision": -1,
+ "field_format": "",
+ "field_name": "KEY1"
+ },
+ {
+ "field_comment": "",
+ "field_length": -1,
+ "field_type": 2,
+ "field_precision": -1,
+ "field_format": "",
+ "field_name": "COLORS"
+ },
+ {
+ "field_comment": "",
+ "field_length": -1,
+ "field_type": 2,
+ "field_precision": -1,
+ "field_format": "",
+ "field_name": "DISTINCT_COLORS"
+ },
+ {
+ "field_comment": "",
+ "field_length": -1,
+ "field_type": 2,
+ "field_precision": -1,
+ "field_format": "",
+ "field_name": "NUMBERS"
+ },
+ {
+ "field_comment": "",
+ "field_length": -1,
+ "field_type": 2,
+ "field_precision": -1,
+ "field_format": "",
+ "field_name": "DISTINCT_NUMBERS"
+ }
+ ],
+ "folder_name": ""
+}
\ No newline at end of file
diff --git a/integration-tests/transforms/metadata/unit-test/0006-groupby-concat UNIT.json b/integration-tests/transforms/metadata/unit-test/0006-groupby-concat UNIT.json
new file mode 100644
index 0000000000..f70a834510
--- /dev/null
+++ b/integration-tests/transforms/metadata/unit-test/0006-groupby-concat UNIT.json
@@ -0,0 +1,48 @@
+{
+ "variableValues": [],
+ "database_replacements": [],
+ "autoOpening": true,
+ "basePath": "",
+ "golden_data_sets": [
+ {
+ "field_mappings": [
+ {
+ "transform_field": "KEY1",
+ "data_set_field": "KEY1"
+ },
+ {
+ "transform_field": "COLORS",
+ "data_set_field": "COLORS"
+ },
+ {
+ "transform_field": "DISTINCT_COLORS",
+ "data_set_field": "DISTINCT_COLORS"
+ },
+ {
+ "transform_field": "NUMBERS",
+ "data_set_field": "NUMBERS"
+ },
+ {
+ "transform_field": "DISTINCT_NUMBERS",
+ "data_set_field": "DISTINCT_NUMBERS"
+ }
+ ],
+ "field_order": [
+ "KEY1",
+ "COLORS",
+ "DISTINCT_COLORS",
+ "NUMBERS",
+ "DISTINCT_NUMBERS"
+ ],
+ "transform_name": "Sort rows result",
+ "data_set_name": "golden-groupby-concat"
+ }
+ ],
+ "input_data_sets": [],
+ "name": "0006-groupby-concat UNIT",
+ "description": "",
+ "trans_test_tweaks": [],
+ "persist_filename": "",
+ "pipeline_filename": "./0006-groupby-concat.hpl",
+ "test_type": "UNIT_TEST"
+}
\ No newline at end of file
diff --git a/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/Aggregation.java b/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/Aggregation.java
index c12951e75f..aa85141442 100644
--- a/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/Aggregation.java
+++ b/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/Aggregation.java
@@ -69,6 +69,8 @@ public class Aggregation implements Cloneable {
public static final int TYPE_GROUP_CONCAT_STRING_CRLF = 21;
+ public static final int TYPE_GROUP_CONCAT_DISTINCT = 22;
+
public static final String[]
typeGroupLabel = /* WARNING: DO NOT TRANSLATE THIS. WE ARE SERIOUS, DON'T TRANSLATE! */ {
"-",
@@ -92,7 +94,8 @@ public class Aggregation implements Cloneable {
"COUNT_ANY",
"STD_DEV_SAMPLE",
"PERCENTILE_NEAREST_RANK",
- "CONCAT_STRING_CRLF"
+ "CONCAT_STRING_CRLF",
+ "CONCAT_DISTINCT",
};
public static final String[] typeGroupLongDesc = {
@@ -117,7 +120,8 @@ public class Aggregation implements Cloneable {
BaseMessages.getString(PKG, "GroupByMeta.TypeGroupLongDesc.COUNT_ANY"),
BaseMessages.getString(PKG, "GroupByMeta.TypeGroupLongDesc.STANDARD_DEVIATION_SAMPLE"),
BaseMessages.getString(PKG, "GroupByMeta.TypeGroupLongDesc.PERCENTILE_NEAREST_RANK"),
- BaseMessages.getString(PKG, "GroupByMeta.TypeGroupLongDesc.CONCAT_STRING_CRLF")
+ BaseMessages.getString(PKG, "GroupByMeta.TypeGroupLongDesc.CONCAT_STRING_CRLF"),
+ BaseMessages.getString(PKG, "GroupByMeta.TypeGroupLongDesc.CONCAT_DISTINCT")
};
diff --git a/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/GroupBy.java b/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/GroupBy.java
index fc373f9e13..d530825273 100644
--- a/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/GroupBy.java
+++ b/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/GroupBy.java
@@ -33,7 +33,6 @@ import org.apache.hop.pipeline.Pipeline;
import org.apache.hop.pipeline.PipelineMeta;
import org.apache.hop.pipeline.transform.BaseTransform;
import org.apache.hop.pipeline.transform.TransformMeta;
-
import java.io.*;
import java.net.SocketTimeoutException;
import java.util.*;
@@ -537,8 +536,12 @@ public class GroupBy extends BaseTransform<GroupByMeta, GroupByData> {
}
sb.append(subjMeta.getString(subj));
}
-
break;
+ case Aggregation.TYPE_GROUP_CONCAT_DISTINCT:
+ if (subj != null) {
+ SortedSet<Object> set = (SortedSet<Object>) value;
+ set.add(subj);
+ }
default:
break;
}
@@ -616,6 +619,10 @@ public class GroupBy extends BaseTransform<GroupByMeta, GroupByData> {
vMeta = new ValueMetaString(fieldName);
v = new StringBuilder();
break;
+ case Aggregation.TYPE_GROUP_CONCAT_DISTINCT:
+ vMeta = new ValueMetaString(fieldName);
+ v = new TreeSet<>();
+ break;
default:
// TODO raise an error here because we cannot continue successfully maybe the UI should
// validate this
@@ -757,6 +764,18 @@ public class GroupBy extends BaseTransform<GroupByMeta, GroupByData> {
case Aggregation.TYPE_GROUP_CONCAT_STRING:
ag = ((StringBuilder) ag).toString();
break;
+ case Aggregation.TYPE_GROUP_CONCAT_DISTINCT:
+ IValueMeta subjMeta = data.inputRowMeta.getValueMeta(data.subjectnrs[i]);
+ String separator = "";
+ if (!Utils.isEmpty(aggregation.getValue())) {
+ separator = resolve(aggregation.getValue());
+ }
+ StringJoiner joiner = new StringJoiner(separator);
+ for (Object value: (SortedSet<Object>) ag) {
+ joiner.add(subjMeta.getString(value));
+ }
+ ag = joiner.toString();
+ break;
default:
break;
}
diff --git a/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/GroupByMeta.java b/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/GroupByMeta.java
index 52d5930e36..15f8bb5b09 100644
--- a/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/GroupByMeta.java
+++ b/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/GroupByMeta.java
@@ -281,6 +281,7 @@ public class GroupByMeta extends BaseTransformMeta<GroupBy, GroupByData> {
break;
case Aggregation.TYPE_GROUP_CONCAT_STRING:
case Aggregation.TYPE_GROUP_CONCAT_STRING_CRLF:
+ case Aggregation.TYPE_GROUP_CONCAT_DISTINCT:
valueType = IValueMeta.TYPE_STRING;
break;
default:
diff --git a/plugins/transforms/groupby/src/main/resources/org/apache/hop/pipeline/transforms/groupby/messages/messages_en_US.properties b/plugins/transforms/groupby/src/main/resources/org/apache/hop/pipeline/transforms/groupby/messages/messages_en_US.properties
index 95372f846b..3610cce2ca 100644
--- a/plugins/transforms/groupby/src/main/resources/org/apache/hop/pipeline/transforms/groupby/messages/messages_en_US.properties
+++ b/plugins/transforms/groupby/src/main/resources/org/apache/hop/pipeline/transforms/groupby/messages/messages_en_US.properties
@@ -75,6 +75,7 @@ GroupByMeta.TypeGroupLongDesc.MEDIAN=Median
GroupByMeta.TypeGroupLongDesc.COUNT_ANY=Number of rows (without field argument)
GroupByMeta.TypeGroupLongDesc.CONCAT_STRING=Concatenate strings separated by
GroupByMeta.TypeGroupLongDesc.CONCAT_STRING_CRLF=Concatenate strings separated by new line (CRLF)
+GroupByMeta.TypeGroupLongDesc.CONCAT_DISTINCT=Concatenate distinct values separated by
GroupByMeta.Injection.PASS_ALL_ROWS=Pass all rows?
GroupByMeta.Injection.TEMP_DIRECTORY=Temporary directory
GroupByMeta.Injection.TEMP_FILE_PREFIX=Temporary file prefix
diff --git a/plugins/transforms/memgroupby/src/main/java/org/apache/hop/pipeline/transforms/memgroupby/MemoryGroupBy.java b/plugins/transforms/memgroupby/src/main/java/org/apache/hop/pipeline/transforms/memgroupby/MemoryGroupBy.java
index d761d12481..2a05f5b55d 100644
--- a/plugins/transforms/memgroupby/src/main/java/org/apache/hop/pipeline/transforms/memgroupby/MemoryGroupBy.java
+++ b/plugins/transforms/memgroupby/src/main/java/org/apache/hop/pipeline/transforms/memgroupby/MemoryGroupBy.java
@@ -367,6 +367,11 @@ public class MemoryGroupBy extends BaseTransform<MemoryGroupByMeta, MemoryGroupB
sb.append(subjMeta.getString(subj));
}
break;
+ case MemoryGroupByMeta.TYPE_GROUP_CONCAT_DISTINCT:
+ if (subj != null) {
+ SortedSet<Object> set = (SortedSet<Object>) value;
+ set.add(subj);
+ }
default:
break;
}
@@ -436,6 +441,10 @@ public class MemoryGroupBy extends BaseTransform<MemoryGroupByMeta, MemoryGroupB
vMeta = new ValueMetaString(meta.getAggregateField()[i]);
v = new StringBuilder();
break;
+ case MemoryGroupByMeta.TYPE_GROUP_CONCAT_DISTINCT:
+ vMeta = new ValueMetaString(meta.getAggregateField()[i]);
+ v = new TreeSet<>();
+ break;
default:
throw new HopException(
"Unknown data type for aggregation : " + meta.getAggregateField()[i]);
@@ -524,6 +533,18 @@ public class MemoryGroupBy extends BaseTransform<MemoryGroupByMeta, MemoryGroupB
case MemoryGroupByMeta.TYPE_GROUP_CONCAT_STRING:
ag = ((StringBuilder) ag).toString();
break;
+ case MemoryGroupByMeta.TYPE_GROUP_CONCAT_DISTINCT:
+ IValueMeta subjMeta = data.inputRowMeta.getValueMeta(data.subjectnrs[i]);
+ String separator = "";
+ if (!Utils.isEmpty(meta.getValueField()[i])) {
+ separator = resolve(meta.getValueField()[i]);
+ }
+ StringJoiner joiner = new StringJoiner(separator);
+ for (Object value: (SortedSet<Object>) ag) {
+ joiner.add(subjMeta.getString(value));
+ }
+ ag = joiner.toString();
+ break;
default:
break;
}
diff --git a/plugins/transforms/memgroupby/src/main/java/org/apache/hop/pipeline/transforms/memgroupby/MemoryGroupByMeta.java b/plugins/transforms/memgroupby/src/main/java/org/apache/hop/pipeline/transforms/memgroupby/MemoryGroupByMeta.java
index f116026013..a11cc98a38 100644
--- a/plugins/transforms/memgroupby/src/main/java/org/apache/hop/pipeline/transforms/memgroupby/MemoryGroupByMeta.java
+++ b/plugins/transforms/memgroupby/src/main/java/org/apache/hop/pipeline/transforms/memgroupby/MemoryGroupByMeta.java
@@ -91,6 +91,8 @@ public class MemoryGroupByMeta extends BaseTransformMeta<MemoryGroupBy, MemoryGr
public static final int TYPE_GROUP_COUNT_DISTINCT = 15;
public static final int TYPE_GROUP_COUNT_ANY = 16;
+
+ public static final int TYPE_GROUP_CONCAT_DISTINCT = 17;
public static final String[]
typeGroupCode = /* WARNING: DO NOT TRANSLATE THIS. WE ARE SERIOUS, DON'T TRANSLATE! */ {
@@ -111,6 +113,7 @@ public class MemoryGroupByMeta extends BaseTransformMeta<MemoryGroupBy, MemoryGr
"CONCAT_STRING",
"COUNT_DISTINCT",
"COUNT_ANY",
+ "CONCAT_DISTINCT",
};
public static final String[] typeGroupLongDesc = {
@@ -131,6 +134,7 @@ public class MemoryGroupByMeta extends BaseTransformMeta<MemoryGroupBy, MemoryGr
BaseMessages.getString(PKG, "MemoryGroupByMeta.TypeGroupLongDesc.CONCAT_STRING"),
BaseMessages.getString(PKG, "MemoryGroupByMeta.TypeGroupLongDesc.COUNT_DISTINCT"),
BaseMessages.getString(PKG, "MemoryGroupByMeta.TypeGroupLongDesc.COUNT_ANY"),
+ BaseMessages.getString(PKG, "MemoryGroupByMeta.TypeGroupLongDesc.CONCAT_DISTINCT"),
};
@Injection(name = "GROUPFIELD", group = "FIELDS")
@@ -388,9 +392,6 @@ public class MemoryGroupByMeta extends BaseTransformMeta<MemoryGroupBy, MemoryGr
case TYPE_GROUP_COUNT_ANY:
valueType = IValueMeta.TYPE_INTEGER;
break;
- case TYPE_GROUP_CONCAT_COMMA:
- valueType = IValueMeta.TYPE_STRING;
- break;
case TYPE_GROUP_SUM:
case TYPE_GROUP_AVERAGE:
if (subj.isNumeric()) {
@@ -404,7 +405,9 @@ public class MemoryGroupByMeta extends BaseTransformMeta<MemoryGroupBy, MemoryGr
case TYPE_GROUP_STANDARD_DEVIATION:
valueType = IValueMeta.TYPE_NUMBER;
break;
+ case TYPE_GROUP_CONCAT_COMMA:
case TYPE_GROUP_CONCAT_STRING:
+ case TYPE_GROUP_CONCAT_DISTINCT:
valueType = IValueMeta.TYPE_STRING;
break;
default:
diff --git a/plugins/transforms/memgroupby/src/main/resources/org/apache/hop/pipeline/transforms/memgroupby/messages/messages_en_US.properties b/plugins/transforms/memgroupby/src/main/resources/org/apache/hop/pipeline/transforms/memgroupby/messages/messages_en_US.properties
index f33bfe686b..b00e052c86 100644
--- a/plugins/transforms/memgroupby/src/main/resources/org/apache/hop/pipeline/transforms/memgroupby/messages/messages_en_US.properties
+++ b/plugins/transforms/memgroupby/src/main/resources/org/apache/hop/pipeline/transforms/memgroupby/messages/messages_en_US.properties
@@ -43,6 +43,7 @@ MemoryGroupByMeta.TypeGroupLongDesc.LAST_INCL_NULL=Last value
MemoryGroupByDialog.Log.GettingKeyInfo=getting key info...
MemoryGroupByDialog.AlwaysAddResult.ToolTip=To make sure we always output a correct count aggregation we always output at least one row, even if there were no input rows.\nThis makes the behavior consistent with the aggregation in an SQL GROUP BY.
MemoryGroupByMeta.TypeGroupLongDesc.CONCAT_STRING=Concatenate strings separated by
+MemoryGroupByMeta.TypeGroupLongDesc.CONCAT_DISTINCT=Concatenate distinct values separated by
MemoryGroupByMeta.TypeGroupLongDesc.SUM=Sum
MemoryGroupByDialog.GetFields.Button=\ &Get Fields
MemoryGroupByDialog.ColumnInfo.GroupField=Group field