You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hop.apache.org by ha...@apache.org on 2022/06/03 06:25:18 UTC

[hop] branch master updated: HOP-3968 [Memory]GroupBy: add an aggregation option to concatenate distinct values

This is an automated email from the ASF dual-hosted git repository.

hansva pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hop.git


The following commit(s) were added to refs/heads/master by this push:
     new 3ba20577e1 HOP-3968 [Memory]GroupBy: add an aggregation option to concatenate distinct values
     new dc59a9d939 Merge pull request #1515 from nadment/HOP-3968
3ba20577e1 is described below

commit 3ba20577e12cb1e27cc2912a8b431e287410e20a
Author: Nicolas Adment <na...@gmail.com>
AuthorDate: Wed Jun 1 21:07:12 2022 +0200

    HOP-3968 [Memory]GroupBy: add an aggregation option to concatenate
    distinct values
---
 .../ROOT/pages/pipeline/transforms/groupby.adoc    |   3 +-
 .../transforms/0006-groupby-concat.hpl             | 366 +++++++++++++++++++++
 .../transforms/datasets/golden-groupby-concat.csv  |   7 +
 integration-tests/transforms/main-0006-groupby.hwf |   3 +
 .../metadata/dataset/golden-groupby-concat.json    |  48 +++
 .../unit-test/0006-groupby-concat UNIT.json        |  48 +++
 .../pipeline/transforms/groupby/Aggregation.java   |   8 +-
 .../hop/pipeline/transforms/groupby/GroupBy.java   |  23 +-
 .../pipeline/transforms/groupby/GroupByMeta.java   |   1 +
 .../groupby/messages/messages_en_US.properties     |   1 +
 .../transforms/memgroupby/MemoryGroupBy.java       |  21 ++
 .../transforms/memgroupby/MemoryGroupByMeta.java   |   9 +-
 .../memgroupby/messages/messages_en_US.properties  |   1 +
 13 files changed, 531 insertions(+), 8 deletions(-)

diff --git a/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/groupby.adoc b/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/groupby.adoc
index e8a9a5ef19..6d11ed7ef0 100644
--- a/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/groupby.adoc
+++ b/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/groupby.adoc
@@ -76,5 +76,6 @@ Here are the available aggregation methods :
 - Number of rows (without field argument)
 - Standard deviation (sample)
 - Percentile (nearest-rank method)
-- Concatenate string separated by by new line (CRLF)
+- Concatenate string separated by new line (CRLF)
+- Concatenate distinct values separated by <Value>: specify the separator in the Value column (This supports xref::variables.adoc#_hexadecimal_values[hexadecimals])
 |===
\ No newline at end of file
diff --git a/integration-tests/transforms/0006-groupby-concat.hpl b/integration-tests/transforms/0006-groupby-concat.hpl
new file mode 100644
index 0000000000..6ee0271f9a
--- /dev/null
+++ b/integration-tests/transforms/0006-groupby-concat.hpl
@@ -0,0 +1,366 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+-->
+<pipeline>
+  <info>
+    <name>0006-groupby-concat</name>
+    <name_sync_with_filename>Y</name_sync_with_filename>
+    <description/>
+    <extended_description/>
+    <pipeline_version/>
+    <pipeline_type>Normal</pipeline_type>
+    <parameters>
+    </parameters>
+    <capture_transform_performance>N</capture_transform_performance>
+    <transform_performance_capturing_delay>1000</transform_performance_capturing_delay>
+    <transform_performance_capturing_size_limit>100</transform_performance_capturing_size_limit>
+    <created_user>-</created_user>
+    <created_date>2022/05/31 22:01:25.492</created_date>
+    <modified_user>-</modified_user>
+    <modified_date>2022/05/31 22:01:25.492</modified_date>
+    <key_for_session_key>H4sIAAAAAAAAAAMAAAAAAAAAAAA=</key_for_session_key>
+    <is_key_private>N</is_key_private>
+  </info>
+  <notepads>
+  </notepads>
+  <order>
+    <hop>
+      <from>Data grid</from>
+      <to>Memory group by</to>
+      <enabled>Y</enabled>
+    </hop>
+    <hop>
+      <from>Data grid</from>
+      <to>Sort rows</to>
+      <enabled>Y</enabled>
+    </hop>
+    <hop>
+      <from>Sort rows</from>
+      <to>Group by</to>
+      <enabled>Y</enabled>
+    </hop>
+    <hop>
+      <from>Memory group by</from>
+      <to>Sort rows result</to>
+      <enabled>Y</enabled>
+    </hop>
+    <hop>
+      <from>Group by</from>
+      <to>Sort rows result</to>
+      <enabled>Y</enabled>
+    </hop>
+  </order>
+  <transform>
+    <name>Data grid</name>
+    <type>DataGrid</type>
+    <description/>
+    <distribute>N</distribute>
+    <custom_distribution/>
+    <copies>1</copies>
+    <partitioning>
+      <method>none</method>
+      <schema_name/>
+    </partitioning>
+    <fields>
+      <field>
+        <set_empty_string>N</set_empty_string>
+        <length>-1</length>
+        <name>KEY1</name>
+        <precision>-1</precision>
+        <type>String</type>
+      </field>
+      <field>
+        <set_empty_string>N</set_empty_string>
+        <length>-1</length>
+        <name>COLOR</name>
+        <precision>-1</precision>
+        <type>String</type>
+      </field>
+      <field>
+        <set_empty_string>N</set_empty_string>
+        <length>-1</length>
+        <name>NUMBER</name>
+        <precision>-1</precision>
+        <type>Integer</type>
+      </field>
+    </fields>
+    <data>
+      <line>
+        <item>A</item>
+        <item>BLUE</item>
+        <item>1</item>
+      </line>
+      <line>
+        <item>A</item>
+        <item>RED</item>
+        <item>1</item>
+      </line>
+      <line>
+        <item>B</item>
+        <item>YELLOW</item>
+        <item>3</item>
+      </line>
+      <line>
+        <item>B</item>
+        <item>RED</item>
+        <item>4</item>
+      </line>
+      <line>
+        <item>B</item>
+        <item>GREEN</item>
+        <item>3</item>
+      </line>
+      <line>
+        <item>B</item>
+        <item>YELLOW</item>
+        <item>10</item>
+      </line>
+      <line>
+        <item>B</item>
+        <item>GREEN</item>
+        <item>1</item>
+      </line>
+      <line>
+        <item>C</item>
+        <item>BLUE</item>
+        <item>2</item>
+      </line>
+      <line>
+        <item>C</item>
+        <item>YELLOW</item>
+        <item>10</item>
+      </line>
+      <line>
+        <item>C</item>
+        <item>YELLOW</item>
+        <item>3</item>
+      </line>
+      <line>
+        <item>A</item>
+        <item>BLUE</item>
+        <item>2</item>
+      </line>
+      <line>
+        <item>A</item>
+        <item>BLUE</item>
+        <item>1</item>
+      </line>
+      <line>
+        <item>A</item>
+        <item>RED</item>
+        <item>3</item>
+      </line>
+      <line>
+        <item>B</item>
+        <item>RED</item>
+        <item>9</item>
+      </line>
+      <line>
+        <item>B</item>
+        <item>RED</item>
+        <item>10</item>
+      </line>
+      <line>
+        <item>B</item>
+        <item>YELLOW</item>
+        <item>1</item>
+      </line>
+    </data>
+    <attributes/>
+    <GUI>
+      <xloc>128</xloc>
+      <yloc>96</yloc>
+    </GUI>
+  </transform>
+  <transform>
+    <name>Group by</name>
+    <type>GroupBy</type>
+    <description/>
+    <distribute>Y</distribute>
+    <custom_distribution/>
+    <copies>1</copies>
+    <partitioning>
+      <method>none</method>
+      <schema_name/>
+    </partitioning>
+    <add_linenr>N</add_linenr>
+    <ignore_aggregate>N</ignore_aggregate>
+    <fields>
+      <field>
+        <aggregate>COLORS</aggregate>
+        <subject>COLOR</subject>
+        <type>CONCAT_STRING</type>
+        <valuefield>|</valuefield>
+      </field>
+      <field>
+        <aggregate>DISTINCT_COLORS</aggregate>
+        <subject>COLOR</subject>
+        <type>CONCAT_DISTINCT</type>
+        <valuefield>|</valuefield>
+      </field>
+      <field>
+        <aggregate>NUMBERS</aggregate>
+        <subject>NUMBER</subject>
+        <type>CONCAT_STRING</type>
+        <valuefield>;</valuefield>
+      </field>
+      <field>
+        <aggregate>DISTINCT_NUMBERS</aggregate>
+        <subject>NUMBER</subject>
+        <type>CONCAT_DISTINCT</type>
+        <valuefield>;</valuefield>
+      </field>
+    </fields>
+    <give_back_row>N</give_back_row>
+    <directory>${java.io.tmpdir}</directory>
+    <group>
+      <field>
+        <name>KEY1</name>
+      </field>
+    </group>
+    <linenr_fieldname/>
+    <all_rows>N</all_rows>
+    <prefix>grp</prefix>
+    <attributes/>
+    <GUI>
+      <xloc>256</xloc>
+      <yloc>208</yloc>
+    </GUI>
+  </transform>
+  <transform>
+    <name>Memory group by</name>
+    <type>MemoryGroupBy</type>
+    <description/>
+    <distribute>Y</distribute>
+    <custom_distribution/>
+    <copies>1</copies>
+    <partitioning>
+      <method>none</method>
+      <schema_name/>
+    </partitioning>
+    <give_back_row>N</give_back_row>
+    <group>
+      <field>
+        <name>KEY1</name>
+      </field>
+    </group>
+    <fields>
+      <field>
+        <aggregate>COLORS</aggregate>
+        <subject>COLOR</subject>
+        <type>CONCAT_STRING</type>
+        <valuefield>|</valuefield>
+      </field>
+      <field>
+        <aggregate>DISTINCT_COLORS</aggregate>
+        <subject>COLOR</subject>
+        <type>CONCAT_DISTINCT</type>
+        <valuefield>|</valuefield>
+      </field>
+      <field>
+        <aggregate>NUMBERS</aggregate>
+        <subject>NUMBER</subject>
+        <type>CONCAT_STRING</type>
+        <valuefield>;</valuefield>
+      </field>
+      <field>
+        <aggregate>DISTINCT_NUMBERS</aggregate>
+        <subject>NUMBER</subject>
+        <type>CONCAT_DISTINCT</type>
+        <valuefield>;</valuefield>
+      </field>
+    </fields>
+    <attributes/>
+    <GUI>
+      <xloc>256</xloc>
+      <yloc>96</yloc>
+    </GUI>
+  </transform>
+  <transform>
+    <name>Sort rows</name>
+    <type>SortRows</type>
+    <description/>
+    <distribute>Y</distribute>
+    <custom_distribution/>
+    <copies>1</copies>
+    <partitioning>
+      <method>none</method>
+      <schema_name/>
+    </partitioning>
+    <directory>${java.io.tmpdir}</directory>
+    <prefix>out</prefix>
+    <sort_size>1000000</sort_size>
+    <free_memory/>
+    <compress>N</compress>
+    <compress_variable/>
+    <unique_rows>N</unique_rows>
+    <fields>
+      <field>
+        <name>KEY1</name>
+        <ascending>Y</ascending>
+        <case_sensitive>N</case_sensitive>
+        <collator_enabled>N</collator_enabled>
+        <collator_strength>0</collator_strength>
+        <presorted>N</presorted>
+      </field>
+    </fields>
+    <attributes/>
+    <GUI>
+      <xloc>128</xloc>
+      <yloc>208</yloc>
+    </GUI>
+  </transform>
+  <transform>
+    <name>Sort rows result</name>
+    <type>SortRows</type>
+    <description/>
+    <distribute>Y</distribute>
+    <custom_distribution/>
+    <copies>1</copies>
+    <partitioning>
+      <method>none</method>
+      <schema_name/>
+    </partitioning>
+    <directory>${java.io.tmpdir}</directory>
+    <prefix>out</prefix>
+    <sort_size>1000000</sort_size>
+    <free_memory/>
+    <compress>N</compress>
+    <compress_variable/>
+    <unique_rows>N</unique_rows>
+    <fields>
+      <field>
+        <name>KEY1</name>
+        <ascending>Y</ascending>
+        <case_sensitive>N</case_sensitive>
+        <collator_enabled>N</collator_enabled>
+        <collator_strength>0</collator_strength>
+        <presorted>N</presorted>
+      </field>
+    </fields>
+    <attributes/>
+    <GUI>
+      <xloc>384</xloc>
+      <yloc>144</yloc>
+    </GUI>
+  </transform>
+  <transform_error_handling>
+  </transform_error_handling>
+  <attributes/>
+</pipeline>
diff --git a/integration-tests/transforms/datasets/golden-groupby-concat.csv b/integration-tests/transforms/datasets/golden-groupby-concat.csv
new file mode 100644
index 0000000000..1373204f84
--- /dev/null
+++ b/integration-tests/transforms/datasets/golden-groupby-concat.csv
@@ -0,0 +1,7 @@
+KEY1,COLORS,DISTINCT_COLORS,NUMBERS,DISTINCT_NUMBERS
+A,BLUE|RED|BLUE|BLUE|RED,BLUE|RED,1;1;2;1;3,1;2;3
+A,BLUE|RED|BLUE|BLUE|RED,BLUE|RED,1;1;2;1;3,1;2;3
+B,YELLOW|RED|GREEN|YELLOW|GREEN|RED|RED|YELLOW,GREEN|RED|YELLOW,3;4;3;10;1;9;10;1,1;3;4;9;10
+B,YELLOW|RED|GREEN|YELLOW|GREEN|RED|RED|YELLOW,GREEN|RED|YELLOW,3;4;3;10;1;9;10;1,1;3;4;9;10
+C,BLUE|YELLOW|YELLOW,BLUE|YELLOW,2;10;3,2;3;10
+C,BLUE|YELLOW|YELLOW,BLUE|YELLOW,2;10;3,2;3;10
diff --git a/integration-tests/transforms/main-0006-groupby.hwf b/integration-tests/transforms/main-0006-groupby.hwf
index f458c25713..df618e90df 100644
--- a/integration-tests/transforms/main-0006-groupby.hwf
+++ b/integration-tests/transforms/main-0006-groupby.hwf
@@ -63,6 +63,9 @@ limitations under the License.
         <test_name>
           <name>0006-groupby-all-rows UNIT</name>
         </test_name>
+        <test_name>
+          <name>0006-groupby-concat UNIT</name>
+        </test_name>
       </test_names>
       <parallel>N</parallel>
       <xloc>432</xloc>
diff --git a/integration-tests/transforms/metadata/dataset/golden-groupby-concat.json b/integration-tests/transforms/metadata/dataset/golden-groupby-concat.json
new file mode 100644
index 0000000000..447a62b891
--- /dev/null
+++ b/integration-tests/transforms/metadata/dataset/golden-groupby-concat.json
@@ -0,0 +1,48 @@
+{
+  "base_filename": "golden-groupby-concat.csv",
+  "name": "golden-groupby-concat",
+  "description": "",
+  "dataset_fields": [
+    {
+      "field_comment": "",
+      "field_length": -1,
+      "field_type": 2,
+      "field_precision": -1,
+      "field_format": "",
+      "field_name": "KEY1"
+    },
+    {
+      "field_comment": "",
+      "field_length": -1,
+      "field_type": 2,
+      "field_precision": -1,
+      "field_format": "",
+      "field_name": "COLORS"
+    },
+    {
+      "field_comment": "",
+      "field_length": -1,
+      "field_type": 2,
+      "field_precision": -1,
+      "field_format": "",
+      "field_name": "DISTINCT_COLORS"
+    },
+    {
+      "field_comment": "",
+      "field_length": -1,
+      "field_type": 2,
+      "field_precision": -1,
+      "field_format": "",
+      "field_name": "NUMBERS"
+    },
+    {
+      "field_comment": "",
+      "field_length": -1,
+      "field_type": 2,
+      "field_precision": -1,
+      "field_format": "",
+      "field_name": "DISTINCT_NUMBERS"
+    }
+  ],
+  "folder_name": ""
+}
\ No newline at end of file
diff --git a/integration-tests/transforms/metadata/unit-test/0006-groupby-concat UNIT.json b/integration-tests/transforms/metadata/unit-test/0006-groupby-concat UNIT.json
new file mode 100644
index 0000000000..f70a834510
--- /dev/null
+++ b/integration-tests/transforms/metadata/unit-test/0006-groupby-concat UNIT.json	
@@ -0,0 +1,48 @@
+{
+  "variableValues": [],
+  "database_replacements": [],
+  "autoOpening": true,
+  "basePath": "",
+  "golden_data_sets": [
+    {
+      "field_mappings": [
+        {
+          "transform_field": "KEY1",
+          "data_set_field": "KEY1"
+        },
+        {
+          "transform_field": "COLORS",
+          "data_set_field": "COLORS"
+        },
+        {
+          "transform_field": "DISTINCT_COLORS",
+          "data_set_field": "DISTINCT_COLORS"
+        },
+        {
+          "transform_field": "NUMBERS",
+          "data_set_field": "NUMBERS"
+        },
+        {
+          "transform_field": "DISTINCT_NUMBERS",
+          "data_set_field": "DISTINCT_NUMBERS"
+        }
+      ],
+      "field_order": [
+        "KEY1",
+        "COLORS",
+        "DISTINCT_COLORS",
+        "NUMBERS",
+        "DISTINCT_NUMBERS"
+      ],
+      "transform_name": "Sort rows result",
+      "data_set_name": "golden-groupby-concat"
+    }
+  ],
+  "input_data_sets": [],
+  "name": "0006-groupby-concat UNIT",
+  "description": "",
+  "trans_test_tweaks": [],
+  "persist_filename": "",
+  "pipeline_filename": "./0006-groupby-concat.hpl",
+  "test_type": "UNIT_TEST"
+}
\ No newline at end of file
diff --git a/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/Aggregation.java b/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/Aggregation.java
index c12951e75f..aa85141442 100644
--- a/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/Aggregation.java
+++ b/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/Aggregation.java
@@ -69,6 +69,8 @@ public class Aggregation implements Cloneable {
 
   public static final int TYPE_GROUP_CONCAT_STRING_CRLF = 21;
 
+  public static final int TYPE_GROUP_CONCAT_DISTINCT = 22;
+  
   public static final String[]
           typeGroupLabel = /* WARNING: DO NOT TRANSLATE THIS. WE ARE SERIOUS, DON'T TRANSLATE! */ {
           "-",
@@ -92,7 +94,8 @@ public class Aggregation implements Cloneable {
           "COUNT_ANY",
           "STD_DEV_SAMPLE",
           "PERCENTILE_NEAREST_RANK",
-          "CONCAT_STRING_CRLF"
+          "CONCAT_STRING_CRLF",
+          "CONCAT_DISTINCT",
   };
 
   public static final String[] typeGroupLongDesc = {
@@ -117,7 +120,8 @@ public class Aggregation implements Cloneable {
           BaseMessages.getString(PKG, "GroupByMeta.TypeGroupLongDesc.COUNT_ANY"),
           BaseMessages.getString(PKG, "GroupByMeta.TypeGroupLongDesc.STANDARD_DEVIATION_SAMPLE"),
           BaseMessages.getString(PKG, "GroupByMeta.TypeGroupLongDesc.PERCENTILE_NEAREST_RANK"),
-          BaseMessages.getString(PKG, "GroupByMeta.TypeGroupLongDesc.CONCAT_STRING_CRLF")
+          BaseMessages.getString(PKG, "GroupByMeta.TypeGroupLongDesc.CONCAT_STRING_CRLF"),
+          BaseMessages.getString(PKG, "GroupByMeta.TypeGroupLongDesc.CONCAT_DISTINCT")
   };
 
 
diff --git a/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/GroupBy.java b/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/GroupBy.java
index fc373f9e13..d530825273 100644
--- a/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/GroupBy.java
+++ b/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/GroupBy.java
@@ -33,7 +33,6 @@ import org.apache.hop.pipeline.Pipeline;
 import org.apache.hop.pipeline.PipelineMeta;
 import org.apache.hop.pipeline.transform.BaseTransform;
 import org.apache.hop.pipeline.transform.TransformMeta;
-
 import java.io.*;
 import java.net.SocketTimeoutException;
 import java.util.*;
@@ -537,8 +536,12 @@ public class GroupBy extends BaseTransform<GroupByMeta, GroupByData> {
             }
             sb.append(subjMeta.getString(subj));
           }
-
           break;
+        case Aggregation.TYPE_GROUP_CONCAT_DISTINCT:
+          if (subj != null) {
+            SortedSet<Object> set = (SortedSet<Object>) value;
+            set.add(subj);
+          }          
         default:
           break;
       }
@@ -616,6 +619,10 @@ public class GroupBy extends BaseTransform<GroupByMeta, GroupByData> {
           vMeta = new ValueMetaString(fieldName);
           v = new StringBuilder();
           break;
+        case Aggregation.TYPE_GROUP_CONCAT_DISTINCT:
+          vMeta = new ValueMetaString(fieldName);
+          v = new TreeSet<>();
+          break;          
         default:
           // TODO raise an error here because we cannot continue successfully maybe the UI should
           // validate this
@@ -757,6 +764,18 @@ public class GroupBy extends BaseTransform<GroupByMeta, GroupByData> {
         case Aggregation.TYPE_GROUP_CONCAT_STRING:
           ag = ((StringBuilder) ag).toString();
           break;
+        case Aggregation.TYPE_GROUP_CONCAT_DISTINCT:
+          IValueMeta subjMeta = data.inputRowMeta.getValueMeta(data.subjectnrs[i]);
+          String separator = "";
+          if (!Utils.isEmpty(aggregation.getValue())) {
+            separator = resolve(aggregation.getValue());
+          }
+          StringJoiner joiner = new StringJoiner(separator);         
+          for (Object value: (SortedSet<Object>) ag) {
+              joiner.add(subjMeta.getString(value));
+          }
+          ag = joiner.toString();
+          break;           
         default:
           break;
       }
diff --git a/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/GroupByMeta.java b/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/GroupByMeta.java
index 52d5930e36..15f8bb5b09 100644
--- a/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/GroupByMeta.java
+++ b/plugins/transforms/groupby/src/main/java/org/apache/hop/pipeline/transforms/groupby/GroupByMeta.java
@@ -281,6 +281,7 @@ public class GroupByMeta extends BaseTransformMeta<GroupBy, GroupByData> {
             break;
           case Aggregation.TYPE_GROUP_CONCAT_STRING:
           case Aggregation.TYPE_GROUP_CONCAT_STRING_CRLF:
+          case Aggregation.TYPE_GROUP_CONCAT_DISTINCT:
             valueType = IValueMeta.TYPE_STRING;
             break;
           default:
diff --git a/plugins/transforms/groupby/src/main/resources/org/apache/hop/pipeline/transforms/groupby/messages/messages_en_US.properties b/plugins/transforms/groupby/src/main/resources/org/apache/hop/pipeline/transforms/groupby/messages/messages_en_US.properties
index 95372f846b..3610cce2ca 100644
--- a/plugins/transforms/groupby/src/main/resources/org/apache/hop/pipeline/transforms/groupby/messages/messages_en_US.properties
+++ b/plugins/transforms/groupby/src/main/resources/org/apache/hop/pipeline/transforms/groupby/messages/messages_en_US.properties
@@ -75,6 +75,7 @@ GroupByMeta.TypeGroupLongDesc.MEDIAN=Median
 GroupByMeta.TypeGroupLongDesc.COUNT_ANY=Number of rows (without field argument)
 GroupByMeta.TypeGroupLongDesc.CONCAT_STRING=Concatenate strings separated by
 GroupByMeta.TypeGroupLongDesc.CONCAT_STRING_CRLF=Concatenate strings separated by new line (CRLF)
+GroupByMeta.TypeGroupLongDesc.CONCAT_DISTINCT=Concatenate distinct values separated by
 GroupByMeta.Injection.PASS_ALL_ROWS=Pass all rows?
 GroupByMeta.Injection.TEMP_DIRECTORY=Temporary directory
 GroupByMeta.Injection.TEMP_FILE_PREFIX=Temporary file prefix
diff --git a/plugins/transforms/memgroupby/src/main/java/org/apache/hop/pipeline/transforms/memgroupby/MemoryGroupBy.java b/plugins/transforms/memgroupby/src/main/java/org/apache/hop/pipeline/transforms/memgroupby/MemoryGroupBy.java
index d761d12481..2a05f5b55d 100644
--- a/plugins/transforms/memgroupby/src/main/java/org/apache/hop/pipeline/transforms/memgroupby/MemoryGroupBy.java
+++ b/plugins/transforms/memgroupby/src/main/java/org/apache/hop/pipeline/transforms/memgroupby/MemoryGroupBy.java
@@ -367,6 +367,11 @@ public class MemoryGroupBy extends BaseTransform<MemoryGroupByMeta, MemoryGroupB
             sb.append(subjMeta.getString(subj));
           }
           break;
+        case MemoryGroupByMeta.TYPE_GROUP_CONCAT_DISTINCT:
+          if (subj != null) {
+            SortedSet<Object> set = (SortedSet<Object>) value;
+            set.add(subj);
+          }
         default:
           break;
       }
@@ -436,6 +441,10 @@ public class MemoryGroupBy extends BaseTransform<MemoryGroupByMeta, MemoryGroupB
           vMeta = new ValueMetaString(meta.getAggregateField()[i]);
           v = new StringBuilder();
           break;
+        case MemoryGroupByMeta.TYPE_GROUP_CONCAT_DISTINCT:
+          vMeta = new ValueMetaString(meta.getAggregateField()[i]);
+          v = new TreeSet<>();
+          break;          
         default:
           throw new HopException(
               "Unknown data type for aggregation : " + meta.getAggregateField()[i]);
@@ -524,6 +533,18 @@ public class MemoryGroupBy extends BaseTransform<MemoryGroupByMeta, MemoryGroupB
           case MemoryGroupByMeta.TYPE_GROUP_CONCAT_STRING:
             ag = ((StringBuilder) ag).toString();
             break;
+          case MemoryGroupByMeta.TYPE_GROUP_CONCAT_DISTINCT:
+            IValueMeta subjMeta = data.inputRowMeta.getValueMeta(data.subjectnrs[i]);
+            String separator = "";
+            if (!Utils.isEmpty(meta.getValueField()[i])) {
+              separator = resolve(meta.getValueField()[i]);
+            }
+            StringJoiner joiner = new StringJoiner(separator);         
+            for (Object value: (SortedSet<Object>) ag) {
+                joiner.add(subjMeta.getString(value));
+            }
+            ag = joiner.toString();
+            break;            
           default:
             break;
         }
diff --git a/plugins/transforms/memgroupby/src/main/java/org/apache/hop/pipeline/transforms/memgroupby/MemoryGroupByMeta.java b/plugins/transforms/memgroupby/src/main/java/org/apache/hop/pipeline/transforms/memgroupby/MemoryGroupByMeta.java
index f116026013..a11cc98a38 100644
--- a/plugins/transforms/memgroupby/src/main/java/org/apache/hop/pipeline/transforms/memgroupby/MemoryGroupByMeta.java
+++ b/plugins/transforms/memgroupby/src/main/java/org/apache/hop/pipeline/transforms/memgroupby/MemoryGroupByMeta.java
@@ -91,6 +91,8 @@ public class MemoryGroupByMeta extends BaseTransformMeta<MemoryGroupBy, MemoryGr
   public static final int TYPE_GROUP_COUNT_DISTINCT = 15;
 
   public static final int TYPE_GROUP_COUNT_ANY = 16;
+  
+  public static final int TYPE_GROUP_CONCAT_DISTINCT = 17;
 
   public static final String[]
       typeGroupCode = /* WARNING: DO NOT TRANSLATE THIS. WE ARE SERIOUS, DON'T TRANSLATE! */ {
@@ -111,6 +113,7 @@ public class MemoryGroupByMeta extends BaseTransformMeta<MemoryGroupBy, MemoryGr
     "CONCAT_STRING",
     "COUNT_DISTINCT",
     "COUNT_ANY",
+    "CONCAT_DISTINCT",
   };
 
   public static final String[] typeGroupLongDesc = {
@@ -131,6 +134,7 @@ public class MemoryGroupByMeta extends BaseTransformMeta<MemoryGroupBy, MemoryGr
     BaseMessages.getString(PKG, "MemoryGroupByMeta.TypeGroupLongDesc.CONCAT_STRING"),
     BaseMessages.getString(PKG, "MemoryGroupByMeta.TypeGroupLongDesc.COUNT_DISTINCT"),
     BaseMessages.getString(PKG, "MemoryGroupByMeta.TypeGroupLongDesc.COUNT_ANY"),
+    BaseMessages.getString(PKG, "MemoryGroupByMeta.TypeGroupLongDesc.CONCAT_DISTINCT"),
   };
 
   @Injection(name = "GROUPFIELD", group = "FIELDS")
@@ -388,9 +392,6 @@ public class MemoryGroupByMeta extends BaseTransformMeta<MemoryGroupBy, MemoryGr
           case TYPE_GROUP_COUNT_ANY:
             valueType = IValueMeta.TYPE_INTEGER;
             break;
-          case TYPE_GROUP_CONCAT_COMMA:
-            valueType = IValueMeta.TYPE_STRING;
-            break;
           case TYPE_GROUP_SUM:
           case TYPE_GROUP_AVERAGE:
             if (subj.isNumeric()) {
@@ -404,7 +405,9 @@ public class MemoryGroupByMeta extends BaseTransformMeta<MemoryGroupBy, MemoryGr
           case TYPE_GROUP_STANDARD_DEVIATION:
             valueType = IValueMeta.TYPE_NUMBER;
             break;
+          case TYPE_GROUP_CONCAT_COMMA:
           case TYPE_GROUP_CONCAT_STRING:
+          case TYPE_GROUP_CONCAT_DISTINCT:
             valueType = IValueMeta.TYPE_STRING;
             break;
           default:
diff --git a/plugins/transforms/memgroupby/src/main/resources/org/apache/hop/pipeline/transforms/memgroupby/messages/messages_en_US.properties b/plugins/transforms/memgroupby/src/main/resources/org/apache/hop/pipeline/transforms/memgroupby/messages/messages_en_US.properties
index f33bfe686b..b00e052c86 100644
--- a/plugins/transforms/memgroupby/src/main/resources/org/apache/hop/pipeline/transforms/memgroupby/messages/messages_en_US.properties
+++ b/plugins/transforms/memgroupby/src/main/resources/org/apache/hop/pipeline/transforms/memgroupby/messages/messages_en_US.properties
@@ -43,6 +43,7 @@ MemoryGroupByMeta.TypeGroupLongDesc.LAST_INCL_NULL=Last value
 MemoryGroupByDialog.Log.GettingKeyInfo=getting key info...
 MemoryGroupByDialog.AlwaysAddResult.ToolTip=To make sure we always output a correct count aggregation we always output at least one row, even if there were no input rows.\nThis makes the behavior consistent with the aggregation in an SQL GROUP BY.
 MemoryGroupByMeta.TypeGroupLongDesc.CONCAT_STRING=Concatenate strings separated by
+MemoryGroupByMeta.TypeGroupLongDesc.CONCAT_DISTINCT=Concatenate distinct values separated by
 MemoryGroupByMeta.TypeGroupLongDesc.SUM=Sum
 MemoryGroupByDialog.GetFields.Button=\ &Get Fields 
 MemoryGroupByDialog.ColumnInfo.GroupField=Group field