You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by za...@apache.org on 2019/08/16 11:52:49 UTC

[ignite] branch ignite-10697 created (now 8bc6e58)

This is an automated email from the ASF dual-hosted git repository.

zaleslaw pushed a change to branch ignite-10697
in repository https://gitbox.apache.org/repos/asf/ignite.git.


      at 8bc6e58  IGNITE-10697: fixed tabs

This branch includes the following new commits:

     new 8bc6e58  IGNITE-10697: fixed tabs

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[ignite] 01/01: IGNITE-10697: fixed tabs

Posted by za...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

zaleslaw pushed a commit to branch ignite-10697
in repository https://gitbox.apache.org/repos/asf/ignite.git

commit 8bc6e58f961facb50b7aea030b5d1d87e32daee6
Author: Alexey Zinoviev <za...@gmail.com>
AuthorDate: Fri Aug 16 14:52:15 2019 +0300

    IGNITE-10697: fixed tabs
---
 .../ml/preprocessing/encoding/EncoderTrainer.java  | 50 +++++++++++-----------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderTrainer.java
index 03b35c3..0eb2850 100644
--- a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderTrainer.java
+++ b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderTrainer.java
@@ -97,6 +97,31 @@ public class EncoderTrainer<K, V> implements PreprocessingTrainer<K, V> {
     }
 
     /**
+     * Calculates encoding frequencies as frequency divided on amount of rows in dataset.
+     *
+     * NOTE: The amount of rows is calculated as sum of absolute frequencies.
+     *
+     * @param dataset Dataset.
+     * @return Encoding frequency for each feature.
+     */
+    private Map<String, Double>[] calculateEncodingFrequencies(Dataset<EmptyContext, EncoderPartitionData> dataset) {
+        Map<String, Integer>[] frequencies = calculateFrequencies(dataset);
+
+        Map<String, Double>[] res = new Map[frequencies.length];
+
+        int[] counters = new int[frequencies.length];
+
+        for (int i = 0; i < frequencies.length; i++) {
+            counters[i] = frequencies[i].values().stream().reduce(0, Integer::sum);
+            int locI = i;
+            res[locI] = new HashMap<>();
+            frequencies[i].forEach((k, v) -> res[locI].put(k, (double)v / counters[locI]));
+        }
+
+        return res;
+    }
+
+    /**
      * Calculates frequencies for each feature.
      *
      * @param dataset Dataset.
@@ -126,31 +151,6 @@ public class EncoderTrainer<K, V> implements PreprocessingTrainer<K, V> {
     }
 
     /**
-     * Calculates encoding frequencies as frequency divided on amount of rows in dataset.
-     *
-     * NOTE: The amount of rows is calculated as sum of absolute frequencies.
-     *
-     * @param dataset Dataset.
-     * @return Encoding frequency for each feature.
-     */
-    private Map<String, Double>[] calculateEncodingFrequencies(Dataset<EmptyContext, EncoderPartitionData> dataset) {
-        Map<String, Integer>[] frequencies = calculateFrequencies(dataset);
-
-        Map<String, Double>[] res = new Map[frequencies.length];
-
-        int[] counters = new int[frequencies.length];
-
-        for (int i = 0; i < frequencies.length; i++) {
-            counters[i] = frequencies[i].values().stream().reduce(0, Integer::sum);
-            int locI = i;
-            res[locI] = new HashMap<>();
-            frequencies[i].forEach((k, v) -> res[locI].put(k, (double)v / counters[locI]));
-        }
-
-        return res;
-    }
-
-    /**
      * Calculates the encoding values values by frequencies keeping in the given dataset.
      *
      * @param dataset The dataset of frequencies for each feature aggregated in each partition.