You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by za...@apache.org on 2019/08/16 11:52:50 UTC
[ignite] 01/01: IGNITE-10697: fixed tabs
This is an automated email from the ASF dual-hosted git repository.
zaleslaw pushed a commit to branch ignite-10697
in repository https://gitbox.apache.org/repos/asf/ignite.git
commit 8bc6e58f961facb50b7aea030b5d1d87e32daee6
Author: Alexey Zinoviev <za...@gmail.com>
AuthorDate: Fri Aug 16 14:52:15 2019 +0300
IGNITE-10697: fixed tabs
---
.../ml/preprocessing/encoding/EncoderTrainer.java | 50 +++++++++++-----------
1 file changed, 25 insertions(+), 25 deletions(-)
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderTrainer.java
index 03b35c3..0eb2850 100644
--- a/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderTrainer.java
+++ b/modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/EncoderTrainer.java
@@ -97,6 +97,31 @@ public class EncoderTrainer<K, V> implements PreprocessingTrainer<K, V> {
}
/**
+ * Calculates encoding frequencies as frequency divided on amount of rows in dataset.
+ *
+ * NOTE: The amount of rows is calculated as sum of absolute frequencies.
+ *
+ * @param dataset Dataset.
+ * @return Encoding frequency for each feature.
+ */
+ private Map<String, Double>[] calculateEncodingFrequencies(Dataset<EmptyContext, EncoderPartitionData> dataset) {
+ Map<String, Integer>[] frequencies = calculateFrequencies(dataset);
+
+ Map<String, Double>[] res = new Map[frequencies.length];
+
+ int[] counters = new int[frequencies.length];
+
+ for (int i = 0; i < frequencies.length; i++) {
+ counters[i] = frequencies[i].values().stream().reduce(0, Integer::sum);
+ int locI = i;
+ res[locI] = new HashMap<>();
+ frequencies[i].forEach((k, v) -> res[locI].put(k, (double)v / counters[locI]));
+ }
+
+ return res;
+ }
+
+ /**
* Calculates frequencies for each feature.
*
* @param dataset Dataset.
@@ -126,31 +151,6 @@ public class EncoderTrainer<K, V> implements PreprocessingTrainer<K, V> {
}
/**
- * Calculates encoding frequencies as frequency divided on amount of rows in dataset.
- *
- * NOTE: The amount of rows is calculated as sum of absolute frequencies.
- *
- * @param dataset Dataset.
- * @return Encoding frequency for each feature.
- */
- private Map<String, Double>[] calculateEncodingFrequencies(Dataset<EmptyContext, EncoderPartitionData> dataset) {
- Map<String, Integer>[] frequencies = calculateFrequencies(dataset);
-
- Map<String, Double>[] res = new Map[frequencies.length];
-
- int[] counters = new int[frequencies.length];
-
- for (int i = 0; i < frequencies.length; i++) {
- counters[i] = frequencies[i].values().stream().reduce(0, Integer::sum);
- int locI = i;
- res[locI] = new HashMap<>();
- frequencies[i].forEach((k, v) -> res[locI].put(k, (double)v / counters[locI]));
- }
-
- return res;
- }
-
- /**
* Calculates the encoding values values by frequencies keeping in the given dataset.
*
* @param dataset The dataset of frequencies for each feature aggregated in each partition.