You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@crunch.apache.org by gr...@apache.org on 2020/01/11 15:37:18 UTC
[crunch] branch CRUNCH-693 created (now 37be4b4)
This is an automated email from the ASF dual-hosted git repository.
greid pushed a change to branch CRUNCH-693
in repository https://gitbox.apache.org/repos/asf/crunch.git.
at 37be4b4 CRUNCH-693: Make text parsing locale-independent
This branch includes the following new commits:
new 37be4b4 CRUNCH-693: Make text parsing locale-independent
The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
[crunch] 01/01: CRUNCH-693: Make text parsing locale-independent
Posted by gr...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
greid pushed a commit to branch CRUNCH-693
in repository https://gitbox.apache.org/repos/asf/crunch.git
commit 37be4b4f468b2126fbb4e31df7ec4cd86884cbf9
Author: Gabriel Reid <gr...@apache.org>
AuthorDate: Sat Jan 11 16:35:20 2020 +0100
CRUNCH-693: Make text parsing locale-independent
Standardize on US-based locale for number formatting (which is
backwards-compatible with historical behavior).
---
.../main/java/org/apache/crunch/contrib/text/TokenizerFactory.java | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/crunch-contrib/src/main/java/org/apache/crunch/contrib/text/TokenizerFactory.java b/crunch-contrib/src/main/java/org/apache/crunch/contrib/text/TokenizerFactory.java
index f43478d..555ad8c 100644
--- a/crunch-contrib/src/main/java/org/apache/crunch/contrib/text/TokenizerFactory.java
+++ b/crunch-contrib/src/main/java/org/apache/crunch/contrib/text/TokenizerFactory.java
@@ -17,14 +17,14 @@
*/
package org.apache.crunch.contrib.text;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableSet;
+
import java.io.Serializable;
import java.util.Locale;
import java.util.Scanner;
import java.util.Set;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableSet;
-
/**
* Factory class that constructs {@link Tokenizer} instances for input strings that use a fixed
* set of delimiters, skip patterns, locales, and sets of indices to keep or drop.
@@ -65,6 +65,7 @@ public class TokenizerFactory implements Serializable {
*/
public Tokenizer create(String input) {
Scanner s = new Scanner(input);
+ s.useLocale(Locale.US); // Use period for floating point number formatting
if (delim != null) {
s.useDelimiter(delim);
}