You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@crunch.apache.org by gr...@apache.org on 2020/01/11 15:37:19 UTC

[crunch] 01/01: CRUNCH-693: Make text parsing locale-independent

This is an automated email from the ASF dual-hosted git repository.

greid pushed a commit to branch CRUNCH-693
in repository https://gitbox.apache.org/repos/asf/crunch.git

commit 37be4b4f468b2126fbb4e31df7ec4cd86884cbf9
Author: Gabriel Reid <gr...@apache.org>
AuthorDate: Sat Jan 11 16:35:20 2020 +0100

    CRUNCH-693: Make text parsing locale-independent
    
    Standardize on the US locale for number parsing (which is
    backwards-compatible with historical behavior).
---
 .../main/java/org/apache/crunch/contrib/text/TokenizerFactory.java | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/crunch-contrib/src/main/java/org/apache/crunch/contrib/text/TokenizerFactory.java b/crunch-contrib/src/main/java/org/apache/crunch/contrib/text/TokenizerFactory.java
index f43478d..555ad8c 100644
--- a/crunch-contrib/src/main/java/org/apache/crunch/contrib/text/TokenizerFactory.java
+++ b/crunch-contrib/src/main/java/org/apache/crunch/contrib/text/TokenizerFactory.java
@@ -17,14 +17,14 @@
  */
 package org.apache.crunch.contrib.text;
 
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableSet;
+
 import java.io.Serializable;
 import java.util.Locale;
 import java.util.Scanner;
 import java.util.Set;
 
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableSet;
-
 /**
  * Factory class that constructs {@link Tokenizer} instances for input strings that use a fixed
  * set of delimiters, skip patterns, locales, and sets of indices to keep or drop.
@@ -65,6 +65,7 @@ public class TokenizerFactory implements Serializable {
    */
   public Tokenizer create(String input) {
     Scanner s = new Scanner(input);
+    s.useLocale(Locale.US); // Use period for floating point number formatting
     if (delim != null) {
       s.useDelimiter(delim);
     }