You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@crunch.apache.org by gr...@apache.org on 2020/01/11 15:37:18 UTC

[crunch] branch CRUNCH-693 created (now 37be4b4)

This is an automated email from the ASF dual-hosted git repository.

greid pushed a change to branch CRUNCH-693
in repository https://gitbox.apache.org/repos/asf/crunch.git.


      at 37be4b4  CRUNCH-693: Make text parsing locale-independent

This branch includes the following new commits:

     new 37be4b4  CRUNCH-693: Make text parsing locale-independent

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[crunch] 01/01: CRUNCH-693: Make text parsing locale-independent

Posted by gr...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

greid pushed a commit to branch CRUNCH-693
in repository https://gitbox.apache.org/repos/asf/crunch.git

commit 37be4b4f468b2126fbb4e31df7ec4cd86884cbf9
Author: Gabriel Reid <gr...@apache.org>
AuthorDate: Sat Jan 11 16:35:20 2020 +0100

    CRUNCH-693: Make text parsing locale-independent
    
    Standardize on the US locale for number parsing (which is
    backwards-compatible with historical behavior).
---
 .../main/java/org/apache/crunch/contrib/text/TokenizerFactory.java | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/crunch-contrib/src/main/java/org/apache/crunch/contrib/text/TokenizerFactory.java b/crunch-contrib/src/main/java/org/apache/crunch/contrib/text/TokenizerFactory.java
index f43478d..555ad8c 100644
--- a/crunch-contrib/src/main/java/org/apache/crunch/contrib/text/TokenizerFactory.java
+++ b/crunch-contrib/src/main/java/org/apache/crunch/contrib/text/TokenizerFactory.java
@@ -17,14 +17,14 @@
  */
 package org.apache.crunch.contrib.text;
 
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableSet;
+
 import java.io.Serializable;
 import java.util.Locale;
 import java.util.Scanner;
 import java.util.Set;
 
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableSet;
-
 /**
  * Factory class that constructs {@link Tokenizer} instances for input strings that use a fixed
  * set of delimiters, skip patterns, locales, and sets of indices to keep or drop.
@@ -65,6 +65,7 @@ public class TokenizerFactory implements Serializable {
    */
   public Tokenizer create(String input) {
     Scanner s = new Scanner(input);
+    s.useLocale(Locale.US); // Use period for floating point number formatting
     if (delim != null) {
       s.useDelimiter(delim);
     }