You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by da...@apache.org on 2015/07/03 00:58:48 UTC

spark git commit: Fix string ordering for non-ASCII characters

Repository: spark
Updated Branches:
  refs/heads/branch-1.4 bc51bcaea -> 3f1e4efbd


Fix string ordering for non-ASCII characters: compare UTF-8 bytes as unsigned values


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3f1e4efb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3f1e4efb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3f1e4efb

Branch: refs/heads/branch-1.4
Commit: 3f1e4efbd8f67a3bb1a9e33be91e2a07e0aa01b0
Parents: bc51bca
Author: Davies Liu <da...@databricks.com>
Authored: Thu Jul 2 15:58:42 2015 -0700
Committer: Davies Liu <da...@databricks.com>
Committed: Thu Jul 2 15:58:42 2015 -0700

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/sql/types/UTF8String.scala  | 3 ++-
 .../test/scala/org/apache/spark/sql/types/UTF8StringSuite.scala | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/3f1e4efb/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala
index f5d8fcc..e17743a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala
@@ -156,7 +156,8 @@ final class UTF8String extends Ordered[UTF8String] with Serializable {
     var i: Int = 0
     val b = other.getBytes
     while (i < bytes.length && i < b.length) {
-      val res = bytes(i).compareTo(b(i))
+      // UTF8 should be compared as unsigned int
+      val res = (bytes(i) & 0xFF) - (b(i) & 0xFF)
       if (res != 0) return res
       i += 1
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/3f1e4efb/sql/catalyst/src/test/scala/org/apache/spark/sql/types/UTF8StringSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/UTF8StringSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/UTF8StringSuite.scala
index 81d7ab0..02feee4 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/UTF8StringSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/UTF8StringSuite.scala
@@ -40,6 +40,11 @@ class UTF8StringSuite extends SparkFunSuite {
     check("世 界", 3)
   }
 
+  test("compare") {
+    assert(UTF8String("abc") > UTF8String("ABC"))
+    assert(UTF8String("世 界") > UTF8String("abc"))
+  }
+
   test("contains") {
     assert(UTF8String("hello").contains(UTF8String("ello")))
     assert(!UTF8String("hello").contains(UTF8String("vello")))


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org