You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by ji...@apache.org on 2015/10/29 05:44:59 UTC
[08/15] incubator-asterixdb git commit: ASTERIXDB-1102: VarSize
Encoding to store length of String and ByteArray
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/binary/find/find.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/binary/find/find.1.adm b/asterix-app/src/test/resources/runtimets/results/binary/find/find.1.adm
index 9e639d6..4f811a2 100644
--- a/asterix-app/src/test/resources/runtimets/results/binary/find/find.1.adm
+++ b/asterix-app/src/test/resources/runtimets/results/binary/find/find.1.adm
@@ -1,2 +1,2 @@
-[ [ true, null, null, null ]
+[ [ true, true, true, true, true, true, true, null, null, null ]
]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/binary/subbinary/subbinary.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/binary/subbinary/subbinary.1.adm b/asterix-app/src/test/resources/runtimets/results/binary/subbinary/subbinary.1.adm
index 975a5f0..d1a46d0 100644
--- a/asterix-app/src/test/resources/runtimets/results/binary/subbinary/subbinary.1.adm
+++ b/asterix-app/src/test/resources/runtimets/results/binary/subbinary/subbinary.1.adm
@@ -1,2 +1,2 @@
-[ true
+[ [ true, true, true, true, true, true, true, true, true, true, true, true, true, true ]
]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/nested-index-dml/delete-from-loaded-dataset-with-index/delete-from-loaded-dataset-with-index.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/nested-index-dml/delete-from-loaded-dataset-with-index/delete-from-loaded-dataset-with-index.1.adm b/asterix-app/src/test/resources/runtimets/results/nested-index-dml/delete-from-loaded-dataset-with-index/delete-from-loaded-dataset-with-index.1.adm
index 8429702..9b7d762 100644
--- a/asterix-app/src/test/resources/runtimets/results/nested-index-dml/delete-from-loaded-dataset-with-index/delete-from-loaded-dataset-with-index.1.adm
+++ b/asterix-app/src/test/resources/runtimets/results/nested-index-dml/delete-from-loaded-dataset-with-index/delete-from-loaded-dataset-with-index.1.adm
@@ -1,5 +1,5 @@
[ { "tweetid": "10", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("29.15,76.53"), "send-time": datetime("2008-01-26T10:10:00.000Z"), "referred-topics": {{ "verizon", "voice-clarity" }}, "message-text": " hate verizon its voice-clarity is OMG:(" }
-, { "tweetid": "6", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("47.51,83.99"), "send-time": datetime("2010-05-07T10:10:00.000Z"), "referred-topics": {{ "iphone", "voice-clarity" }}, "message-text": " like iphone the voice-clarity is good:)" }
, { "tweetid": "11", "user": { "screen-name": "NilaMilliron_tw", "lang": "en", "friends_count": 445, "statuses_count": 164, "name": "Nila Milliron", "followers_count": 22649 }, "sender-location": point("37.59,68.42"), "send-time": datetime("2008-03-09T10:10:00.000Z"), "referred-topics": {{ "iphone", "platform" }}, "message-text": " can't stand iphone its platform is terrible" }
-, { "tweetid": "2", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("32.84,67.14"), "send-time": datetime("2010-05-13T10:10:00.000Z"), "referred-topics": {{ "verizon", "shortcut-menu" }}, "message-text": " like verizon its shortcut-menu is awesome:)" }
+, { "tweetid": "2", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("32.84,67.14"), "send-time": datetime("2010-05-13T10:10:00.000Z"), "referred-topics": {{ "verizon", "shortcut-menu" }}, "message-text": " like verizon its shortcut-menu is awesome:)" }
+, { "tweetid": "6", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("47.51,83.99"), "send-time": datetime("2010-05-07T10:10:00.000Z"), "referred-topics": {{ "iphone", "voice-clarity" }}, "message-text": " like iphone the voice-clarity is good:)" }
]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/nested-index-dml/insert-into-loaded-dataset-with-index_02/insert-into-loaded-dataset-with-index_02.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/nested-index-dml/insert-into-loaded-dataset-with-index_02/insert-into-loaded-dataset-with-index_02.1.adm b/asterix-app/src/test/resources/runtimets/results/nested-index-dml/insert-into-loaded-dataset-with-index_02/insert-into-loaded-dataset-with-index_02.1.adm
index 12c16d1..22b519d 100644
--- a/asterix-app/src/test/resources/runtimets/results/nested-index-dml/insert-into-loaded-dataset-with-index_02/insert-into-loaded-dataset-with-index_02.1.adm
+++ b/asterix-app/src/test/resources/runtimets/results/nested-index-dml/insert-into-loaded-dataset-with-index_02/insert-into-loaded-dataset-with-index_02.1.adm
@@ -1,5 +1,5 @@
[ { "tweetid": "10", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("29.15,76.53"), "send-time": datetime("2008-01-26T10:10:00.000Z"), "referred-topics": {{ "verizon", "voice-clarity" }}, "message-text": " hate verizon its voice-clarity is OMG:(" }
+, { "tweetid": "2", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("32.84,67.14"), "send-time": datetime("2010-05-13T10:10:00.000Z"), "referred-topics": {{ "verizon", "shortcut-menu" }}, "message-text": " like verizon its shortcut-menu is awesome:)" }
, { "tweetid": "6", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("47.51,83.99"), "send-time": datetime("2010-05-07T10:10:00.000Z"), "referred-topics": {{ "iphone", "voice-clarity" }}, "message-text": " like iphone the voice-clarity is good:)" }
, { "tweetid": "7", "user": { "screen-name": "ChangEwing_573", "lang": "en", "friends_count": 182, "statuses_count": 394, "name": "Chang Ewing", "followers_count": 32136 }, "sender-location": point("36.21,72.6"), "send-time": datetime("2011-08-25T10:10:00.000Z"), "referred-topics": {{ "samsung", "platform" }}, "message-text": " like samsung the platform is good" }
-, { "tweetid": "2", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("32.84,67.14"), "send-time": datetime("2010-05-13T10:10:00.000Z"), "referred-topics": {{ "verizon", "shortcut-menu" }}, "message-text": " like verizon its shortcut-menu is awesome:)" }
]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/end-with1/end-with1.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/end-with1/end-with1.1.adm b/asterix-app/src/test/resources/runtimets/results/string/end-with1/end-with1.1.adm
deleted file mode 100644
index e452ee2..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/end-with1/end-with1.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "result1": false }
- ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/end-with2/end-with2.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/end-with2/end-with2.1.adm b/asterix-app/src/test/resources/runtimets/results/string/end-with2/end-with2.1.adm
deleted file mode 100644
index 792013a..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/end-with2/end-with2.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "result1": true }
- ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/end-with3/end-with3.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/end-with3/end-with3.1.adm b/asterix-app/src/test/resources/runtimets/results/string/end-with3/end-with3.1.adm
deleted file mode 100644
index 792013a..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/end-with3/end-with3.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "result1": true }
- ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/end-with4/end-with4.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/end-with4/end-with4.1.adm b/asterix-app/src/test/resources/runtimets/results/string/end-with4/end-with4.1.adm
deleted file mode 100644
index e452ee2..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/end-with4/end-with4.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "result1": false }
- ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/end-with5/end-with5.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/end-with5/end-with5.1.adm b/asterix-app/src/test/resources/runtimets/results/string/end-with5/end-with5.1.adm
deleted file mode 100644
index 3d1cf35..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/end-with5/end-with5.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "f1": true, "f2": false, "f3": true, "f4": false, "f5": true, "f6": false }
- ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with1/ends-with1.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with1/ends-with1.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with1/ends-with1.1.adm
new file mode 100644
index 0000000..e452ee2
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/ends-with1/ends-with1.1.adm
@@ -0,0 +1,2 @@
+[ { "result1": false }
+ ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with2/ends-with2.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with2/ends-with2.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with2/ends-with2.1.adm
new file mode 100644
index 0000000..792013a
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/ends-with2/ends-with2.1.adm
@@ -0,0 +1,2 @@
+[ { "result1": true }
+ ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with3/ends-with3.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with3/ends-with3.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with3/ends-with3.1.adm
new file mode 100644
index 0000000..792013a
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/ends-with3/ends-with3.1.adm
@@ -0,0 +1,2 @@
+[ { "result1": true }
+ ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with4/ends-with4.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with4/ends-with4.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with4/ends-with4.1.adm
new file mode 100644
index 0000000..e452ee2
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/ends-with4/ends-with4.1.adm
@@ -0,0 +1,2 @@
+[ { "result1": false }
+ ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with5/ends-with5.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with5/ends-with5.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with5/ends-with5.1.adm
new file mode 100644
index 0000000..7ef4d67
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/ends-with5/ends-with5.1.adm
@@ -0,0 +1,2 @@
+[ { "f1": true, "f2": null, "f3": null, "f4": null, "f5": true, "f6": null }
+ ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with6/ends-with6.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with6/ends-with6.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with6/ends-with6.1.adm
new file mode 100644
index 0000000..9a8b5dc
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/ends-with6/ends-with6.1.adm
@@ -0,0 +1,2 @@
+[ [ false, true ]
+ ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with7/ends-with7.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with7/ends-with7.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with7/ends-with7.1.adm
new file mode 100644
index 0000000..f6ec4a3
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/ends-with7/ends-with7.1.adm
@@ -0,0 +1,7 @@
+[ false
+, false
+, true
+, true
+, true
+, true
+ ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with8/ends-with8.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with8/ends-with8.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with8/ends-with8.1.adm
new file mode 100644
index 0000000..f696fc1
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/ends-with8/ends-with8.1.adm
@@ -0,0 +1,5 @@
+[ { "name": "I am Jones" }
+, { "name": "Jim Jones" }
+, { "name": "Marian Jones" }
+, { "name": "Phil Jones" }
+ ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with_01/ends-with_01.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with_01/ends-with_01.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with_01/ends-with_01.1.adm
deleted file mode 100644
index 9a8b5dc..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/ends-with_01/ends-with_01.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ [ false, true ]
- ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/endwith02/endwith02.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/endwith02/endwith02.1.adm b/asterix-app/src/test/resources/runtimets/results/string/endwith02/endwith02.1.adm
deleted file mode 100644
index f6ec4a3..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/endwith02/endwith02.1.adm
+++ /dev/null
@@ -1,7 +0,0 @@
-[ false
-, false
-, true
-, true
-, true
-, true
- ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/endwith03/endwith03.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/endwith03/endwith03.1.adm b/asterix-app/src/test/resources/runtimets/results/string/endwith03/endwith03.1.adm
deleted file mode 100644
index f696fc1..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/endwith03/endwith03.1.adm
+++ /dev/null
@@ -1,5 +0,0 @@
-[ { "name": "I am Jones" }
-, { "name": "Jim Jones" }
-, { "name": "Marian Jones" }
-, { "name": "Phil Jones" }
- ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/matches11/matches11.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/matches11/matches11.1.adm b/asterix-app/src/test/resources/runtimets/results/string/matches11/matches11.1.adm
index 51bc6cf..36fc447 100644
--- a/asterix-app/src/test/resources/runtimets/results/string/matches11/matches11.1.adm
+++ b/asterix-app/src/test/resources/runtimets/results/string/matches11/matches11.1.adm
@@ -1,6 +1,7 @@
-[ false
-, false
+[ null
+, null
, false
, false
+, null
, false
]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/matchesnull/matchesnull.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/matchesnull/matchesnull.1.adm b/asterix-app/src/test/resources/runtimets/results/string/matchesnull/matchesnull.1.adm
index 5721c80..3e9f513 100644
--- a/asterix-app/src/test/resources/runtimets/results/string/matchesnull/matchesnull.1.adm
+++ b/asterix-app/src/test/resources/runtimets/results/string/matchesnull/matchesnull.1.adm
@@ -1,2 +1,2 @@
-[ { "result1": false, "result2": false, "result3": true, "result4": false, "result5": false, "result6": true }
+[ { "result1": null, "result2": null, "result3": null, "result4": null, "result5": null, "result6": null }
]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/start-with1/start-with1.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/start-with1/start-with1.1.adm b/asterix-app/src/test/resources/runtimets/results/string/start-with1/start-with1.1.adm
deleted file mode 100644
index 792013a..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/start-with1/start-with1.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "result1": true }
- ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/start-with2/start-with2.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/start-with2/start-with2.1.adm b/asterix-app/src/test/resources/runtimets/results/string/start-with2/start-with2.1.adm
deleted file mode 100644
index e452ee2..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/start-with2/start-with2.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "result1": false }
- ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/start-with3/start-with3.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/start-with3/start-with3.1.adm b/asterix-app/src/test/resources/runtimets/results/string/start-with3/start-with3.1.adm
deleted file mode 100644
index 792013a..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/start-with3/start-with3.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "result1": true }
- ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/start-with4/start-with4.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/start-with4/start-with4.1.adm b/asterix-app/src/test/resources/runtimets/results/string/start-with4/start-with4.1.adm
deleted file mode 100644
index 3d1cf35..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/start-with4/start-with4.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "f1": true, "f2": false, "f3": true, "f4": false, "f5": true, "f6": false }
- ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/start-with5/start-with5.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/start-with5/start-with5.1.adm b/asterix-app/src/test/resources/runtimets/results/string/start-with5/start-with5.1.adm
deleted file mode 100644
index e452ee2..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/start-with5/start-with5.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "result1": false }
- ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with1/starts-with1.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with1/starts-with1.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with1/starts-with1.1.adm
new file mode 100644
index 0000000..792013a
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/starts-with1/starts-with1.1.adm
@@ -0,0 +1,2 @@
+[ { "result1": true }
+ ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with2/starts-with2.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with2/starts-with2.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with2/starts-with2.1.adm
new file mode 100644
index 0000000..e452ee2
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/starts-with2/starts-with2.1.adm
@@ -0,0 +1,2 @@
+[ { "result1": false }
+ ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with3/starts-with3.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with3/starts-with3.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with3/starts-with3.1.adm
new file mode 100644
index 0000000..792013a
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/starts-with3/starts-with3.1.adm
@@ -0,0 +1,2 @@
+[ { "result1": true }
+ ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with4/starts-with4.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with4/starts-with4.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with4/starts-with4.1.adm
new file mode 100644
index 0000000..7ef4d67
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/starts-with4/starts-with4.1.adm
@@ -0,0 +1,2 @@
+[ { "f1": true, "f2": null, "f3": null, "f4": null, "f5": true, "f6": null }
+ ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with5/starts-with5.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with5/starts-with5.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with5/starts-with5.1.adm
new file mode 100644
index 0000000..e452ee2
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/starts-with5/starts-with5.1.adm
@@ -0,0 +1,2 @@
+[ { "result1": false }
+ ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with6/starts-with6.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with6/starts-with6.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with6/starts-with6.1.adm
new file mode 100644
index 0000000..9a8b5dc
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/starts-with6/starts-with6.1.adm
@@ -0,0 +1,2 @@
+[ [ false, true ]
+ ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with7/starts-with7.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with7/starts-with7.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with7/starts-with7.1.adm
new file mode 100644
index 0000000..eae1439
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/starts-with7/starts-with7.1.adm
@@ -0,0 +1,16 @@
+[ null
+, true
+, false
+, true
+, false
+, null
+, true
+, true
+, false
+, true
+, false
+, true
+, true
+, false
+, false
+ ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with8/starts-with8.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with8/starts-with8.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with8/starts-with8.1.adm
new file mode 100644
index 0000000..3ff35f7
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/starts-with8/starts-with8.1.adm
@@ -0,0 +1,6 @@
+[ { "name": "John Doe" }
+, { "name": "John Smith" }
+, { "name": "John Wayne" }
+, { "name": "Johnny Walker" }
+, { "name": "Johnson Ben" }
+ ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with_01/starts-with_01.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with_01/starts-with_01.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with_01/starts-with_01.1.adm
deleted file mode 100644
index 9a8b5dc..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/starts-with_01/starts-with_01.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ [ false, true ]
- ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/startwith02/startwith02.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/startwith02/startwith02.1.adm b/asterix-app/src/test/resources/runtimets/results/string/startwith02/startwith02.1.adm
deleted file mode 100644
index 933ea38..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/startwith02/startwith02.1.adm
+++ /dev/null
@@ -1,15 +0,0 @@
-[ true
-, false
-, true
-, false
-, false
-, true
-, true
-, false
-, true
-, false
-, true
-, true
-, false
-, false
- ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/startwith03/startwith03.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/startwith03/startwith03.1.adm b/asterix-app/src/test/resources/runtimets/results/string/startwith03/startwith03.1.adm
deleted file mode 100644
index 3ff35f7..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/startwith03/startwith03.1.adm
+++ /dev/null
@@ -1,6 +0,0 @@
-[ { "name": "John Doe" }
-, { "name": "John Smith" }
-, { "name": "John Wayne" }
-, { "name": "Johnny Walker" }
-, { "name": "Johnson Ben" }
- ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/string-equal4/string-equal4.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/string-equal4/string-equal4.1.adm b/asterix-app/src/test/resources/runtimets/results/string/string-equal4/string-equal4.1.adm
index 84836dc..07187b3 100644
--- a/asterix-app/src/test/resources/runtimets/results/string/string-equal4/string-equal4.1.adm
+++ b/asterix-app/src/test/resources/runtimets/results/string/string-equal4/string-equal4.1.adm
@@ -1,2 +1,2 @@
-[ { "result1": true, "result3": false, "result4": false, "result5": true }
+[ { "result1": true, "result3": null, "result4": null, "result5": null }
]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/varlen-encoding/varlen-encoding.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/varlen-encoding/varlen-encoding.1.adm b/asterix-app/src/test/resources/runtimets/results/string/varlen-encoding/varlen-encoding.1.adm
new file mode 100644
index 0000000..12b6cef
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/varlen-encoding/varlen-encoding.1.adm
@@ -0,0 +1,2 @@
+[ [ 127, 128, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216 ]
+ ]
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/testsuite.xml
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml
index b6417bc..097805e 100644
--- a/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -18,9 +18,9 @@
!-->
<!DOCTYPE test-suite [
-<!ENTITY RecordsQueries SYSTEM "queries/records/RecordsQueries.xml">
+ <!ENTITY RecordsQueries SYSTEM "queries/records/RecordsQueries.xml">
-]>
+ ]>
<test-suite
xmlns="urn:xml.testframework.asterix.apache.org"
ResultOffsetPath="results"
@@ -4810,43 +4810,43 @@
</compilation-unit>
</test-case>
<test-case FilePath="string">
- <compilation-unit name="end-with1">
- <output-dir compare="Text">end-with1</output-dir>
+ <compilation-unit name="ends-with1">
+ <output-dir compare="Text">ends-with1</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="string">
- <compilation-unit name="end-with2">
- <output-dir compare="Text">end-with2</output-dir>
+ <compilation-unit name="ends-with2">
+ <output-dir compare="Text">ends-with2</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="string">
- <compilation-unit name="end-with3">
- <output-dir compare="Text">end-with3</output-dir>
+ <compilation-unit name="ends-with3">
+ <output-dir compare="Text">ends-with3</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="string">
- <compilation-unit name="end-with4">
- <output-dir compare="Text">end-with4</output-dir>
+ <compilation-unit name="ends-with4">
+ <output-dir compare="Text">ends-with4</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="string">
- <compilation-unit name="end-with5">
- <output-dir compare="Text">end-with5</output-dir>
+ <compilation-unit name="ends-with5">
+ <output-dir compare="Text">ends-with5</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="string">
- <compilation-unit name="ends-with_01">
- <output-dir compare="Text">ends-with_01</output-dir>
+ <compilation-unit name="ends-with6">
+ <output-dir compare="Text">ends-with6</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="string">
- <compilation-unit name="endwith02">
- <output-dir compare="Text">endwith02</output-dir>
+ <compilation-unit name="ends-with7">
+ <output-dir compare="Text">ends-with7</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="string">
- <compilation-unit name="endwith03">
- <output-dir compare="Text">endwith03</output-dir>
+ <compilation-unit name="ends-with8">
+ <output-dir compare="Text">ends-with8</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="string">
@@ -4975,44 +4975,44 @@
</compilation-unit>
</test-case>
<test-case FilePath="string">
- <compilation-unit name="start-with1">
- <output-dir compare="Text">start-with1</output-dir>
+ <compilation-unit name="starts-with1">
+ <output-dir compare="Text">starts-with1</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="string">
- <compilation-unit name="start-with2">
- <output-dir compare="Text">start-with2</output-dir>
+ <compilation-unit name="starts-with2">
+ <output-dir compare="Text">starts-with2</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="string">
- <compilation-unit name="start-with3">
- <output-dir compare="Text">start-with3</output-dir>
+ <compilation-unit name="starts-with3">
+ <output-dir compare="Text">starts-with3</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="string">
- <compilation-unit name="start-with4">
- <output-dir compare="Text">start-with4</output-dir>
+ <compilation-unit name="starts-with4">
+ <output-dir compare="Text">starts-with4</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="string">
- <compilation-unit name="start-with5">
- <output-dir compare="Text">start-with5</output-dir>
+ <compilation-unit name="starts-with5">
+ <output-dir compare="Text">starts-with5</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="string">
- <compilation-unit name="starts-with_01">
- <output-dir compare="Text">starts-with_01</output-dir>
+ <compilation-unit name="starts-with6">
+ <output-dir compare="Text">starts-with6</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="string">
- <compilation-unit name="startwith02">
- <output-dir compare="Text">startwith02</output-dir>
+ <compilation-unit name="starts-with7">
+ <output-dir compare="Text">starts-with7</output-dir>
</compilation-unit>
</test-case>
<!--
<test-case FilePath="string">
- <compilation-unit name="startwith03">
- <output-dir compare="Text">startwith03</output-dir>
+ <compilation-unit name="starts-with8">
+ <output-dir compare="Text">starts-with8</output-dir>
</compilation-unit>
</test-case>
-->
@@ -5196,6 +5196,11 @@
<output-dir compare="Text">uppercase</output-dir>
</compilation-unit>
</test-case>
+ <test-case FilePath="string">
+ <compilation-unit name="varlen-encoding">
+ <output-dir compare="Text">varlen-encoding</output-dir>
+ </compilation-unit>
+ </test-case>
</test-group>
<test-group name="subset-collection">
<test-case FilePath="subset-collection">
@@ -6206,6 +6211,11 @@
<output-dir compare="Text">big_object_join</output-dir>
</compilation-unit>
</test-case>
+ <test-case FilePath="big-object">
+ <compilation-unit name="big_object_load">
+ <output-dir compare="Text">big_object_load</output-dir>
+ </compilation-unit>
+ </test-case>
</test-group>
<test-group name="external-indexing">
<test-case FilePath="external-indexing">
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-common/src/main/java/org/apache/asterix/common/utils/UTF8CharSequence.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/utils/UTF8CharSequence.java b/asterix-common/src/main/java/org/apache/asterix/common/utils/UTF8CharSequence.java
deleted file mode 100644
index 468aca8..0000000
--- a/asterix-common/src/main/java/org/apache/asterix/common/utils/UTF8CharSequence.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.common.utils;
-
-import org.apache.hyracks.data.std.api.IValueReference;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
-
-public class UTF8CharSequence implements CharSequence {
-
- private int start;
- private int len;
- private char[] buf;
-
- public UTF8CharSequence(IValueReference valueRef, int start) {
- reset(valueRef, start);
- }
-
- public UTF8CharSequence() {
- }
-
- @Override
- public char charAt(int index) {
- if (index >= len || index < 0) {
- throw new IndexOutOfBoundsException("No index " + index + " for string of length " + len);
- }
- return buf[index];
- }
-
- @Override
- public int length() {
- return len;
- }
-
- @Override
- public CharSequence subSequence(int start, int end) {
- UTF8CharSequence carSeq = new UTF8CharSequence();
- carSeq.len = end - start;
- if (end != start) {
- carSeq.buf = new char[carSeq.len];
- System.arraycopy(buf, start, carSeq.buf, 0, carSeq.len);
- }
- return carSeq;
- }
-
- public void reset(IValueReference valueRef, int start) {
- this.start = start;
- resetLength(valueRef);
- if (buf == null || buf.length < len) {
- buf = new char[len];
- }
- int sStart = start + 2;
- int c = 0;
- int i = 0;
- byte[] bytes = valueRef.getByteArray();
- while (c < len) {
- buf[i++] = UTF8StringPointable.charAt(bytes, sStart + c);
- c += UTF8StringPointable.charSize(bytes, sStart + c);
- }
-
- }
-
- private void resetLength(IValueReference valueRef) {
- this.len = UTF8StringPointable.getUTFLength(valueRef.getByteArray(), start);
- }
-
- @Override
- public String toString() {
- StringBuffer bf = new StringBuffer();
- if (buf != null) {
- for (int i = 0; i < buf.length; i++) {
- bf.append(buf[i]);
- }
- }
- return new String(bf);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-common/src/test/java/org/apache/asterix/test/aql/TestsUtils.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/test/java/org/apache/asterix/test/aql/TestsUtils.java b/asterix-common/src/test/java/org/apache/asterix/test/aql/TestsUtils.java
index 8ff524e..6cc7abb 100644
--- a/asterix-common/src/test/java/org/apache/asterix/test/aql/TestsUtils.java
+++ b/asterix-common/src/test/java/org/apache/asterix/test/aql/TestsUtils.java
@@ -35,7 +35,18 @@ import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
-import org.apache.commons.httpclient.*;
+import org.apache.asterix.common.config.GlobalConfig;
+import org.apache.asterix.testframework.context.TestCaseContext;
+import org.apache.asterix.testframework.context.TestCaseContext.OutputFormat;
+import org.apache.asterix.testframework.context.TestFileContext;
+import org.apache.asterix.testframework.xml.TestCase.CompilationUnit;
+import org.apache.asterix.testframework.xml.TestGroup;
+import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.HttpMethod;
+import org.apache.commons.httpclient.HttpMethodBase;
+import org.apache.commons.httpclient.HttpStatus;
+import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.methods.StringRequestEntity;
@@ -43,12 +54,6 @@ import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.io.IOUtils;
import org.json.JSONObject;
-import org.apache.asterix.common.config.GlobalConfig;
-import org.apache.asterix.testframework.context.TestCaseContext;
-import org.apache.asterix.testframework.context.TestCaseContext.OutputFormat;
-import org.apache.asterix.testframework.context.TestFileContext;
-import org.apache.asterix.testframework.xml.TestCase.CompilationUnit;
-
public class TestsUtils {
private static final Logger LOGGER = Logger.getLogger(TestsUtils.class.getName());
@@ -383,6 +388,11 @@ public class TestsUtils {
public static void executeTest(String actualPath, TestCaseContext testCaseCtx, ProcessBuilder pb,
boolean isDmlRecoveryTest) throws Exception {
+ executeTest(actualPath, testCaseCtx, pb, isDmlRecoveryTest, null);
+ }
+
+ public static void executeTest(String actualPath, TestCaseContext testCaseCtx, ProcessBuilder pb,
+ boolean isDmlRecoveryTest, TestGroup failedGroup) throws Exception {
File testFile;
File expectedResultFile;
@@ -395,7 +405,8 @@ public class TestsUtils {
List<CompilationUnit> cUnits = testCaseCtx.getTestCase().getCompilationUnit();
for (CompilationUnit cUnit : cUnits) {
- LOGGER.info("Starting [TEST]: " + testCaseCtx.getTestCase().getFilePath() + "/" + cUnit.getName() + " ... ");
+ LOGGER.info(
+ "Starting [TEST]: " + testCaseCtx.getTestCase().getFilePath() + "/" + cUnit.getName() + " ... ");
testFileCtxs = testCaseCtx.getTestFiles(cUnit);
expectedResultFileCtxs = testCaseCtx.getExpectedResultFiles(cUnit);
for (TestFileContext ctx : testFileCtxs) {
@@ -524,16 +535,22 @@ public class TestsUtils {
}
} catch (Exception e) {
+
System.err.println("testFile " + testFile.toString() + " raised an exception:");
+
e.printStackTrace();
if (cUnit.getExpectedError().isEmpty()) {
System.err.println("...Unexpected!");
+ if (failedGroup != null) {
+ failedGroup.getTestCase().add(testCaseCtx.getTestCase());
+ }
throw new Exception("Test \"" + testFile + "\" FAILED!", e);
} else {
LOGGER.info("[TEST]: " + testCaseCtx.getTestCase().getFilePath() + "/" + cUnit.getName()
+ " failed as expected: " + e.getMessage());
System.err.println("...but that was expected.");
}
+
}
}
}
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/FileIndexTupleTranslator.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/FileIndexTupleTranslator.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/FileIndexTupleTranslator.java
index 8029b8f..ebba65b 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/FileIndexTupleTranslator.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/FileIndexTupleTranslator.java
@@ -41,7 +41,8 @@ import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
@SuppressWarnings("unchecked")
public class FileIndexTupleTranslator {
- private ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(FilesIndexDescription.FILE_INDEX_RECORD_DESCRIPTOR.getFieldCount());
+ private final FilesIndexDescription filesIndexDescription = new FilesIndexDescription();
+ private ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(filesIndexDescription.FILE_INDEX_RECORD_DESCRIPTOR.getFieldCount());
private RecordBuilder recordBuilder = new RecordBuilder();
private ArrayBackedValueStorage fieldValue = new ArrayBackedValueStorage();
private AMutableInt32 aInt32 = new AMutableInt32(0);
@@ -57,11 +58,11 @@ public class FileIndexTupleTranslator {
tupleBuilder.reset();
//File Number
aInt32.setValue(file.getFileNumber());
- FilesIndexDescription.FILE_INDEX_RECORD_DESCRIPTOR.getFields()[0].serialize(aInt32, tupleBuilder.getDataOutput());
+ filesIndexDescription.FILE_INDEX_RECORD_DESCRIPTOR.getFields()[0].serialize(aInt32, tupleBuilder.getDataOutput());
tupleBuilder.addFieldEndOffset();
//File Record
- recordBuilder.reset(FilesIndexDescription.EXTERNAL_FILE_RECORD_TYPE);
+ recordBuilder.reset(filesIndexDescription.EXTERNAL_FILE_RECORD_TYPE);
// write field 0 (File Name)
fieldValue.reset();
aString.setValue(file.getFileName());
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HiveObjectParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HiveObjectParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HiveObjectParser.java
index e80247f..d7fa4f2 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HiveObjectParser.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HiveObjectParser.java
@@ -58,6 +58,7 @@ import org.apache.asterix.om.util.NonTaggedFormatUtil;
import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.util.string.UTF8StringWriter;
@SuppressWarnings("deprecation")
public class HiveObjectParser implements IAsterixHDFSRecordParser {
@@ -75,6 +76,7 @@ public class HiveObjectParser implements IAsterixHDFSRecordParser {
private UnorderedListBuilder unorderedListBuilder;
private boolean initialized = false;
private List<StructField> fieldRefs;
+ private UTF8StringWriter utf8Writer = new UTF8StringWriter();
@SuppressWarnings({ "unchecked" })
@Override
@@ -308,7 +310,7 @@ public class HiveObjectParser implements IAsterixHDFSRecordParser {
}
private void parseString(Object obj, StringObjectInspector foi, DataOutput dataOutput) throws IOException {
- dataOutput.writeUTF(foi.getPrimitiveJavaObject(obj));
+ utf8Writer.writeUTF8(foi.getPrimitiveJavaObject(obj), dataOutput);
}
private void parseTime(Object obj, TimestampObjectInspector foi, DataOutput dataOutput) throws IOException {
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorNodePushable.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorNodePushable.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorNodePushable.java
index 0aa761c..d8865b4 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorNodePushable.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorNodePushable.java
@@ -36,9 +36,10 @@ import org.apache.hyracks.storage.am.lsm.common.api.ITwoPCIndex;
public class ExternalIndexBulkModifyOperatorNodePushable extends IndexBulkLoadOperatorNodePushable {
+ private final FilesIndexDescription filesIndexDescription = new FilesIndexDescription();
private final int[] deletedFiles;
private ArrayTupleBuilder buddyBTreeTupleBuilder = new ArrayTupleBuilder(
- FilesIndexDescription.FILE_BUDDY_BTREE_RECORD_DESCRIPTOR.getFieldCount());
+ filesIndexDescription.FILE_BUDDY_BTREE_RECORD_DESCRIPTOR.getFieldCount());
private AMutableInt32 fileNumber = new AMutableInt32(0);
private ArrayTupleReference deleteTuple = new ArrayTupleReference();
@@ -65,7 +66,7 @@ public class ExternalIndexBulkModifyOperatorNodePushable extends IndexBulkLoadOp
// Delete files
for (int i = 0; i < deletedFiles.length; i++) {
fileNumber.setValue(deletedFiles[i]);
- FilesIndexDescription.getBuddyBTreeTupleFromFileNumber(deleteTuple, buddyBTreeTupleBuilder, fileNumber);
+ filesIndexDescription.getBuddyBTreeTupleFromFileNumber(deleteTuple, buddyBTreeTupleBuilder, fileNumber);
((ITwoPCIndexBulkLoader) bulkLoader).delete(deleteTuple);
}
} catch (Exception e) {
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectAccessors.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectAccessors.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectAccessors.java
index 84e3d38..6010e54 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectAccessors.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectAccessors.java
@@ -226,27 +226,22 @@ public class JObjectAccessors {
}
public static class JStringAccessor implements IJObjectAccessor {
- private final ByteArrayAccessibleOutputStream baaos = new ByteArrayAccessibleOutputStream();
+ private final AStringSerializerDeserializer aStringSerDer = new AStringSerializerDeserializer();
@Override
public IJObject access(IVisitablePointable pointable, IObjectPool<IJObject, IAType> objectPool)
throws HyracksDataException {
- IJObject jObject = objectPool.allocate(BuiltinType.ASTRING);
+ byte[] b = pointable.getByteArray();
+ int s = pointable.getStartOffset();
+ int l = pointable.getLength();
- try {
- byte byteArray[] = pointable.getByteArray();
- int len = pointable.getLength()-3;
- int off = pointable.getStartOffset()+3;
- baaos.reset();
- if(off >= 0 && off <= byteArray.length && len >= 0 && off + len - byteArray.length <= 0) {
- baaos.write(byteArray, off, len);
- ((JString) jObject).setValue(JObjectUtil.getNormalizedString(baaos.toString("UTF-8")));
- } else {
- ((JString) jObject).setValue("");
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
+ String v = null;
+ v = aStringSerDer.deserialize(
+ new DataInputStream(new ByteArrayInputStream(b, s + 1, l - 1))).getStringValue();
+ JObjectUtil.getNormalizedString(v);
+
+ IJObject jObject = objectPool.allocate(BuiltinType.ASTRING);
+ ((JString) jObject).setValue(JObjectUtil.getNormalizedString(v));
return jObject;
}
}
@@ -449,6 +444,7 @@ public class JObjectAccessors {
private final JRecord jRecord;
private final IJObject[] jObjects;
private final LinkedHashMap<String, IJObject> openFields;
+ private final AStringSerializerDeserializer aStringSerDer = new AStringSerializerDeserializer();
public JRecordAccessor(ARecordType recordType, IObjectPool<IJObject, IAType> objectPool) {
this.typeInfo = new TypeInfo(objectPool, null, null);
@@ -491,7 +487,8 @@ public class JObjectAccessors {
// value is null
fieldObject = null;
} else {
- fieldObject = pointableVisitor.visit((AListVisitablePointable) fieldPointable, typeInfo);
+ fieldObject = pointableVisitor
+ .visit((AListVisitablePointable) fieldPointable, typeInfo);
}
break;
case ANY:
@@ -505,7 +502,7 @@ public class JObjectAccessors {
byte[] b = fieldName.getByteArray();
int s = fieldName.getStartOffset();
int l = fieldName.getLength();
- String v = AStringSerializerDeserializer.INSTANCE.deserialize(
+ String v = aStringSerDer.deserialize(
new DataInputStream(new ByteArrayInputStream(b, s + 1, l - 1))).getStringValue();
openFields.put(v, fieldObject);
}
@@ -540,7 +537,8 @@ public class JObjectAccessors {
}
@Override
- public IJObject access(AListVisitablePointable pointable, IObjectPool<IJObject, IAType> objectPool, IAType listType,
+ public IJObject access(AListVisitablePointable pointable, IObjectPool<IJObject, IAType> objectPool,
+ IAType listType,
JObjectPointableVisitor pointableVisitor) throws HyracksDataException {
List<IVisitablePointable> items = pointable.getItems();
List<IVisitablePointable> itemTags = pointable.getItemTags();
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectUtil.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectUtil.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectUtil.java
index 5bf87bc..a0710ff 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectUtil.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectUtil.java
@@ -392,7 +392,7 @@ public class JObjectUtil {
dis.readInt();
}
for (int i = 0; i < numberOfOpenFields; i++) {
- fieldNames[i] = AStringSerializerDeserializer.INSTANCE.deserialize(dis).getStringValue();
+ fieldNames[i] = new AStringSerializerDeserializer().deserialize(dis).getStringValue();
ATypeTag openFieldTypeTag = SerializerDeserializerUtil.deserializeTag(dis);
openFields[i] = getJType(openFieldTypeTag, null, dis, objectPool);
fieldTypes[i] = openFields[i].getIAObject().getType();
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjects.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjects.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjects.java
index c3843e1..02f7b4b 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjects.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjects.java
@@ -367,6 +367,8 @@ public class JObjects {
public static final class JString extends JObject {
+ private final AStringSerializerDeserializer aStringSerDer = new AStringSerializerDeserializer();
+
public JString(String v) {
super(new AMutableString(v));
}
@@ -388,7 +390,7 @@ public class JObjects {
throw new HyracksDataException(e);
}
}
- AStringSerializerDeserializer.INSTANCE.serialize((AString) value, dataOutput);
+ aStringSerDer.serialize((AString) value, dataOutput);
}
@Override
@@ -976,6 +978,8 @@ public class JObjects {
private ARecordType recordType;
private IJObject[] fields;
private Map<String, IJObject> openFields;
+ private final AStringSerializerDeserializer aStringSerDer = new AStringSerializerDeserializer();
+
public JRecord(ARecordType recordType, IJObject[] fields) {
this.recordType = recordType;
@@ -1104,7 +1108,7 @@ public class JObjects {
openFieldValue.reset();
nameValue.setValue(entry.getKey());
openFieldName.getDataOutput().write(ATypeTag.STRING.serialize());
- AStringSerializerDeserializer.INSTANCE.serialize(nameValue, openFieldName.getDataOutput());
+ aStringSerDer.serialize(nameValue, openFieldName.getDataOutput());
entry.getValue().serialize(openFieldValue.getDataOutput(), true);
recordBuilder.addField(openFieldName, openFieldValue);
}
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-fuzzyjoin/pom.xml
----------------------------------------------------------------------
diff --git a/asterix-fuzzyjoin/pom.xml b/asterix-fuzzyjoin/pom.xml
index a96c926..f95bcc1 100644
--- a/asterix-fuzzyjoin/pom.xml
+++ b/asterix-fuzzyjoin/pom.xml
@@ -66,6 +66,14 @@
<groupId>org.apache.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-util</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
+ </dependency>
</dependencies>
</project>
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
----------------------------------------------------------------------
diff --git a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java b/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
index 8b6b23d..ea57fd2 100644
--- a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
+++ b/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
@@ -21,13 +21,11 @@ package org.apache.asterix.fuzzyjoin.similarity;
import java.util.Arrays;
-import org.apache.asterix.fuzzyjoin.tokenizer.StringUtils;
import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
- private final int utf8SizeIndicatorSize = 2;
-
// dp implementation only needs 2 rows
private final int rows = 2;
private int cols;
@@ -159,8 +157,13 @@ public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
// faster implementation for common case of string edit distance
public int UTF8StringEditDistance(byte[] bytes, int fsStart, int ssStart) {
- int fsLen = StringUtils.getStrLen(bytes, fsStart);
- int ssLen = StringUtils.getStrLen(bytes, ssStart);
+ int fsLen = UTF8StringUtil.getStringLength(bytes, fsStart);
+ int ssLen = UTF8StringUtil.getStringLength(bytes, ssStart);
+
+ int fsUtfLen = UTF8StringUtil.getUTFLength(bytes, fsStart);
+ int ssUtfLen = UTF8StringUtil.getUTFLength(bytes, ssStart);
+ int fsMetaLen = UTF8StringUtil.getNumBytesToStoreLength(fsUtfLen);
+ int ssMetaLen = UTF8StringUtil.getNumBytesToStoreLength(ssUtfLen);
// reuse existing matrix if possible
if (ssLen >= cols) {
@@ -168,8 +171,8 @@ public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
matrix = new int[rows][cols];
}
- int fsDataStart = fsStart + utf8SizeIndicatorSize;
- int ssDataStart = ssStart + utf8SizeIndicatorSize;
+ int fsDataStart = fsStart + fsMetaLen;
+ int ssDataStart = ssStart + ssMetaLen;
// init matrix
for (int i = 0; i <= ssLen; i++) {
@@ -183,19 +186,19 @@ public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
int fsPos = fsDataStart;
for (int i = 1; i <= fsLen; i++) {
matrix[currRow][0] = i;
- char fsChar = StringUtils.toLowerCase(StringUtils.charAt(bytes, fsPos));
+ char fsChar = Character.toLowerCase(UTF8StringUtil.charAt(bytes, fsPos));
int ssPos = ssDataStart;
for (int j = 1; j <= ssLen; j++) {
- char ssChar = StringUtils.toLowerCase(StringUtils.charAt(bytes, ssPos));
+ char ssChar = Character.toLowerCase(UTF8StringUtil.charAt(bytes, ssPos));
matrix[currRow][j] = Math.min(Math.min(matrix[prevRow][j] + 1, matrix[currRow][j - 1] + 1),
matrix[prevRow][j - 1] + (fsChar == ssChar ? 0 : 1));
- ssPos += StringUtils.charSize(bytes, ssPos);
+ ssPos += UTF8StringUtil.charSize(bytes, ssPos);
}
- fsPos += StringUtils.charSize(bytes, fsPos);
+ fsPos += UTF8StringUtil.charSize(bytes, fsPos);
int tmp = currRow;
currRow = prevRow;
@@ -207,8 +210,13 @@ public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
public int UTF8StringEditDistance(byte[] bytes, int fsStart, int ssStart, int edThresh) {
- int fsStrLen = StringUtils.getStrLen(bytes, fsStart);
- int ssStrLen = StringUtils.getStrLen(bytes, ssStart);
+ int fsStrLen = UTF8StringUtil.getStringLength(bytes, fsStart);
+ int ssStrLen = UTF8StringUtil.getStringLength(bytes, ssStart);
+
+ int fsUtfLen = UTF8StringUtil.getUTFLength(bytes, fsStart);
+ int ssUtfLen = UTF8StringUtil.getUTFLength(bytes, ssStart);
+ int fsMetaLen = UTF8StringUtil.getNumBytesToStoreLength(fsUtfLen);
+ int ssMetaLen = UTF8StringUtil.getNumBytesToStoreLength(ssUtfLen);
// length filter
if (Math.abs(fsStrLen - ssStrLen) > edThresh) {
@@ -220,25 +228,25 @@ public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
Arrays.fill(ssLcCount, 0);
// compute letter counts for first string
- int fsPos = fsStart + utf8SizeIndicatorSize;
- int fsEnd = fsPos + StringUtils.getUTFLen(bytes, fsStart);;
+ int fsPos = fsStart + fsMetaLen;
+ int fsEnd = fsPos + fsUtfLen;;
while (fsPos < fsEnd) {
- char c = StringUtils.toLowerCase(StringUtils.charAt(bytes, fsPos));
+ char c = Character.toLowerCase(UTF8StringUtil.charAt(bytes, fsPos));
if (c < 128) {
fsLcCount[c]++;
}
- fsPos += StringUtils.charSize(bytes, fsPos);
+ fsPos += UTF8StringUtil.charSize(bytes, fsPos);
}
// compute letter counts for second string
- int ssPos = ssStart + utf8SizeIndicatorSize;
- int ssEnd = ssPos + StringUtils.getUTFLen(bytes, ssStart);
+ int ssPos = ssStart + ssMetaLen;
+ int ssEnd = ssPos + ssUtfLen;
while (ssPos < ssEnd) {
- char c = StringUtils.toLowerCase(StringUtils.charAt(bytes, ssPos));
+ char c = Character.toLowerCase(UTF8StringUtil.charAt(bytes, ssPos));
if (c < 128) {
ssLcCount[c]++;
}
- ssPos += StringUtils.charSize(bytes, ssPos);
+ ssPos += UTF8StringUtil.charSize(bytes, ssPos);
}
// apply filter
@@ -269,8 +277,14 @@ public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
// checks whether the first string contains a similar string to the second string
public int UTF8StringEditDistanceContains(byte[] bytes, int stringStart, int patternStart, int edThresh) {
- int stringLen = StringUtils.getStrLen(bytes, stringStart);
- int patternLen = StringUtils.getStrLen(bytes, patternStart);
+ int stringLen = UTF8StringUtil.getStringLength(bytes, stringStart);
+ int patternLen = UTF8StringUtil.getStringLength(bytes, patternStart);
+
+ int stringUTFLen = UTF8StringUtil.getUTFLength(bytes, stringStart);
+ int stringMetaLen = UTF8StringUtil.getNumBytesToStoreLength(stringUTFLen);
+
+ int patternUTFLen = UTF8StringUtil.getUTFLength(bytes, patternStart);
+ int patternMetaLen = UTF8StringUtil.getNumBytesToStoreLength(patternUTFLen);
// reuse existing matrix if possible
if (patternLen >= cols) {
@@ -278,8 +292,8 @@ public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
matrix = new int[rows][cols];
}
- int stringDataStart = stringStart + utf8SizeIndicatorSize;
- int patternDataStart = patternStart + utf8SizeIndicatorSize;
+ int stringDataStart = stringStart + stringMetaLen;
+ int patternDataStart = patternStart + patternMetaLen;
// init matrix
for (int i = 0; i <= patternLen; i++) {
@@ -293,23 +307,23 @@ public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
int stringPos = stringDataStart;
for (int i = 1; i <= stringLen; i++) {
matrix[currRow][0] = 0;
- char stringChar = StringUtils.toLowerCase(StringUtils.charAt(bytes, stringPos));
+ char stringChar = Character.toLowerCase(UTF8StringUtil.charAt(bytes, stringPos));
int patternPos = patternDataStart;
for (int j = 1; j <= patternLen; j++) {
- char patternChar = StringUtils.toLowerCase(StringUtils.charAt(bytes, patternPos));
+ char patternChar = Character.toLowerCase(UTF8StringUtil.charAt(bytes, patternPos));
matrix[currRow][j] = Math.min(Math.min(matrix[prevRow][j] + 1, matrix[currRow][j - 1] + 1),
matrix[prevRow][j - 1] + (stringChar == patternChar ? 0 : 1));
- patternPos += StringUtils.charSize(bytes, patternPos);
+ patternPos += UTF8StringUtil.charSize(bytes, patternPos);
if (j == patternLen && matrix[currRow][patternLen] < minEd) {
minEd = matrix[currRow][patternLen];
}
}
- stringPos += StringUtils.charSize(bytes, stringPos);
+ stringPos += UTF8StringUtil.charSize(bytes, stringPos);
int tmp = currRow;
currRow = prevRow;
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8StringBinaryTokenizer.java
----------------------------------------------------------------------
diff --git a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8StringBinaryTokenizer.java b/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8StringBinaryTokenizer.java
deleted file mode 100644
index fe90e05..0000000
--- a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8StringBinaryTokenizer.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-import org.apache.asterix.fuzzyjoin.IntArray;
-
-public abstract class AbstractUTF8StringBinaryTokenizer implements IBinaryTokenizer {
-
- protected byte[] data;
- protected int start;
- protected int length;
- protected int tokenLength;
- protected int index;
- protected int utf8Length;
-
- protected final IntArray tokensStart;
- protected final IntArray tokensLength;
- protected final IToken token;
-
- protected final boolean ignoreTokenCount;
- protected final boolean sourceHasTypeTag;
-
- public AbstractUTF8StringBinaryTokenizer(boolean ignoreTokenCount, boolean sourceHasTypeTag,
- ITokenFactory tokenFactory) {
- this.ignoreTokenCount = ignoreTokenCount;
- this.sourceHasTypeTag = sourceHasTypeTag;
- if (!ignoreTokenCount) {
- tokensStart = new IntArray();
- tokensLength = new IntArray();
- } else {
- tokensStart = null;
- tokensLength = null;
- }
- token = tokenFactory.createToken();
- }
-
- @Override
- public IToken getToken() {
- return token;
- }
-
- @Override
- public void reset(byte[] data, int start, int length) {
- this.start = start;
- index = this.start;
- if (sourceHasTypeTag) {
- index++; // skip type tag
- }
- utf8Length = StringUtils.getUTFLen(data, index);
- index += 2; // skip utf8 length indicator
- this.data = data;
- this.length = length + start;
-
- tokenLength = 0;
- if (!ignoreTokenCount) {
- tokensStart.reset();
- tokensLength.reset();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8Token.java
----------------------------------------------------------------------
diff --git a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8Token.java b/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8Token.java
deleted file mode 100644
index 835d591..0000000
--- a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8Token.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-public abstract class AbstractUTF8Token implements IToken {
- public static final int GOLDEN_RATIO_32 = 0x09e3779b9;
-
- protected int length;
- protected int tokenLength;
- protected int start;
- protected int tokenCount;
- protected byte[] data;
- protected final byte tokenTypeTag;
- protected final byte countTypeTag;
-
- public AbstractUTF8Token() {
- tokenTypeTag = -1;
- countTypeTag = -1;
- }
-
- public AbstractUTF8Token(byte tokenTypeTag, byte countTypeTag) {
- this.tokenTypeTag = tokenTypeTag;
- this.countTypeTag = countTypeTag;
- }
-
- @Override
- public byte[] getData() {
- return data;
- }
-
- @Override
- public int getLength() {
- return length;
- }
-
- public int getLowerCaseUTF8Len(int size) {
- int lowerCaseUTF8Len = 0;
- int pos = start;
- for (int i = 0; i < size; i++) {
- char c = StringUtils.toLowerCase(StringUtils.charAt(data, pos));
- lowerCaseUTF8Len += StringUtils.getModifiedUTF8Len(c);
- pos += StringUtils.charSize(data, pos);
- }
- return lowerCaseUTF8Len;
- }
-
- @Override
- public int getStart() {
- return start;
- }
-
- @Override
- public int getTokenLength() {
- return tokenLength;
- }
-
- public void handleCountTypeTag(DataOutput dos) throws IOException {
- if (countTypeTag > 0) {
- dos.write(countTypeTag);
- }
- }
-
- public void handleTokenTypeTag(DataOutput dos) throws IOException {
- if (tokenTypeTag > 0) {
- dos.write(tokenTypeTag);
- }
- }
-
- @Override
- public void reset(byte[] data, int start, int length, int tokenLength, int tokenCount) {
- this.data = data;
- this.start = start;
- this.length = length;
- this.tokenLength = tokenLength;
- this.tokenCount = tokenCount;
- }
-
- @Override
- public void serializeTokenCount(DataOutput dos) throws IOException {
- handleCountTypeTag(dos);
- dos.writeInt(tokenCount);
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8TokenFactory.java
----------------------------------------------------------------------
diff --git a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8TokenFactory.java b/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8TokenFactory.java
deleted file mode 100644
index 849bfd1..0000000
--- a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8TokenFactory.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-public abstract class AbstractUTF8TokenFactory implements ITokenFactory {
- private static final long serialVersionUID = 1L;
- protected final byte tokenTypeTag;
- protected final byte countTypeTag;
-
- public AbstractUTF8TokenFactory() {
- tokenTypeTag = -1;
- countTypeTag = -1;
- }
-
- public AbstractUTF8TokenFactory(byte tokenTypeTag, byte countTypeTag) {
- this.tokenTypeTag = tokenTypeTag;
- this.countTypeTag = countTypeTag;
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizer.java
----------------------------------------------------------------------
diff --git a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizer.java b/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizer.java
deleted file mode 100644
index 4b11026..0000000
--- a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizer.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-public class DelimitedUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryTokenizer {
-
- public DelimitedUTF8StringBinaryTokenizer(boolean ignoreTokenCount, boolean sourceHasTypeTag,
- ITokenFactory tokenFactory) {
- super(ignoreTokenCount, sourceHasTypeTag, tokenFactory);
- }
-
- @Override
- public boolean hasNext() {
- // skip delimiters
- while (index < length && isSeparator(StringUtils.charAt(data, index))) {
- index += StringUtils.charSize(data, index);
- }
- return index < length;
- }
-
- private boolean isSeparator(char c) {
- return !(Character.isLetterOrDigit(c) || Character.getType(c) == Character.OTHER_LETTER || Character.getType(c) == Character.OTHER_NUMBER);
- }
-
- @Override
- public void next() {
- tokenLength = 0;
- int currentTokenStart = index;
- while (index < length && !isSeparator(StringUtils.charAt(data, index))) {
- index += StringUtils.charSize(data, index);
- tokenLength++;
- }
- int tokenCount = 1;
- if (tokenLength > 0 && !ignoreTokenCount) {
- // search if we got the same token before
- for (int i = 0; i < tokensStart.length(); ++i) {
- if (tokenLength == tokensLength.get(i)) {
- int tokenStart = tokensStart.get(i);
- tokenCount++; // assume we found it
- int offset = 0;
- int currLength = 0;
- while (currLength < tokenLength) {
- // case insensitive comparison
- if (StringUtils.toLowerCase(StringUtils.charAt(data, currentTokenStart + offset)) != StringUtils
- .toLowerCase(StringUtils.charAt(data, tokenStart + offset))) {
- tokenCount--;
- break;
- }
- offset += StringUtils.charSize(data, currentTokenStart + offset);
- currLength++;
- }
- }
- }
- // add the new token to the list of seen tokens
- tokensStart.add(currentTokenStart);
- tokensLength.add(tokenLength);
- }
-
- // set token
- token.reset(data, currentTokenStart, index, tokenLength, tokenCount);
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizerFactory.java
----------------------------------------------------------------------
diff --git a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizerFactory.java b/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizerFactory.java
deleted file mode 100644
index ba49e5c..0000000
--- a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizerFactory.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-public class DelimitedUTF8StringBinaryTokenizerFactory implements IBinaryTokenizerFactory {
-
- private static final long serialVersionUID = 1L;
- private final boolean ignoreTokenCount;
- private final boolean sourceHasTypeTag;
- private final ITokenFactory tokenFactory;
-
- public DelimitedUTF8StringBinaryTokenizerFactory(boolean ignoreTokenCount, boolean sourceHasTypeTag,
- ITokenFactory tokenFactory) {
- this.ignoreTokenCount = ignoreTokenCount;
- this.sourceHasTypeTag = sourceHasTypeTag;
- this.tokenFactory = tokenFactory;
- }
-
- @Override
- public IBinaryTokenizer createTokenizer() {
- return new DelimitedUTF8StringBinaryTokenizer(ignoreTokenCount, sourceHasTypeTag, tokenFactory);
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/HashedUTF8NGramToken.java
----------------------------------------------------------------------
diff --git a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/HashedUTF8NGramToken.java b/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/HashedUTF8NGramToken.java
deleted file mode 100644
index 786d07b..0000000
--- a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/HashedUTF8NGramToken.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-public class HashedUTF8NGramToken extends UTF8NGramToken {
- public HashedUTF8NGramToken(byte tokenTypeTag, byte countTypeTag) {
- super(tokenTypeTag, countTypeTag);
- }
-
- @Override
- public void serializeToken(DataOutput dos) throws IOException {
- handleTokenTypeTag(dos);
-
- int hash = GOLDEN_RATIO_32;
-
- // pre chars
- for (int i = 0; i < numPreChars; i++) {
- hash ^= PRECHAR;
- hash *= GOLDEN_RATIO_32;
- }
-
- // regular chars
- int numRegGrams = tokenLength - numPreChars - numPostChars;
- int pos = start;
- for (int i = 0; i < numRegGrams; i++) {
- hash ^= StringUtils.toLowerCase(StringUtils.charAt(data, pos));
- hash *= GOLDEN_RATIO_32;
- pos += StringUtils.charSize(data, pos);
- }
-
- // post chars
- for (int i = 0; i < numPostChars; i++) {
- hash ^= POSTCHAR;
- hash *= GOLDEN_RATIO_32;
- }
-
- // token count
- hash += tokenCount;
-
- dos.writeInt(hash);
- }
-}