You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@asterixdb.apache.org by "Wenhai (JIRA)" <ji...@apache.org> on 2016/07/25 04:20:20 UTC
[jira] [Updated] (ASTERIXDB-1544) Omit the fuzzyjoin on inverted index
[ https://issues.apache.org/jira/browse/ASTERIXDB-1544?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Wenhai updated ASTERIXDB-1544:
------------------------------
Description:
In the current master, we have NO test cases covering a fuzzy join on an (inverted) indexed field. Once we trigger a fuzzy join ("~=") on an indexed field, we always get an error with the following log.
Schema
{noformat}
drop dataverse fuzzyjointest if exists;
create dataverse fuzzyjointest;
use dataverse fuzzyjointest;
create type DBLPType as open {
tid: uuid,
id: int64,
dblpid: string?,
title: string?,
authors: string?,
misc: string?
}
create type CSXType as closed {
tid: uuid,
id: int64,
csxid: string?,
title: string?,
authors: string?,
misc: string?
}
create dataset DBLP(DBLPType) primary key tid autogenerated;
create dataset CSX(CSXType) primary key tid autogenerated;
load dataset DBLP
using localfs
(("path"="127.0.0.1:///Users/michael/Research/asterixdb-src/asterixdb-fuzzy/asterixdb/asterixdb/asterix-app/data/dblp-small/dblp-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")) pre-sorted;
load dataset CSX
using localfs
(("path"="127.0.0.1:///Users/michael/Research/asterixdb-src/asterixdb-fuzzy/asterixdb/asterixdb/asterix-app/data/pub-small/csx-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
use dataverse fuzzyjointest;
drop index DBLP.title_index if exists;
create index title_index on DBLP(title) type keyword;
drop index DBLP.author_index if exists;
create index author_index on DBLP(authors) type keyword;
drop index CSX.csx_author_index if exists;
create index csx_author_index on CSX(authors) type keyword;
{noformat}
Query
{noformat}
use dataverse fuzzyjointest;
set simthreshold '.7f'
for $o in dataset('DBLP')
for $t in dataset('CSX')
where word-tokens($o.authors) ~= word-tokens($t.authors)
return {"cid": $t.id, "did": $o.id}
{noformat}
The error is the same as the one reported in ASTERIXDB-1487.
{noformat}
org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null
at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
at org.apache.hyracks.control.nc.Task.run(Task.java:319)
... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
at org.apache.hyracks.control.nc.Task.run(Task.java:297)
... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
... 4 more
Caused by: java.lang.NullPointerException
at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
... 5 more
org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null
at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
at org.apache.hyracks.control.nc.Task.run(Task.java:319)
... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
at org.apache.hyracks.control.nc.Task.run(Task.java:297)
... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
... 4 more
Caused by: java.lang.NullPointerException
at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
... 5 more
Jul 24, 2016 8:31:29 AM org.apache.asterix.api.http.servlet.APIServlet doPost
SEVERE: Job failed on account of:
HYR0002: null
org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null
at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
at org.apache.hyracks.control.nc.Task.run(Task.java:319)
... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
at org.apache.hyracks.control.nc.Task.run(Task.java:297)
... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
... 4 more
Caused by: java.lang.NullPointerException
at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
... 5 more
{noformat}
was:
In the current master, we have NO test cases covering a fuzzy join on an (inverted) indexed field. Once we trigger a fuzzy join ("~=") on an indexed field, we always get an error with the following log.
The error is the same as the one reported in ASTERIXDB-1487.
{noformat}
org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null
at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
at org.apache.hyracks.control.nc.Task.run(Task.java:319)
... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
at org.apache.hyracks.control.nc.Task.run(Task.java:297)
... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
... 4 more
Caused by: java.lang.NullPointerException
at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
... 5 more
org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null
at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
at org.apache.hyracks.control.nc.Task.run(Task.java:319)
... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
at org.apache.hyracks.control.nc.Task.run(Task.java:297)
... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
... 4 more
Caused by: java.lang.NullPointerException
at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
... 5 more
Jul 24, 2016 8:31:29 AM org.apache.asterix.api.http.servlet.APIServlet doPost
SEVERE: Job failed on account of:
HYR0002: null
org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null
at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
at org.apache.hyracks.control.nc.Task.run(Task.java:319)
... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
at org.apache.hyracks.control.nc.Task.run(Task.java:297)
... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
... 4 more
Caused by: java.lang.NullPointerException
at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
... 5 more
{noformat}
> Omit the fuzzyjoin on inverted index
> ------------------------------------
>
> Key: ASTERIXDB-1544
> URL: https://issues.apache.org/jira/browse/ASTERIXDB-1544
> Project: Apache AsterixDB
> Issue Type: Bug
> Environment: MAC/linux
> Reporter: Wenhai
> Assignee: Wenhai
> Priority: Critical
>
> In the current master, we have NO test cases covering a fuzzy join on an (inverted) indexed field. Once we trigger a fuzzy join ("~=") on an indexed field, we always get an error with the following log.
> Schema
> {noformat}
> drop dataverse fuzzyjointest if exists;
> create dataverse fuzzyjointest;
> use dataverse fuzzyjointest;
> create type DBLPType as open {
> tid: uuid,
> id: int64,
> dblpid: string?,
> title: string?,
> authors: string?,
> misc: string?
> }
> create type CSXType as closed {
> tid: uuid,
> id: int64,
> csxid: string?,
> title: string?,
> authors: string?,
> misc: string?
> }
> create dataset DBLP(DBLPType) primary key tid autogenerated;
> create dataset CSX(CSXType) primary key tid autogenerated;
> load dataset DBLP
> using localfs
> (("path"="127.0.0.1:///Users/michael/Research/asterixdb-src/asterixdb-fuzzy/asterixdb/asterixdb/asterix-app/data/dblp-small/dblp-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")) pre-sorted;
> load dataset CSX
> using localfs
> (("path"="127.0.0.1:///Users/michael/Research/asterixdb-src/asterixdb-fuzzy/asterixdb/asterixdb/asterix-app/data/pub-small/csx-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
> use dataverse fuzzyjointest;
> drop index DBLP.title_index if exists;
> create index title_index on DBLP(title) type keyword;
> drop index DBLP.author_index if exists;
> create index author_index on DBLP(authors) type keyword;
> drop index CSX.csx_author_index if exists;
> create index csx_author_index on CSX(authors) type keyword;
> {noformat}
> Query
> {noformat}
> use dataverse fuzzyjointest;
> set simthreshold '.7f'
> for $o in dataset('DBLP')
> for $t in dataset('CSX')
> where word-tokens($o.authors) ~= word-tokens($t.authors)
> return {"cid": $t.id, "did": $o.id}
> {noformat}
> The error is the same as the one reported in ASTERIXDB-1487.
> {noformat}
> org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
> HYR0002: null
> at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
> at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
> at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
> at org.apache.hyracks.control.nc.Task.run(Task.java:319)
> ... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
> at org.apache.hyracks.control.nc.Task.run(Task.java:297)
> ... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
> at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
> ... 4 more
> Caused by: java.lang.NullPointerException
> at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
> at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
> ... 5 more
> org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
> HYR0002: null
> at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
> at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
> at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
> at org.apache.hyracks.control.nc.Task.run(Task.java:319)
> ... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
> at org.apache.hyracks.control.nc.Task.run(Task.java:297)
> ... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
> at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
> ... 4 more
> Caused by: java.lang.NullPointerException
> at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
> at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
> ... 5 more
> Jul 24, 2016 8:31:29 AM org.apache.asterix.api.http.servlet.APIServlet doPost
> SEVERE: Job failed on account of:
> HYR0002: null
> org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
> HYR0002: null
> at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
> at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
> at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
> at org.apache.hyracks.control.nc.Task.run(Task.java:319)
> ... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
> at org.apache.hyracks.control.nc.Task.run(Task.java:297)
> ... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
> at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
> ... 4 more
> Caused by: java.lang.NullPointerException
> at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
> at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
> ... 5 more
> {noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)