You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@asterixdb.apache.org by "Wenhai (JIRA)" <ji...@apache.org> on 2016/07/25 04:20:20 UTC

[jira] [Updated] (ASTERIXDB-1544) Omit the fuzzyjoin on inverted index

     [ https://issues.apache.org/jira/browse/ASTERIXDB-1544?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Wenhai updated ASTERIXDB-1544:
------------------------------
    Description: 
In the current master, we have NO test cases covering the fuzzyjoin on the (inverted) indexed field. Once we trigger a fuzzy join "~=" on an indexed field, we will always get an error with the following log.
Schema
{noformat}
drop dataverse fuzzyjointest if exists;
create dataverse fuzzyjointest;
use dataverse fuzzyjointest;

create type DBLPType as open {
  tid: uuid,
  id: int64,
  dblpid: string?,
  title: string?,
  authors: string?,
  misc: string?
}

create type CSXType as closed {
  tid: uuid,
  id: int64,
  csxid: string?,
  title: string?,
  authors: string?,
  misc: string?
}

create dataset DBLP(DBLPType) primary key tid autogenerated;

create dataset CSX(CSXType) primary key tid autogenerated;

load dataset DBLP
using localfs
(("path"="127.0.0.1:///Users/michael/Research/asterixdb-src/asterixdb-fuzzy/asterixdb/asterixdb/asterix-app/data/dblp-small/dblp-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")) pre-sorted;

load dataset CSX
using localfs
(("path"="127.0.0.1:///Users/michael/Research/asterixdb-src/asterixdb-fuzzy/asterixdb/asterixdb/asterix-app/data/pub-small/csx-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));

use dataverse fuzzyjointest;
drop index DBLP.title_index if exists;
create index title_index on DBLP(title) type keyword;
drop index DBLP.author_index if exists;
create index author_index on DBLP(authors) type keyword;
drop index CSX.csx_author_index if exists;
create index csx_author_index on CSX(authors) type keyword;
{noformat}

Query
{noformat}
use dataverse fuzzyjointest;
set simthreshold '.7f'
for $o in dataset('DBLP')
for $t in dataset('CSX')
where word-tokens($o.authors) ~= word-tokens($t.authors)
return {"cid": $t.id, "did": $o.id}
{noformat}

The error is similar to the one in issue-1487.
{noformat}
org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null

	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
	... 4 more
Caused by: java.lang.NullPointerException
	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
	... 5 more
org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null

	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
	... 4 more
Caused by: java.lang.NullPointerException
	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
	... 5 more
Jul 24, 2016 8:31:29 AM org.apache.asterix.api.http.servlet.APIServlet doPost
SEVERE: Job failed on account of:
HYR0002: null

org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null

	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
	... 4 more
Caused by: java.lang.NullPointerException
	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
	... 5 more
{noformat}

  was:
In the current master, we have NO test cases covering the fuzzyjoin on the (inverted) indexed field. Once we trigger a fuzzy join "~=" on an indexed field, we will always get an error with the following log.
The error is similar to the one in issue-1487.
{noformat}
org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null

	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
	... 4 more
Caused by: java.lang.NullPointerException
	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
	... 5 more
org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null

	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
	... 4 more
Caused by: java.lang.NullPointerException
	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
	... 5 more
Jul 24, 2016 8:31:29 AM org.apache.asterix.api.http.servlet.APIServlet doPost
SEVERE: Job failed on account of:
HYR0002: null

org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null

	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
	... 4 more
Caused by: java.lang.NullPointerException
	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
	... 5 more
{noformat}


> Omit the fuzzyjoin on inverted index
> ------------------------------------
>
>                 Key: ASTERIXDB-1544
>                 URL: https://issues.apache.org/jira/browse/ASTERIXDB-1544
>             Project: Apache AsterixDB
>          Issue Type: Bug
>         Environment: MAC/linux
>            Reporter: Wenhai
>            Assignee: Wenhai
>            Priority: Critical
>
> In the current master, we have NO test cases covering the fuzzyjoin on the (inverted) indexed field. Once we trigger a fuzzy join "~=" on an indexed field, we will always get an error with the following log.
> Schema
> {noformat}
> drop dataverse fuzzyjointest if exists;
> create dataverse fuzzyjointest;
> use dataverse fuzzyjointest;
> create type DBLPType as open {
>   tid: uuid,
>   id: int64,
>   dblpid: string?,
>   title: string?,
>   authors: string?,
>   misc: string?
> }
> create type CSXType as closed {
>   tid: uuid,
>   id: int64,
>   csxid: string?,
>   title: string?,
>   authors: string?,
>   misc: string?
> }
> create dataset DBLP(DBLPType) primary key tid autogenerated;
> create dataset CSX(CSXType) primary key tid autogenerated;
> load dataset DBLP
> using localfs
> (("path"="127.0.0.1:///Users/michael/Research/asterixdb-src/asterixdb-fuzzy/asterixdb/asterixdb/asterix-app/data/dblp-small/dblp-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")) pre-sorted;
> load dataset CSX
> using localfs
> (("path"="127.0.0.1:///Users/michael/Research/asterixdb-src/asterixdb-fuzzy/asterixdb/asterixdb/asterix-app/data/pub-small/csx-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
> use dataverse fuzzyjointest;
> drop index DBLP.title_index if exists;
> create index title_index on DBLP(title) type keyword;
> drop index DBLP.author_index if exists;
> create index author_index on DBLP(authors) type keyword;
> drop index CSX.csx_author_index if exists;
> create index csx_author_index on CSX(authors) type keyword;
> {noformat}
> Query
> {noformat}
> use dataverse fuzzyjointest;
> set simthreshold '.7f'
> for $o in dataset('DBLP')
> for $t in dataset('CSX')
> where word-tokens($o.authors) ~= word-tokens($t.authors)
> return {"cid": $t.id, "did": $o.id}
> {noformat}
> The error is similar to the one in issue-1487.
> {noformat}
> org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
> HYR0002: null
> 	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
> 	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
> 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> 	at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
> 	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
> 	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
> 	... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> 	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
> 	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
> 	... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> 	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
> 	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
> 	... 4 more
> Caused by: java.lang.NullPointerException
> 	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
> 	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
> 	... 5 more
> org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
> HYR0002: null
> 	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
> 	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
> 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> 	at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
> 	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
> 	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
> 	... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> 	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
> 	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
> 	... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> 	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
> 	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
> 	... 4 more
> Caused by: java.lang.NullPointerException
> 	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
> 	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
> 	... 5 more
> Jul 24, 2016 8:31:29 AM org.apache.asterix.api.http.servlet.APIServlet doPost
> SEVERE: Job failed on account of:
> HYR0002: null
> org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
> HYR0002: null
> 	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
> 	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
> 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> 	at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
> 	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
> 	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
> 	... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> 	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
> 	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
> 	... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> 	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
> 	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
> 	... 4 more
> Caused by: java.lang.NullPointerException
> 	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
> 	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
> 	... 5 more
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)