You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@asterixdb.apache.org by "Jianfeng Jia (JIRA)" <ji...@apache.org> on 2016/06/03 22:57:59 UTC

[jira] [Updated] (ASTERIXDB-1472) Exception when ingesting the data with filter on a field

     [ https://issues.apache.org/jira/browse/ASTERIXDB-1472?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Jianfeng Jia updated ASTERIXDB-1472:
------------------------------------
    Description: 
Here is the aql:
{code}
drop dataverse twitter if exists;
create dataverse twitter if not exists;
use dataverse twitter

create type typeUser if not exists as open {
    id: int64,
    name: string,
    screen_name : string,
    lang : string,
    location: string,
    create_at: date,
    description: string,
    followers_count: int32,
    friends_count: int32,
    statues_count: int64
}

create type typePlace if not exists as open{
    country : string,
    country_code : string,
    full_name : string,
    id : string,
    name : string,
    place_type : string,
    bounding_box : rectangle
}

create type typeGeoTag if not exists as open {
    stateID: int32,
    stateName: string,
    countyID: int32,
    countyName: string,
    cityID: int32?,
    cityName: string?
}

create type typeTweet if not exists as open{
    create_at : datetime,
    id: int64,
    "text": string,
    in_reply_to_status : int64,
    in_reply_to_user : int64,
    favorite_count : int64,
    coordinate: point?,
    retweet_count : int64,
    lang : string,
    is_retweet: boolean,
    hashtags : {{ string }} ?,
    user_mentions : {{ int64 }} ? ,
    user : typeUser,
    place : typePlace?,
    geo_tag: typeGeoTag
}

create dataset ds_tweet(typeTweet) if not exists primary key id with filter on create_at;
//"using" "compaction" "policy" CompactionPolicy ( Configuration )? )?
create index text_idx if not exists on ds_tweet("text") type keyword;
create index location_idx if not exists on ds_tweet(coordinate) type rtree;
create index time_idx if not exists on ds_tweet(create_at) type btree;
create index state_idx if not exists on ds_tweet(geo_tag.stateID) type btree;
create index county_idx if not exists on ds_tweet(geo_tag.countyID) type btree;
create index city_idx if not exists on ds_tweet(geo_tag.cityID) type btree;

create feed MessageFeed using localfs(
("path"="128.195.52.77:///home/jianfeng/data/head20m.adm"),
("format"="adm"),
("type-name"="typeTweet"));

set wait-for-completion-feed "true";
connect feed MessageFeed to dataset ds_tweet;

{code}

The exception seems to be related to the merge phase:
{code}
java.lang.IllegalStateException
    at org.apache.hyracks.storage.am.lsm.common.impls.PrefixMergePolicy.isMergeLagging(PrefixMergePolicy.java:151)
    at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.exitComponents(LSMHarness.java:211)
    at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.flush(LSMHarness.java:437)
    at org.apache.hyracks.storage.am.lsm.common.impls.LSMTreeIndexAccessor.flush(LSMTreeIndexAccessor.java:105)
    at org.apache.hyracks.storage.am.lsm.rtree.impls.LSMRTreeFlushOperation.call(LSMRTreeFlushOperation.java:74)
    at org.apache.hyracks.storage.am.lsm.rtree.impls.LSMRTreeFlushOperation.call(LSMRTreeFlushOperation.java:34)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:744)
{code}

I uploaded a small sample of the data [here|https://drive.google.com/open?id=0B423M7wGZj9ddlN2Zk1SZmFEOGs]

After that, the system hung, and the read did not proceed any further.

If I remove the "with filter on create_at" clause from the create dataset statement, then everything works fine.


  was:
Here is the aql:
{code}
drop dataverse twitter if exists;
create dataverse twitter if not exists;
use dataverse twitter

create type typeUser if not exists as open {
    id: int64,
    name: string,
    screen_name : string,
    lang : string,
    location: string,
    create_at: date,
    description: string,
    followers_count: int32,
    friends_count: int32,
    statues_count: int64
}

create type typePlace if not exists as open{
    country : string,
    country_code : string,
    full_name : string,
    id : string,
    name : string,
    place_type : string,
    bounding_box : rectangle
}

create type typeGeoTag if not exists as open {
    stateID: int32,
    stateName: string,
    countyID: int32,
    countyName: string,
    cityID: int32?,
    cityName: string?
}

create type typeTweet if not exists as open{
    create_at : datetime,
    id: int64,
    "text": string,
    in_reply_to_status : int64,
    in_reply_to_user : int64,
    favorite_count : int64,
    coordinate: point?,
    retweet_count : int64,
    lang : string,
    is_retweet: boolean,
    hashtags : {{ string }} ?,
    user_mentions : {{ int64 }} ? ,
    user : typeUser,
    place : typePlace?,
    geo_tag: typeGeoTag
}

create dataset ds_tweet(typeTweet) if not exists primary key id with filter on create_at;
//"using" "compaction" "policy" CompactionPolicy ( Configuration )? )?
create index text_idx if not exists on ds_tweet("text") type keyword;
create index location_idx if not exists on ds_tweet(coordinate) type rtree;
create index time_idx if not exists on ds_tweet(create_at) type btree;
create index state_idx if not exists on ds_tweet(geo_tag.stateID) type btree;
create index county_idx if not exists on ds_tweet(geo_tag.countyID) type btree;
create index city_idx if not exists on ds_tweet(geo_tag.cityID) type btree;

create feed MessageFeed using localfs(
("path"="128.195.52.77:///home/jianfeng/data/head20m.adm"),
("format"="adm"),
("type-name"="typeTweet"));

set wait-for-completion-feed "true";
connect feed MessageFeed to dataset ds_tweet;

{code}

The exception seems related to the Merging phase 
{code}
java.lang.IllegalStateException
    at org.apache.hyracks.storage.am.lsm.common.impls.PrefixMergePolicy.isMergeLagging(PrefixMergePolicy.java:151)
    at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.exitComponents(LSMHarness.java:211)
    at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.flush(LSMHarness.java:437)
    at org.apache.hyracks.storage.am.lsm.common.impls.LSMTreeIndexAccessor.flush(LSMTreeIndexAccessor.java:105)
    at org.apache.hyracks.storage.am.lsm.rtree.impls.LSMRTreeFlushOperation.call(LSMRTreeFlushOperation.java:74)
    at org.apache.hyracks.storage.am.lsm.rtree.impls.LSMRTreeFlushOperation.call(LSMRTreeFlushOperation.java:34)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:744)
{code}

I uploaded small sample data [here|https://drive.google.com/open?id=0B423M7wGZj9ddlN2Zk1SZmFEOGs]




> Exception when ingesting the data with filter on a field
> --------------------------------------------------------
>
>                 Key: ASTERIXDB-1472
>                 URL: https://issues.apache.org/jira/browse/ASTERIXDB-1472
>             Project: Apache AsterixDB
>          Issue Type: Bug
>          Components: Storage
>         Environment: master code:
> commit 2dff79736e6f3f877149317d02395dbd12e16a20
> Date:   Thu Jun 2 23:13:52 2016 -0700
>            Reporter: Jianfeng Jia
>            Assignee: Murtadha Hubail
>
> Here is the aql:
> {code}
> drop dataverse twitter if exists;
> create dataverse twitter if not exists;
> use dataverse twitter
> create type typeUser if not exists as open {
>     id: int64,
>     name: string,
>     screen_name : string,
>     lang : string,
>     location: string,
>     create_at: date,
>     description: string,
>     followers_count: int32,
>     friends_count: int32,
>     statues_count: int64
> }
> create type typePlace if not exists as open{
>     country : string,
>     country_code : string,
>     full_name : string,
>     id : string,
>     name : string,
>     place_type : string,
>     bounding_box : rectangle
> }
> create type typeGeoTag if not exists as open {
>     stateID: int32,
>     stateName: string,
>     countyID: int32,
>     countyName: string,
>     cityID: int32?,
>     cityName: string?
> }
> create type typeTweet if not exists as open{
>     create_at : datetime,
>     id: int64,
>     "text": string,
>     in_reply_to_status : int64,
>     in_reply_to_user : int64,
>     favorite_count : int64,
>     coordinate: point?,
>     retweet_count : int64,
>     lang : string,
>     is_retweet: boolean,
>     hashtags : {{ string }} ?,
>     user_mentions : {{ int64 }} ? ,
>     user : typeUser,
>     place : typePlace?,
>     geo_tag: typeGeoTag
> }
> create dataset ds_tweet(typeTweet) if not exists primary key id with filter on create_at;
> //"using" "compaction" "policy" CompactionPolicy ( Configuration )? )?
> create index text_idx if not exists on ds_tweet("text") type keyword;
> create index location_idx if not exists on ds_tweet(coordinate) type rtree;
> create index time_idx if not exists on ds_tweet(create_at) type btree;
> create index state_idx if not exists on ds_tweet(geo_tag.stateID) type btree;
> create index county_idx if not exists on ds_tweet(geo_tag.countyID) type btree;
> create index city_idx if not exists on ds_tweet(geo_tag.cityID) type btree;
> create feed MessageFeed using localfs(
> ("path"="128.195.52.77:///home/jianfeng/data/head20m.adm"),
> ("format"="adm"),
> ("type-name"="typeTweet"));
> set wait-for-completion-feed "true";
> connect feed MessageFeed to dataset ds_tweet;
> {code}
> The exception seems to be related to the merge phase:
> {code}
> java.lang.IllegalStateException
>     at org.apache.hyracks.storage.am.lsm.common.impls.PrefixMergePolicy.isMergeLagging(PrefixMergePolicy.java:151)
>     at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.exitComponents(LSMHarness.java:211)
>     at org.apache.hyracks.storage.am.lsm.common.impls.LSMHarness.flush(LSMHarness.java:437)
>     at org.apache.hyracks.storage.am.lsm.common.impls.LSMTreeIndexAccessor.flush(LSMTreeIndexAccessor.java:105)
>     at org.apache.hyracks.storage.am.lsm.rtree.impls.LSMRTreeFlushOperation.call(LSMRTreeFlushOperation.java:74)
>     at org.apache.hyracks.storage.am.lsm.rtree.impls.LSMRTreeFlushOperation.call(LSMRTreeFlushOperation.java:34)
>     at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>     at java.lang.Thread.run(Thread.java:744)
> {code}
> I uploaded a small sample of the data [here|https://drive.google.com/open?id=0B423M7wGZj9ddlN2Zk1SZmFEOGs]
> After that, the system hung, and the read did not proceed any further.
> If I remove the "with filter on create_at" clause from the create dataset statement, then everything works fine.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)