You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by wa...@apache.org on 2018/02/19 17:07:54 UTC

[7/7] asterixdb git commit: [ASTERIXDB-2083][COMP][RT][IDX][SITE] Budget-Constrained Inverted index search

[ASTERIXDB-2083][COMP][RT][IDX][SITE] Budget-Constrained Inverted index search

- user-model changes: add text.searchmemory parameter
- storage format changes: no
- interface changes: IInvertedIndexSearcher, IInPlaceInvertedIndex,
                     IInvertedIndexAccessor, IInvertedListCursor,
                     IObjectFactory, IPartitionedInvertedIndex,
                     IIndexAccessor

Details:
- Introduce the text.searchmemory parameter in the configuration
  to conduct budget-constrained inverted index search and thereby
  prevent a possible OOM exception
- Remove the non-standard Hyracks task context from the inverted index search

Change-Id: Ib2b2ef7c0b8c55ef66a5322be5d97ebbbf287bf5
Reviewed-on: https://asterix-gerrit.ics.uci.edu/2251
Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
Contrib: Jenkins <je...@fulliautomatix.ics.uci.edu>
Reviewed-by: abdullah alamoudi <ba...@gmail.com>
Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>


Project: http://git-wip-us.apache.org/repos/asf/asterixdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/asterixdb/commit/afe0d3d9
Tree: http://git-wip-us.apache.org/repos/asf/asterixdb/tree/afe0d3d9
Diff: http://git-wip-us.apache.org/repos/asf/asterixdb/diff/afe0d3d9

Branch: refs/heads/master
Commit: afe0d3d99d42260a0a8553495354720ee8d62a84
Parents: c587da1
Author: Taewoo Kim <wa...@yahoo.com>
Authored: Sun Feb 18 19:12:08 2018 -0800
Committer: Taewoo Kim <wa...@gmail.com>
Committed: Mon Feb 19 09:07:23 2018 -0800

----------------------------------------------------------------------
 .../physical/InvertedIndexPOperator.java        |    10 +-
 .../asterix-app/data/csv/fragile_sample.csv     | 20000 +++++++++++++++++
 .../apache/asterix/api/common/APIFramework.java |    18 +-
 .../app/resource/OperatorResourcesComputer.java |    26 +-
 .../org/apache/asterix/utils/ResourceUtils.java |     7 +-
 .../asterix-app/src/main/resources/cc.conf      |     1 +
 .../asterix-app/src/main/resources/cc2.conf     |     1 +
 .../asterix-app/src/main/resources/cc3.conf     |     1 +
 .../app/resource/PlanStagesGeneratorTest.java   |     2 +-
 .../asterix-app/src/test/resources/cc.conf      |     1 +
 .../fulltext-index-03.1.ddl.sqlpp               |    51 +
 .../fulltext-index-03.2.update.sqlpp            |    24 +
 .../fulltext-index-03.3.query.sqlpp             |    25 +
 .../fulltext-index-large-data.1.ddl.sqlpp       |    47 +
 .../fulltext-index-large-data.2.update.sqlpp    |    27 +
 .../fulltext-index-large-data.3.ddl.sqlpp       |    22 +
 .../fulltext-index-large-data.4.query.sqlpp     |    24 +
 .../fulltext-index-large-data.5.query.sqlpp     |    24 +
 ...x-ngram-edit-distance-large-data.1.ddl.sqlpp |    47 +
 ...gram-edit-distance-large-data.2.update.sqlpp |    27 +
 ...x-ngram-edit-distance-large-data.3.ddl.sqlpp |    22 +
 ...ngram-edit-distance-large-data.4.query.sqlpp |    26 +
 ...ngram-edit-distance-large-data.5.query.sqlpp |    26 +
 .../cluster_state_1/cluster_state_1.1.regexadm  |     1 +
 .../cluster_state_1_full.1.regexadm             |     1 +
 .../cluster_state_1_less.1.regexadm             |     1 +
 .../fulltext-index-large-data.4.adm             |     1 +
 .../fulltext-index-large-data.5.adm             |     1 +
 ...d-index-ngram-edit-distance-large-data.4.adm |     1 +
 ...d-index-ngram-edit-distance-large-data.5.adm |     1 +
 .../resources/runtimets/testsuite_sqlpp.xml     |    15 +
 .../common/config/CompilerProperties.java       |    10 +
 .../asterix-doc/src/site/markdown/ncservice.md  |     1 +
 .../base/PhysicalOptimizationConfig.java        |    10 +
 .../hyracks/api/exceptions/ErrorCode.java       |     5 +
 .../hyracks/api/util/HyracksConstants.java      |     4 +
 .../src/main/resources/errormsg/en.properties   |     5 +
 .../BufferManagerBackedVSizeFrame.java          |   108 +
 .../BTreeSearchOperatorNodePushable.java        |     7 +
 .../IndexSearchOperatorNodePushable.java        |     6 +-
 .../common/impls/NoOpIndexAccessParameters.java |     5 +-
 .../am/common/test/IIndexCursorTest.java        |     2 +-
 ...eeDiskComponentScanOperatorNodePushable.java |     6 +
 .../lsm/btree/impls/ExternalBTreeWithBuddy.java |     4 +-
 .../storage/am/lsm/btree/impls/LSMBTree.java    |    11 +-
 .../am/lsm/common/impls/AbstractLSMIndex.java   |    13 +-
 .../api/IInPlaceInvertedIndex.java              |    19 +-
 .../api/IInvertedIndexAccessor.java             |     6 +-
 .../api/IInvertedIndexSearcher.java             |    33 +-
 .../invertedindex/api/IInvertedListCursor.java  |    57 -
 .../lsm/invertedindex/api/IObjectFactory.java   |     4 +-
 .../api/IPartitionedInvertedIndex.java          |     6 +-
 .../invertedindex/api/InvertedListCursor.java   |   103 +
 ...SMInvertedIndexSearchOperatorDescriptor.java |     8 +-
 ...InvertedIndexSearchOperatorNodePushable.java |    24 +-
 .../invertedindex/impls/LSMInvertedIndex.java   |    19 +-
 .../impls/LSMInvertedIndexAccessor.java         |     6 +-
 .../impls/LSMInvertedIndexOpContext.java        |    21 +-
 ...SMInvertedIndexSearchCursorInitialState.java |    47 +-
 .../inmemory/InMemoryInvertedIndex.java         |    27 +-
 .../inmemory/InMemoryInvertedIndexAccessor.java |    46 +-
 .../inmemory/InMemoryInvertedListCursor.java    |    54 +-
 .../PartitionedInMemoryInvertedIndex.java       |    15 +-
 ...artitionedInMemoryInvertedIndexAccessor.java |    16 +-
 .../FixedSizeElementInvertedListCursor.java     |   394 +-
 .../FixedSizeElementInvertedListScanCursor.java |   191 +
 .../ondisk/FixedSizeFrameTupleAccessor.java     |    11 +-
 .../ondisk/FixedSizeFrameTupleAppender.java     |    32 +-
 .../ondisk/OnDiskInvertedIndex.java             |   128 +-
 .../ondisk/OnDiskInvertedIndexOpContext.java    |     2 +-
 .../OnDiskInvertedIndexRangeSearchCursor.java   |    78 +-
 .../ondisk/OnDiskInvertedIndexSearchCursor.java |    73 +-
 .../ondisk/PartitionedOnDiskInvertedIndex.java  |    42 +-
 .../search/AbstractTOccurrenceSearcher.java     |   185 +-
 .../search/InvertedIndexFinalSearchResult.java  |   179 +
 .../search/InvertedIndexSearchResult.java       |   416 +
 .../search/InvertedListCursorFactory.java       |    14 +-
 .../search/InvertedListMerger.java              |   661 +-
 .../search/InvertedListPartitions.java          |    27 +-
 .../search/PartitionedTOccurrenceSearcher.java  |   236 +-
 .../lsm/invertedindex/search/SearchResult.java  |   146 -
 .../search/TOccurrenceSearcher.java             |    72 +-
 .../am/lsm/invertedindex/util/ObjectCache.java  |     7 +-
 .../am/lsm/rtree/impls/AbstractLSMRTree.java    |    10 +-
 .../storage/am/lsm/rtree/impls/LSMRTree.java    |     3 +-
 .../impls/LSMRTreeWithAntiMatterTuples.java     |     2 +-
 .../RTreeSearchOperatorNodePushable.java        |     7 +
 .../hyracks/storage/common/IIndexAccessor.java  |     4 +-
 .../am/common/AbstractIndexTestWorker.java      |     2 +-
 .../storage/am/common/IndexTestContext.java     |     2 +-
 .../am/config/AccessMethodTestsConfig.java      |     3 +
 .../cursor/LSMBTreePointSearchCursorTest.java   |     2 +-
 .../cursor/LSMBTreeRangeSearchCursorTest.java   |     2 +-
 .../pom.xml                                     |     5 +
 .../common/AbstractInvertedIndexDeleteTest.java |     2 +-
 .../common/AbstractInvertedIndexSearchTest.java |     2 +-
 .../common/AbstractInvertedIndexTest.java       |     2 +-
 .../multithread/LSMInvertedIndexTestWorker.java |    24 +
 .../ondisk/FixedSizeFrameTupleTest.java         |    12 +-
 .../util/LSMInvertedIndexTestContext.java       |    24 +
 .../util/LSMInvertedIndexTestUtils.java         |   176 +-
 101 files changed, 23388 insertions(+), 979 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/asterixdb/blob/afe0d3d9/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/algebra/operators/physical/InvertedIndexPOperator.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/algebra/operators/physical/InvertedIndexPOperator.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/algebra/operators/physical/InvertedIndexPOperator.java
index c3cc0ae..eeb6688 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/algebra/operators/physical/InvertedIndexPOperator.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/algebra/operators/physical/InvertedIndexPOperator.java
@@ -18,6 +18,7 @@
  */
 package org.apache.asterix.algebra.operators.physical;
 
+import org.apache.asterix.common.config.OptimizationConfUtil;
 import org.apache.asterix.metadata.MetadataManager;
 import org.apache.asterix.metadata.declared.DataSourceId;
 import org.apache.asterix.metadata.declared.MetadataProvider;
@@ -109,13 +110,16 @@ public class InvertedIndexPOperator extends IndexSearchPOperator {
             // tuples.
             retainNull = true;
         }
+        // In-memory budget (frame limit) for inverted-index search operations
+        int frameLimit = OptimizationConfUtil.getPhysicalOptimizationConfig().getMaxFramesForTextSearch();
+
         // Build runtime.
         Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> invIndexSearch =
                 buildInvertedIndexRuntime(metadataProvider, context, builder.getJobSpec(), unnestMapOp, opSchema,
                         jobGenParams.getRetainInput(), retainNull, jobGenParams.getDatasetName(), dataset,
                         jobGenParams.getIndexName(), jobGenParams.getSearchKeyType(), keyIndexes,
                         jobGenParams.getSearchModifierType(), jobGenParams.getSimilarityThreshold(),
-                        minFilterFieldIndexes, maxFilterFieldIndexes, jobGenParams.getIsFullTextSearch());
+                        minFilterFieldIndexes, maxFilterFieldIndexes, jobGenParams.getIsFullTextSearch(), frameLimit);
 
         // Contribute operator in hyracks job.
         builder.contributeHyracksOperator(unnestMapOp, invIndexSearch.first);
@@ -129,7 +133,7 @@ public class InvertedIndexPOperator extends IndexSearchPOperator {
             AbstractUnnestMapOperator unnestMap, IOperatorSchema opSchema, boolean retainInput, boolean retainMissing,
             String datasetName, Dataset dataset, String indexName, ATypeTag searchKeyType, int[] keyFields,
             SearchModifierType searchModifierType, IAlgebricksConstantValue similarityThreshold,
-            int[] minFilterFieldIndexes, int[] maxFilterFieldIndexes, boolean isFullTextSearchQuery)
+            int[] minFilterFieldIndexes, int[] maxFilterFieldIndexes, boolean isFullTextSearchQuery, int frameLimit)
             throws AlgebricksException {
         boolean propagateIndexFilter = unnestMap.propagateIndexFilter();
         IAObject simThresh = ((AsterixConstantValue) similarityThreshold).getObject();
@@ -159,7 +163,7 @@ public class InvertedIndexPOperator extends IndexSearchPOperator {
                 dataset.getSearchCallbackFactory(metadataProvider.getStorageComponentProvider(), secondaryIndex,
                         IndexOperation.SEARCH, null),
                 minFilterFieldIndexes, maxFilterFieldIndexes, isFullTextSearchQuery, numPrimaryKeys,
-                propagateIndexFilter);
+                propagateIndexFilter, frameLimit);
         return new Pair<>(invIndexSearchOp, secondarySplitsAndConstraint.second);
     }
 }