You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by mb...@apache.org on 2023/01/30 03:26:58 UTC

[asterixdb] 09/30: [NO ISSUE][COMP] Add sample-seed parameter to ANALYZE DATASET

This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit fba0449ab02c7df056e00efb1bfdf04bf319ef76
Author: Dmitry Lychagin <dm...@couchbase.com>
AuthorDate: Fri Jun 17 18:18:52 2022 -0700

    [NO ISSUE][COMP] Add sample-seed parameter to ANALYZE DATASET
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    - Add sample-seed parameter to ANALYZE DATASET statement
    - Update testcases
    
    Change-Id: I78429541bf7d720cc73dc674dd532f7a1f066a24
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/16584
    Contrib: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Ali Alsuliman <al...@gmail.com>
    Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17327
    Reviewed-by: Michael Blow <mb...@apache.org>
    Tested-by: Michael Blow <mb...@apache.org>
---
 .../asterix/app/translator/QueryTranslator.java    |  8 ++--
 .../analyze-dataset-1.1.ddl.sqlpp                  |  5 ++-
 .../analyze-dataset-1.10.ddl.sqlpp                 |  2 +-
 .../analyze-dataset-1.11.query.sqlpp               |  2 +-
 .../analyze-dataset-1.14.query.sqlpp               |  2 +-
 .../analyze-dataset-1.15.ddl.sqlpp                 |  2 +-
 .../analyze-dataset-1.16.query.sqlpp               |  2 +-
 .../analyze-dataset-1.19.query.sqlpp               |  2 +-
 .../analyze-dataset-1.2.query.sqlpp                |  2 +-
 .../analyze-dataset-1.21.query.sqlpp               |  2 +-
 .../analyze-dataset-1.4.ddl.sqlpp                  |  2 +-
 .../analyze-dataset-1.5.query.sqlpp                |  2 +-
 .../analyze-dataset-1.7.query.sqlpp                |  2 +-
 .../analyze-dataset-1.9.query.sqlpp                |  2 +-
 .../ddl/analyze-dataset-1/analyze-dataset-1.11.adm |  2 +-
 .../ddl/analyze-dataset-1/analyze-dataset-1.14.adm |  2 +-
 .../ddl/analyze-dataset-1/analyze-dataset-1.16.adm |  2 +-
 .../ddl/analyze-dataset-1/analyze-dataset-1.19.adm |  2 +-
 .../ddl/analyze-dataset-1/analyze-dataset-1.2.adm  |  2 +-
 .../ddl/analyze-dataset-1/analyze-dataset-1.5.adm  |  2 +-
 .../ddl/analyze-dataset-1/analyze-dataset-1.7.adm  |  2 +-
 .../ddl/analyze-dataset-1/analyze-dataset-1.9.adm  |  2 +-
 .../lang/common/statement/AnalyzeStatement.java    | 46 +++++++++++++++++++++-
 .../apache/asterix/metadata/entities/Index.java    | 11 +++++-
 .../IndexTupleTranslator.java                      | 16 +++++++-
 .../metadata/utils/SampleOperationsHelper.java     |  3 +-
 .../SampleSlotRunningAggregateFunctionFactory.java |  9 +++--
 27 files changed, 105 insertions(+), 33 deletions(-)

diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
index 71f59cf753..5b60cc050c 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
@@ -4271,9 +4271,11 @@ public class QueryTranslator extends AbstractLangTranslator implements IStatemen
 
             InternalDatasetDetails dsDetails = (InternalDatasetDetails) ds.getDatasetDetails();
             int sampleCardinalityTarget = stmtAnalyze.getSampleSize();
+            long sampleSeed = stmtAnalyze.getOrCreateSampleSeed();
 
-            Index.SampleIndexDetails newIndexDetailsPendingAdd = new Index.SampleIndexDetails(dsDetails.getPrimaryKey(),
-                    dsDetails.getKeySourceIndicator(), dsDetails.getPrimaryKeyType(), sampleCardinalityTarget, 0, 0);
+            Index.SampleIndexDetails newIndexDetailsPendingAdd =
+                    new Index.SampleIndexDetails(dsDetails.getPrimaryKey(), dsDetails.getKeySourceIndicator(),
+                            dsDetails.getPrimaryKeyType(), sampleCardinalityTarget, 0, 0, sampleSeed);
             newIndexPendingAdd = new Index(dataverseName, datasetName, newIndexName, sampleIndexType,
                     newIndexDetailsPendingAdd, false, false, MetadataUtil.PENDING_ADD_OP);
 
@@ -4315,7 +4317,7 @@ public class QueryTranslator extends AbstractLangTranslator implements IStatemen
 
             Index.SampleIndexDetails newIndexDetailsFinal = new Index.SampleIndexDetails(dsDetails.getPrimaryKey(),
                     dsDetails.getKeySourceIndicator(), dsDetails.getPrimaryKeyType(), sampleCardinalityTarget,
-                    stats.getCardinality(), stats.getAvgTupleSize());
+                    stats.getCardinality(), stats.getAvgTupleSize(), sampleSeed);
             Index newIndexFinal = new Index(dataverseName, datasetName, newIndexName, sampleIndexType,
                     newIndexDetailsFinal, false, false, MetadataUtil.PENDING_NO_OP);
 
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.1.ddl.sqlpp
index 50daffd58e..e1d6b1020e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.1.ddl.sqlpp
@@ -27,9 +27,10 @@ drop dataverse test if exists;
 create dataverse test;
 use test;
 
-create function listMetadata(showSourceAvgItemSize) {
+create function listMetadata(showSourceAvgItemSize, showSeed) {
   select i.DatasetName, i.IndexName, i.SampleCardinalityTarget, i.SourceCardinality,
-    case when showSourceAvgItemSize then i.SourceAvgItemSize else i.SourceAvgItemSize > 0 end as SourceAvgItemSize
+    case when showSourceAvgItemSize then i.SourceAvgItemSize else i.SourceAvgItemSize > 0 end as SourceAvgItemSize,
+    case when showSeed then i.SampleSeed else i.SampleSeed is known end as SampleSeed
   from Metadata.`Index` i
   where i.DataverseName = "test" and i.IndexName like "sample_idx%"
   order by i.IndexName
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.10.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.10.ddl.sqlpp
index 1de0947dfa..da5fe13a8d 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.10.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.10.ddl.sqlpp
@@ -24,4 +24,4 @@
 
 use test;
 
-analyze dataset test.ds1 with { "sample": "medium" };
+analyze dataset test.ds1 with { "sample": "medium", "sample-seed": 234.0 };
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.11.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.11.query.sqlpp
index 549d273331..38ded0a330 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.11.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.11.query.sqlpp
@@ -26,5 +26,5 @@ set `import-private-functions` `true`;
 use test;
 
 select * from
-  listMetadata(false) metadata,
+  listMetadata(false, true) metadata,
   showSampleStats("ds1", "sample_idx_2_ds1", true) stats;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.14.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.14.query.sqlpp
index b206f5920b..4cae2027a4 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.14.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.14.query.sqlpp
@@ -26,5 +26,5 @@ set `import-private-functions` `true`;
 use test;
 
 select * from
-  listMetadata(false) metadata,
+  listMetadata(false, false) metadata,
   showSampleStats("ds1", "sample_idx_1_ds1", false) stats;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.15.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.15.ddl.sqlpp
index 0cdaf19fcc..6ceb81416d 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.15.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.15.ddl.sqlpp
@@ -24,4 +24,4 @@
 
 use test;
 
-analyze dataset ds1 with { "sample": "high" };
+analyze dataset ds1 with { "sample": "high", "sample-seed": "345" };
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.16.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.16.query.sqlpp
index 549d273331..38ded0a330 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.16.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.16.query.sqlpp
@@ -26,5 +26,5 @@ set `import-private-functions` `true`;
 use test;
 
 select * from
-  listMetadata(false) metadata,
+  listMetadata(false, true) metadata,
   showSampleStats("ds1", "sample_idx_2_ds1", true) stats;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.19.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.19.query.sqlpp
index b206f5920b..4cae2027a4 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.19.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.19.query.sqlpp
@@ -26,5 +26,5 @@ set `import-private-functions` `true`;
 use test;
 
 select * from
-  listMetadata(false) metadata,
+  listMetadata(false, false) metadata,
   showSampleStats("ds1", "sample_idx_1_ds1", false) stats;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.2.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.2.query.sqlpp
index 0f1edbd43e..e786e0ef0a 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.2.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.2.query.sqlpp
@@ -24,4 +24,4 @@
 
 use test;
 
-listMetadata(true);
+listMetadata(true, false);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp
index 587629e858..759fc3f417 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp
@@ -24,4 +24,4 @@
 use test;
 
 select count(*) cnt
-from listMetadata(true) v;
+from listMetadata(true, false) v;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.4.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.4.ddl.sqlpp
index 3993a1c565..ed97897046 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.4.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.4.ddl.sqlpp
@@ -21,4 +21,4 @@
  * Description: Test sample size parameter
  */
 
-analyze dataset test.ds1 with { "sample": "low" };
+analyze dataset test.ds1 with { "sample": "low", "sample-seed": 123 };
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.5.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.5.query.sqlpp
index 243dab84ab..e0cd6cc7c5 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.5.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.5.query.sqlpp
@@ -27,5 +27,5 @@ set `import-private-functions` `true`;
 use test;
 
 select * from
-  listMetadata(false) metadata,
+  listMetadata(false, true) metadata,
   showSampleStats("ds1", "sample_idx_2_ds1", true) stats
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.7.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.7.query.sqlpp
index d984ef5f6e..c4930b0920 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.7.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.7.query.sqlpp
@@ -27,5 +27,5 @@ set `import-private-functions` `true`;
 use test;
 
 select * from
-  listMetadata(false) metadata,
+  listMetadata(false, true) metadata,
   showSampleStats("ds1", "sample_idx_2_ds1", true) stats;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.9.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.9.query.sqlpp
index b206f5920b..4cae2027a4 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.9.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.9.query.sqlpp
@@ -26,5 +26,5 @@ set `import-private-functions` `true`;
 use test;
 
 select * from
-  listMetadata(false) metadata,
+  listMetadata(false, false) metadata,
   showSampleStats("ds1", "sample_idx_1_ds1", false) stats;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.11.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.11.adm
index 534cc7a9c0..58f454b3ed 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.11.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.11.adm
@@ -1 +1 @@
-{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 4252, "SourceCardinality": 1100, "SourceAvgItemSize": true }, "stats": { "cnt": 1100, "min_pk": 1, "max_pk": 1100, "min_x": -1100, "max_x": -1 } }
\ No newline at end of file
+{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 4252, "SourceCardinality": 1100, "SourceAvgItemSize": true, "SampleSeed": 234 }, "stats": { "cnt": 1100, "min_pk": 1, "max_pk": 1100, "min_x": -1100, "max_x": -1 } }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.14.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.14.adm
index ee57f4cae6..6ef756af2f 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.14.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.14.adm
@@ -1 +1 @@
-{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1", "SampleCardinalityTarget": 4252, "SourceCardinality": 4400, "SourceAvgItemSize": true }, "stats": { "cnt": 4246, "min_pk": true, "max_pk": true, "min_x": true, "max_x": true } }
\ No newline at end of file
+{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1", "SampleCardinalityTarget": 4252, "SourceCardinality": 4400, "SourceAvgItemSize": true, "SampleSeed": true }, "stats": { "cnt": 4246, "min_pk": true, "max_pk": true, "min_x": true, "max_x": true } }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.16.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.16.adm
index b46ed0b4ce..01eb5b74a8 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.16.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.16.adm
@@ -1 +1 @@
-{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 17008, "SourceCardinality": 4400, "SourceAvgItemSize": true }, "stats": { "cnt": 4400, "min_pk": 1, "max_pk": 4400, "min_x": -4400, "max_x": -1 } }
\ No newline at end of file
+{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 17008, "SourceCardinality": 4400, "SourceAvgItemSize": true, "SampleSeed": 345 }, "stats": { "cnt": 4400, "min_pk": 1, "max_pk": 4400, "min_x": -4400, "max_x": -1 } }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.19.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.19.adm
index 74d092707f..60b969f490 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.19.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.19.adm
@@ -1 +1 @@
-{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1", "SampleCardinalityTarget": 17008, "SourceCardinality": 17100, "SourceAvgItemSize": true }, "stats": { "cnt": 16972, "min_pk": true, "max_pk": true, "min_x": true, "max_x": true } }
\ No newline at end of file
+{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1", "SampleCardinalityTarget": 17008, "SourceCardinality": 17100, "SourceAvgItemSize": true, "SampleSeed": true }, "stats": { "cnt": 16972, "min_pk": true, "max_pk": true, "min_x": true, "max_x": true } }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.2.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.2.adm
index ab853ec2d0..e3cefeeb10 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.2.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.2.adm
@@ -1 +1 @@
-{ "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1", "SampleCardinalityTarget": 1063, "SourceCardinality": 0, "SourceAvgItemSize": 0 }
\ No newline at end of file
+{ "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1", "SampleCardinalityTarget": 1063, "SourceCardinality": 0, "SourceAvgItemSize": 0, "SampleSeed": true }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.5.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.5.adm
index a8a77bd0fc..605bb1270e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.5.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.5.adm
@@ -1 +1 @@
-{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 1063, "SourceCardinality": 8, "SourceAvgItemSize": true }, "stats": { "cnt": 8, "min_pk": 1, "max_pk": 8, "min_x": -8, "max_x": -1 } }
\ No newline at end of file
+{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 1063, "SourceCardinality": 8, "SourceAvgItemSize": true, "SampleSeed": 123 }, "stats": { "cnt": 8, "min_pk": 1, "max_pk": 8, "min_x": -8, "max_x": -1 } }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.7.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.7.adm
index a8a77bd0fc..605bb1270e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.7.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.7.adm
@@ -1 +1 @@
-{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 1063, "SourceCardinality": 8, "SourceAvgItemSize": true }, "stats": { "cnt": 8, "min_pk": 1, "max_pk": 8, "min_x": -8, "max_x": -1 } }
\ No newline at end of file
+{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 1063, "SourceCardinality": 8, "SourceAvgItemSize": true, "SampleSeed": 123 }, "stats": { "cnt": 8, "min_pk": 1, "max_pk": 8, "min_x": -8, "max_x": -1 } }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.9.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.9.adm
index ee7f2c013f..0084d2bc86 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.9.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.9.adm
@@ -1 +1 @@
-{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1", "SampleCardinalityTarget": 1063, "SourceCardinality": 1100, "SourceAvgItemSize": true }, "stats": { "cnt": 1033, "min_pk": true, "max_pk": true, "min_x": true, "max_x": true } }
\ No newline at end of file
+{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1", "SampleCardinalityTarget": 1063, "SourceCardinality": 1100, "SourceAvgItemSize": true, "SampleSeed": true }, "stats": { "cnt": 1033, "min_pk": true, "max_pk": true, "min_x": true, "max_x": true } }
\ No newline at end of file
diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java
index 7e6e99dbcc..cbf2c071fe 100644
--- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java
+++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java
@@ -34,6 +34,7 @@ import org.apache.asterix.object.base.AdmObjectNode;
 import org.apache.asterix.object.base.AdmStringNode;
 import org.apache.asterix.object.base.IAdmNode;
 import org.apache.asterix.om.types.BuiltinType;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 
 public class AnalyzeStatement extends AbstractStatement {
 
@@ -46,6 +47,8 @@ public class AnalyzeStatement extends AbstractStatement {
     private static final int SAMPLE_HIGH_SIZE = SAMPLE_MEDIUM_SIZE * 4;
     private static final int SAMPLE_DEFAULT_SIZE = SAMPLE_LOW_SIZE;
 
+    private static final String SAMPLE_SEED_FIELD_NAME = "sample-seed";
+
     private final DataverseName dataverseName;
     private final String datasetName;
     private final AdmObjectNode options;
@@ -54,7 +57,20 @@ public class AnalyzeStatement extends AbstractStatement {
             throws CompilationException {
         this.dataverseName = dataverseName;
         this.datasetName = datasetName;
-        this.options = options == null ? null : ExpressionUtils.toNode(options);
+        this.options = options == null ? null : validateOptions(ExpressionUtils.toNode(options));
+    }
+
+    private static AdmObjectNode validateOptions(AdmObjectNode options) throws CompilationException {
+        for (String fieldName : options.getFieldNames()) {
+            switch (fieldName) {
+                case SAMPLE_FIELD_NAME:
+                case SAMPLE_SEED_FIELD_NAME:
+                    break;
+                default:
+                    throw new CompilationException(ErrorCode.INVALID_PARAM, fieldName);
+            }
+        }
+        return options;
     }
 
     @Override
@@ -106,6 +122,34 @@ public class AnalyzeStatement extends AbstractStatement {
         }
     }
 
+    public long getOrCreateSampleSeed() throws AlgebricksException {
+        IAdmNode n = getOption(SAMPLE_SEED_FIELD_NAME);
+        return n != null ? getSampleSeed(n) : createSampleSeed();
+    }
+
+    private long getSampleSeed(IAdmNode n) throws CompilationException {
+        switch (n.getType()) {
+            case BIGINT:
+                return ((AdmBigIntNode) n).get();
+            case DOUBLE:
+                return (long) ((AdmDoubleNode) n).get();
+            case STRING:
+                String s = ((AdmStringNode) n).get();
+                try {
+                    return Long.parseLong(s);
+                } catch (NumberFormatException e) {
+                    throw new CompilationException(ErrorCode.INVALID_PROPERTY_FORMAT, SAMPLE_SEED_FIELD_NAME);
+                }
+            default:
+                throw new CompilationException(ErrorCode.WITH_FIELD_MUST_BE_OF_TYPE, SAMPLE_SEED_FIELD_NAME,
+                        BuiltinType.AINT64.getTypeName(), n.getType().toString());
+        }
+    }
+
+    private long createSampleSeed() {
+        return System.nanoTime() + System.identityHashCode(this);
+    }
+
     private boolean isValidSampleSize(int v) {
         return v >= SAMPLE_LOW_SIZE && v <= SAMPLE_HIGH_SIZE;
     }
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Index.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Index.java
index eae81d5b04..21d2aaac6b 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Index.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Index.java
@@ -553,15 +553,18 @@ public class Index implements IMetadataEntity<Index>, Comparable<Index> {
 
         private final int sourceAvgItemSize;
 
+        private final long sampleSeed;
+
         public SampleIndexDetails(List<List<String>> keyFieldNames, List<Integer> keyFieldSourceIndicators,
-                List<IAType> keyFieldTypes, int sampleCardinalityTarget, long sourceCardinality,
-                int sourceAvgItemSize) {
+                List<IAType> keyFieldTypes, int sampleCardinalityTarget, long sourceCardinality, int sourceAvgItemSize,
+                long sampleSeed) {
             this.keyFieldNames = keyFieldNames;
             this.keyFieldSourceIndicators = keyFieldSourceIndicators;
             this.keyFieldTypes = keyFieldTypes;
             this.sampleCardinalityTarget = sampleCardinalityTarget;
             this.sourceCardinality = sourceCardinality;
             this.sourceAvgItemSize = sourceAvgItemSize;
+            this.sampleSeed = sampleSeed;
         }
 
         @Override
@@ -597,6 +600,10 @@ public class Index implements IMetadataEntity<Index>, Comparable<Index> {
         public int getSourceAvgItemSize() {
             return sourceAvgItemSize;
         }
+
+        public long getSampleSeed() {
+            return sampleSeed;
+        }
     }
 
     @Deprecated
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslator.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslator.java
index 967c2ba128..9c742ed9c2 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslator.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslator.java
@@ -91,6 +91,7 @@ public class IndexTupleTranslator extends AbstractTupleTranslator<Index> {
     public static final String INDEX_SEARCHKEY_ELEMENTS_FIELD_NAME = "SearchKeyElements";
     public static final String COMPLEXSEARCHKEY_UNNEST_FIELD_NAME = "UnnestList";
     public static final String COMPLEXSEARCHKEY_PROJECT_FIELD_NAME = "ProjectList";
+    public static final String SAMPLE_SEED = "SampleSeed";
     public static final String SAMPLE_CARDINALITY_TARGET = "SampleCardinalityTarget";
     public static final String SOURCE_CARDINALITY = "SourceCardinality";
     public static final String SOURCE_AVG_ITEM_SIZE = "SourceAvgItemSize";
@@ -464,6 +465,12 @@ public class IndexTupleTranslator extends AbstractTupleTranslator<Index> {
                         searchElements.stream().map(Pair::getSecond).map(l -> l.get(0)).collect(Collectors.toList());
                 keyFieldTypes = searchKeyType.stream().map(l -> l.get(0)).collect(Collectors.toList());
 
+                int sampleSeedPos = indexRecord.getType().getFieldIndex(SAMPLE_SEED);
+                if (sampleSeedPos < 0) {
+                    throw new AsterixException(ErrorCode.METADATA_ERROR, SAMPLE_SEED);
+                }
+                long sampleSeed = ((AInt64) indexRecord.getValueByPos(sampleSeedPos)).getLongValue();
+
                 int sampleCardinalityTargetPos = indexRecord.getType().getFieldIndex(SAMPLE_CARDINALITY_TARGET);
                 if (sampleCardinalityTargetPos < 0) {
                     throw new AsterixException(ErrorCode.METADATA_ERROR, SAMPLE_CARDINALITY_TARGET);
@@ -484,7 +491,7 @@ public class IndexTupleTranslator extends AbstractTupleTranslator<Index> {
                 int sourceAvgItemSize = ((AInt32) indexRecord.getValueByPos(sourceAvgItemSizePos)).getIntegerValue();
 
                 indexDetails = new Index.SampleIndexDetails(keyFieldNames, keyFieldSourceIndicator, keyFieldTypes,
-                        sampleCardinalityTarget, sourceCardinality, sourceAvgItemSize);
+                        sampleCardinalityTarget, sourceCardinality, sourceAvgItemSize, sampleSeed);
                 break;
             default:
                 throw new AsterixException(ErrorCode.METADATA_ERROR, indexType.toString());
@@ -901,6 +908,13 @@ public class IndexTupleTranslator extends AbstractTupleTranslator<Index> {
         if (index.getIndexType() == IndexType.SAMPLE) {
             Index.SampleIndexDetails indexDetails = (Index.SampleIndexDetails) index.getIndexDetails();
 
+            nameValue.reset();
+            fieldValue.reset();
+            aString.setValue(SAMPLE_SEED);
+            stringSerde.serialize(aString, nameValue.getDataOutput());
+            int64Serde.serialize(new AInt64(indexDetails.getSampleSeed()), fieldValue.getDataOutput());
+            recordBuilder.addField(nameValue, fieldValue);
+
             nameValue.reset();
             fieldValue.reset();
             aString.setValue(SAMPLE_CARDINALITY_TARGET);
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SampleOperationsHelper.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SampleOperationsHelper.java
index 28e1ac2e1e..0d3e015c0f 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SampleOperationsHelper.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SampleOperationsHelper.java
@@ -157,6 +157,7 @@ public class SampleOperationsHelper implements ISecondaryIndexOperationsHelper {
     public JobSpecification buildLoadingJobSpec() throws AlgebricksException {
         Index.SampleIndexDetails indexDetails = (Index.SampleIndexDetails) index.getIndexDetails();
         int sampleCardinalityTarget = indexDetails.getSampleCardinalityTarget();
+        long sampleSeed = indexDetails.getSampleSeed();
         IDataFormat format = metadataProvider.getDataFormat();
         int nFields = recordDesc.getFieldCount();
         int[] columns = new int[nFields];
@@ -211,7 +212,7 @@ public class SampleOperationsHelper implements ISecondaryIndexOperationsHelper {
         RecordDescriptor raggRecordDesc = new RecordDescriptor(raggSerdes, raggTraits);
 
         IRunningAggregateEvaluatorFactory raggSlotEvalFactory =
-                new SampleSlotRunningAggregateFunctionFactory(sampleCardinalityTarget);
+                new SampleSlotRunningAggregateFunctionFactory(sampleCardinalityTarget, sampleSeed);
         IRunningAggregateEvaluatorFactory raggCounterEvalFactory = TidRunningAggregateDescriptor.FACTORY
                 .createFunctionDescriptor().createRunningAggregateEvaluatorFactory(new IScalarEvaluatorFactory[0]);
         RunningAggregateRuntimeFactory raggRuntimeFactory =
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/SampleSlotRunningAggregateFunctionFactory.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/SampleSlotRunningAggregateFunctionFactory.java
index c53da46c90..a4bda44085 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/SampleSlotRunningAggregateFunctionFactory.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/SampleSlotRunningAggregateFunctionFactory.java
@@ -41,12 +41,15 @@ import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
  */
 public class SampleSlotRunningAggregateFunctionFactory implements IRunningAggregateEvaluatorFactory {
 
-    private static final long serialVersionUID = 1L;
+    private static final long serialVersionUID = 2L;
 
     private final int sampleCardinalityTarget;
 
-    public SampleSlotRunningAggregateFunctionFactory(int sampleCardinalityTarget) {
+    private final long sampleSeed;
+
+    public SampleSlotRunningAggregateFunctionFactory(int sampleCardinalityTarget, long sampleSeed) {
         this.sampleCardinalityTarget = sampleCardinalityTarget;
+        this.sampleSeed = sampleSeed;
     }
 
     @Override
@@ -65,7 +68,7 @@ public class SampleSlotRunningAggregateFunctionFactory implements IRunningAggreg
                     SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AINT32);
             private final AMutableInt32 aInt32 = new AMutableInt32(0);
 
-            private final Random rnd = new Random();
+            private final Random rnd = new Random(sampleSeed);
             private long counter;
 
             @Override