You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@phoenix.apache.org by ja...@apache.org on 2018/03/01 18:44:31 UTC

[1/3] phoenix git commit: PHOENIX-4333 Incorrect estimate when stats are updated on a tenant specific view

Repository: phoenix
Updated Branches:
  refs/heads/4.x-cdh5.11.2 00143c428 -> 1da8e8608


PHOENIX-4333 Incorrect estimate when stats are updated on a tenant specific view


Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/4b3eb2b9
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/4b3eb2b9
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/4b3eb2b9

Branch: refs/heads/4.x-cdh5.11.2
Commit: 4b3eb2b9d9af6a5b084cad1e60bc720ae79b1fb5
Parents: 00143c4
Author: James Taylor <jt...@salesforce.com>
Authored: Thu Feb 22 10:26:33 2018 -0800
Committer: James Taylor <jt...@salesforce.com>
Committed: Thu Mar 1 10:42:09 2018 -0800

----------------------------------------------------------------------
 .../end2end/ExplainPlanWithStatsEnabledIT.java  | 302 +++++++++++--------
 .../phoenix/iterate/BaseResultIterators.java    | 167 +++++++---
 2 files changed, 305 insertions(+), 164 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/phoenix/blob/4b3eb2b9/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java
index f369be9..2099f4c 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java
@@ -21,6 +21,7 @@ import static org.apache.phoenix.query.QueryServicesOptions.DEFAULT_USE_STATS_FO
 import static org.apache.phoenix.util.PhoenixRuntime.TENANT_ID_ATTRIB;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
 import java.sql.Connection;
@@ -36,10 +37,11 @@ import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.phoenix.jdbc.PhoenixConnection;
 import org.apache.phoenix.jdbc.PhoenixResultSet;
 import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
-import org.apache.phoenix.query.BaseTest;
 import org.apache.phoenix.schema.PTable;
 import org.apache.phoenix.schema.PTableKey;
 import org.apache.phoenix.schema.TableNotFoundException;
+import org.apache.phoenix.schema.types.PInteger;
+import org.apache.phoenix.util.ByteUtil;
 import org.apache.phoenix.util.EnvironmentEdge;
 import org.apache.phoenix.util.EnvironmentEdgeManager;
 import org.apache.phoenix.util.PhoenixRuntime;
@@ -58,14 +60,14 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
     private static String tableB;
     private static String tableWithLargeGPWidth;
     private static String indexOnA;
-    private static final long largeGpWidth = 2 * 1000 * 1000l;
+    private static final long largeGpWidth = 2 * 1000 * 1000L;
 
     @BeforeClass
     public static void createTables() throws Exception {
         tableA = generateUniqueName();
-        initDataAndStats(tableA, 20l);
+        initDataAndStats(tableA, 20L);
         tableB = generateUniqueName();
-        initDataAndStats(tableB, 20l);
+        initDataAndStats(tableB, 20L);
         tableWithLargeGPWidth = generateUniqueName();
         initDataAndStats(tableWithLargeGPWidth, largeGpWidth);
         indexOnA = generateUniqueName();
@@ -114,8 +116,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
         binds.add(200);
         try (Connection conn = DriverManager.getConnection(getUrl())) {
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 0l, info.estimatedBytes);
-            assertEquals((Long) 0l, info.estimatedRows);
+            assertEquals((Long) 0L, info.estimatedBytes);
+            assertEquals((Long) 0L, info.estimatedRows);
             assertTrue(info.estimateInfoTs > 0);
         }
     }
@@ -127,8 +129,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
         binds.add(99);
         try (Connection conn = DriverManager.getConnection(getUrl())) {
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 634l, info.estimatedBytes);
-            assertEquals((Long) 10l, info.estimatedRows);
+            assertEquals((Long) 634L, info.estimatedBytes);
+            assertEquals((Long) 10L, info.estimatedRows);
             assertTrue(info.estimateInfoTs > 0);
         }
     }
@@ -140,8 +142,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
         binds.add(0);
         try (Connection conn = DriverManager.getConnection(getUrl())) {
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 691l, info.estimatedBytes);
-            assertEquals((Long) 10l, info.estimatedRows);
+            assertEquals((Long) 691L, info.estimatedBytes);
+            assertEquals((Long) 10L, info.estimatedRows);
             assertTrue(info.estimateInfoTs > 0);
         }
     }
@@ -152,8 +154,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
                 "SELECT /*+ NO_INDEX */ * FROM " + tableA + " UNION ALL SELECT * FROM " + tableB;
         try (Connection conn = DriverManager.getConnection(getUrl())) {
             Estimate info = getByteRowEstimates(conn, sql, Lists.newArrayList());
-            assertEquals((Long) (2 * 634l), info.estimatedBytes);
-            assertEquals((Long) (2 * 10l), info.estimatedRows);
+            assertEquals((Long) (2 * 634L), info.estimatedBytes);
+            assertEquals((Long) (2 * 10L), info.estimatedRows);
             assertTrue(info.estimateInfoTs > 0);
         }
     }
@@ -179,8 +181,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
                         + " tb ON ta.k = tb.k";
         try (Connection conn = DriverManager.getConnection(getUrl())) {
             Estimate info = getByteRowEstimates(conn, sql, Lists.newArrayList());
-            assertEquals((Long) (634l), info.estimatedBytes);
-            assertEquals((Long) (10l), info.estimatedRows);
+            assertEquals((Long) (634L), info.estimatedBytes);
+            assertEquals((Long) (10L), info.estimatedRows);
             assertTrue(info.estimateInfoTs > 0);
         }
     }
@@ -192,8 +194,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
                         + " ta JOIN " + tableB + " tb ON ta.k = tb.k";
         try (Connection conn = DriverManager.getConnection(getUrl())) {
             Estimate info = getByteRowEstimates(conn, sql, Lists.newArrayList());
-            assertEquals((Long) (2 * 634l), info.estimatedBytes);
-            assertEquals((Long) (2 * 10l), info.estimatedRows);
+            assertEquals((Long) (2 * 634L), info.estimatedBytes);
+            assertEquals((Long) (2 * 10L), info.estimatedRows);
             assertTrue(info.estimateInfoTs > 0);
         }
     }
@@ -205,8 +207,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
         binds.add(99);
         try (Connection conn = DriverManager.getConnection(getUrl())) {
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 634l, info.estimatedBytes);
-            assertEquals((Long) 10l, info.estimatedRows);
+            assertEquals((Long) 634L, info.estimatedBytes);
+            assertEquals((Long) 10L, info.estimatedRows);
             assertTrue(info.estimateInfoTs > 0);
         }
     }
@@ -218,8 +220,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
         try (Connection conn = DriverManager.getConnection(getUrl())) {
             conn.setAutoCommit(true);
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 634l, info.estimatedBytes);
-            assertEquals((Long) 10l, info.estimatedRows);
+            assertEquals((Long) 634L, info.estimatedBytes);
+            assertEquals((Long) 10L, info.estimatedRows);
             assertTrue(info.estimateInfoTs > 0);
         }
     }
@@ -231,8 +233,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
         try (Connection conn = DriverManager.getConnection(getUrl())) {
             conn.setAutoCommit(false);
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 634l, info.estimatedBytes);
-            assertEquals((Long) 10l, info.estimatedRows);
+            assertEquals((Long) 634L, info.estimatedBytes);
+            assertEquals((Long) 10L, info.estimatedRows);
             assertTrue(info.estimateInfoTs > 0);
         }
     }
@@ -246,9 +248,9 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
         binds.add(99);
         try (Connection conn = DriverManager.getConnection(getUrl())) {
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 0l, info.estimatedBytes);
-            assertEquals((Long) 0l, info.estimatedRows);
-            assertEquals((Long) 0l, info.estimateInfoTs);
+            assertEquals((Long) 0L, info.estimatedBytes);
+            assertEquals((Long) 0L, info.estimatedRows);
+            assertEquals((Long) 0L, info.estimateInfoTs);
         }
     }
 
@@ -260,8 +262,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
         try (Connection conn = DriverManager.getConnection(getUrl())) {
             conn.setAutoCommit(true);
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 634l, info.estimatedBytes);
-            assertEquals((Long) 10l, info.estimatedRows);
+            assertEquals((Long) 634L, info.estimatedBytes);
+            assertEquals((Long) 10L, info.estimatedRows);
             assertTrue(info.estimateInfoTs > 0);
         }
     }
@@ -274,8 +276,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
         try (Connection conn = DriverManager.getConnection(getUrl())) {
             conn.setAutoCommit(false);
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 200l, info.estimatedBytes);
-            assertEquals((Long) 2l, info.estimatedRows);
+            assertEquals((Long) 200L, info.estimatedBytes);
+            assertEquals((Long) 2L, info.estimatedRows);
             assertTrue(info.estimateInfoTs > 0);
         }
     }
@@ -288,9 +290,9 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
         try (Connection conn = DriverManager.getConnection(getUrl())) {
             conn.setAutoCommit(false);
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 0l, info.estimatedBytes);
-            assertEquals((Long) 0l, info.estimatedRows);
-            assertEquals((Long) 0l, info.estimateInfoTs);
+            assertEquals((Long) 0L, info.estimatedBytes);
+            assertEquals((Long) 0L, info.estimatedRows);
+            assertEquals((Long) 0L, info.estimateInfoTs);
         }
     }
 
@@ -301,8 +303,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
         try (Connection conn = DriverManager.getConnection(getUrl())) {
             conn.setAutoCommit(false);
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 176l, info.estimatedBytes);
-            assertEquals((Long) 2l, info.estimatedRows);
+            assertEquals((Long) 176L, info.estimatedBytes);
+            assertEquals((Long) 2L, info.estimatedRows);
             assertTrue(info.estimateInfoTs > 0);
         }
     }
@@ -410,13 +412,15 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
         String tenant1View = generateUniqueName();
         String tenant2View = generateUniqueName();
         String tenant3View = generateUniqueName();
+        String tenant4View = generateUniqueName();
         String multiTenantBaseTable = generateUniqueName();
         String tenant1 = "tenant1";
         String tenant2 = "tenant2";
         String tenant3 = "tenant3";
+        String tenant4 = "tenant4";
         MyClock clock = new MyClock(1000);
-        createMultitenantTableAndViews(tenant1View, tenant2View, tenant3View, tenant1, tenant2,
-            tenant3, multiTenantBaseTable, clock);
+        createMultitenantTableAndViews(tenant1View, tenant2View, tenant3View, tenant4View, tenant1, tenant2,
+            tenant3, tenant4, multiTenantBaseTable, clock);
 
         // query the entire multitenant table
         String sql = "SELECT * FROM " + multiTenantBaseTable + " WHERE ORGID >= ?";
@@ -424,33 +428,34 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
         binds.add("tenant0");
         try (Connection conn = DriverManager.getConnection(getUrl())) {
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 817l, info.estimatedBytes);
-            assertEquals((Long) 10l, info.estimatedRows);
-            assertEquals((Long) clock.currentTime(), info.estimateInfoTs);
+            assertEquals((Long) 681L, info.estimatedBytes);
+            assertEquals((Long) 10L, info.estimatedRows);
+            assertNull(info.estimateInfoTs); // unknown/null because region (* - tenant1) has no guideposts
         }
         binds.clear();
+        long prevTenantBytes;
         // query tenant1 view
         try (Connection conn = getTenantConnection(tenant1)) {
             sql = "SELECT * FROM " + tenant1View;
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 143l, info.estimatedBytes);
-            assertEquals((Long) 2l, info.estimatedRows);
-            assertEquals((Long) clock.currentTime(), info.estimateInfoTs);
+            assertEquals((Long) 119L, info.estimatedBytes);
+            assertEquals((Long) 2L, info.estimatedRows);
+            assertNull(info.estimateInfoTs); // unknown/null because scan occurs in first region because of start key versus slightly larger region boundary
         }
         // query tenant2 view
         try (Connection conn = getTenantConnection(tenant2)) {
             sql = "SELECT * FROM " + tenant2View;
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 143l, info.estimatedBytes);
-            assertEquals((Long) 2l, info.estimatedRows);
+            assertEquals((Long) (prevTenantBytes=119L), info.estimatedBytes);
+            assertEquals((Long) 2L, info.estimatedRows);
             assertEquals((Long) clock.currentTime(), info.estimateInfoTs);
         }
         // query tenant3 view
         try (Connection conn = getTenantConnection(tenant3)) {
             sql = "SELECT * FROM " + tenant3View;
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 531l, info.estimatedBytes);
-            assertEquals((Long) 6l, info.estimatedRows);
+            assertEquals((Long) 443L, info.estimatedBytes);
+            assertEquals((Long) 6L, info.estimatedRows);
             assertEquals((Long) clock.currentTime(), info.estimateInfoTs);
         }
         /*
@@ -458,52 +463,51 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
          * advancing our clock by 1000 seconds. This way we can check that only the region for
          * tenant1 will have updated guidepost with the new timestamp.
          */
-        long prevTenant1Bytes = 143l;
         long prevGuidePostTimestamp = clock.currentTime();
         clock.advanceTime(1000);
         try {
             EnvironmentEdgeManager.injectEdge(clock);
             // Update tenant1 view
-            try (Connection conn = getTenantConnection(tenant1)) {
+            try (Connection conn = getTenantConnection(tenant2)) {
                 // upsert a few rows for tenantView
                 conn.createStatement()
-                        .executeUpdate("UPSERT INTO " + tenant1View + " VALUES (11, 11, 11)");
+                        .executeUpdate("UPSERT INTO " + tenant2View + " VALUES (11, 11, 11)");
                 conn.createStatement()
-                        .executeUpdate("UPSERT INTO " + tenant1View + " VALUES (12, 12, 12)");
+                        .executeUpdate("UPSERT INTO " + tenant2View + " VALUES (12, 12, 12)");
                 conn.createStatement()
-                        .executeUpdate("UPSERT INTO " + tenant1View + " VALUES (13, 13, 13)");
+                        .executeUpdate("UPSERT INTO " + tenant2View + " VALUES (13, 13, 13)");
                 conn.createStatement()
-                        .executeUpdate("UPSERT INTO " + tenant1View + " VALUES (14, 14, 14)");
+                        .executeUpdate("UPSERT INTO " + tenant2View + " VALUES (14, 14, 14)");
                 conn.createStatement()
-                        .executeUpdate("UPSERT INTO " + tenant1View + " VALUES (15, 15, 15)");
+                        .executeUpdate("UPSERT INTO " + tenant2View + " VALUES (15, 15, 15)");
                 conn.createStatement()
-                        .executeUpdate("UPSERT INTO " + tenant1View + " VALUES (16, 16, 16)");
+                        .executeUpdate("UPSERT INTO " + tenant2View + " VALUES (16, 16, 16)");
                 conn.commit();
                 // run update stats on the tenantView
-                conn.createStatement().executeUpdate("UPDATE STATISTICS " + tenant1View);
+                conn.createStatement().executeUpdate("UPDATE STATISTICS " + tenant2View);
                 // get estimates now and check if they were updated as expected
-                sql = "SELECT * FROM " + tenant1View;
+                sql = "SELECT * FROM " + tenant2View;
                 Estimate info = getByteRowEstimates(conn, sql, Collections.emptyList());
-                assertTrue(info.estimatedBytes > prevTenant1Bytes);
-                assertEquals((Long) 8l, info.estimatedRows);
+                assertTrue(info.estimatedBytes > prevTenantBytes);
+                assertEquals((Long) 8L, info.estimatedRows);
                 assertEquals((Long) clock.currentTime(), info.estimateInfoTs);
             }
         } finally {
             EnvironmentEdgeManager.reset();
         }
-        // Now check estimates again for tenantView2 and tenantView3. They should stay the same.
-        try (Connection conn = getTenantConnection(tenant2)) {
-            sql = "SELECT * FROM " + tenant2View;
+        // Now check estimates again for tenantView1 and tenantView3. They should stay the same.
+        try (Connection conn = getTenantConnection(tenant1)) {
+            sql = "SELECT * FROM " + tenant1View;
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 143l, info.estimatedBytes);
-            assertEquals((Long) 2l, info.estimatedRows);
-            assertEquals((Long) prevGuidePostTimestamp, info.estimateInfoTs);
+            assertEquals((Long) 119L, info.estimatedBytes);
+            assertEquals((Long) 2L, info.estimatedRows);
+            assertNull(info.estimateInfoTs);
         }
         try (Connection conn = getTenantConnection(tenant3)) {
             sql = "SELECT * FROM " + tenant3View;
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 531l, info.estimatedBytes);
-            assertEquals((Long) 6l, info.estimatedRows);
+            assertEquals((Long) 443L, info.estimatedBytes);
+            assertEquals((Long) 6L, info.estimatedRows);
             assertEquals((Long) prevGuidePostTimestamp, info.estimateInfoTs);
         }
         /*
@@ -516,9 +520,47 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
         try (Connection conn = DriverManager.getConnection(getUrl())) {
             sql = "SELECT * FROM " + multiTenantBaseTable + " WHERE ORGID >= ?";
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 1399l, info.estimatedBytes);
-            assertEquals((Long) 16l, info.estimatedRows);
-            assertEquals((Long) prevGuidePostTimestamp, info.estimateInfoTs);
+            assertEquals((Long) 1167L, info.estimatedBytes);
+            assertEquals((Long) 16L, info.estimatedRows);
+            assertNull(info.estimateInfoTs);
+        }
+        // query tenant4 view
+        binds.clear();
+        try (Connection conn = getTenantConnection(tenant4)) {
+            sql = "SELECT * FROM " + tenant4View;
+            Estimate info = getByteRowEstimates(conn, sql, binds);
+            assertEquals((Long) (prevTenantBytes=0L), info.estimatedBytes);
+            assertEquals((Long) 0L, info.estimatedRows);
+            assertNull(info.estimateInfoTs); // Unknown b/c second region of tenant4 has no gps
+        }
+        clock.advanceTime(1000);
+        try {
+            EnvironmentEdgeManager.injectEdge(clock);
+            // Update tenant4 view
+            try (Connection conn = getTenantConnection(tenant4)) {
+                // upsert a few rows for tenantView
+                conn.createStatement()
+                    .executeUpdate("UPSERT INTO " + tenant4View + " VALUES (6, 17,17)");
+                conn.createStatement()
+                    .executeUpdate("UPSERT INTO " + tenant4View + " VALUES (7, 17,17)");
+                conn.commit();
+                // run update stats on the tenantView
+                conn.createStatement().executeUpdate("UPDATE STATISTICS " + tenant4View);
+                // get estimates now and check if they were updated as expected
+                sql = "SELECT * FROM " + tenant4View;
+                Estimate info = getByteRowEstimates(conn, sql, Collections.emptyList());
+                assertTrue(info.estimatedBytes > prevTenantBytes);
+                assertEquals((Long) 119L, info.estimatedBytes);
+                assertEquals((Long) 2L, info.estimatedRows);
+                assertEquals((Long) clock.currentTime(), info.estimateInfoTs);
+                sql = "SELECT * FROM " + tenant4View + " WHERE pk2 >= 6";
+                info = getByteRowEstimates(conn, sql, Collections.emptyList());
+                assertEquals((Long) 119L, info.estimatedBytes);
+                assertEquals((Long) 2L, info.estimatedRows);
+                assertEquals((Long) clock.currentTime(), info.estimateInfoTs);
+            }
+        } finally {
+            EnvironmentEdgeManager.reset();
         }
     }
 
@@ -529,10 +571,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
             int guidePostWidth = 20;
             String ddl =
                     "CREATE TABLE " + tableName + " (k INTEGER PRIMARY KEY, a bigint, b bigint)"
-                            + " GUIDE_POSTS_WIDTH=" + guidePostWidth;
-            byte[][] splits =
-                    new byte[][] { Bytes.toBytes(102), Bytes.toBytes(105), Bytes.toBytes(108) };
-            BaseTest.createTestTable(getUrl(), ddl, splits, null);
+                            + " GUIDE_POSTS_WIDTH=" + guidePostWidth + " SPLIT ON (102, 105, 108)";
+            conn.createStatement().execute(ddl);
             conn.createStatement().execute("upsert into " + tableName + " values (100,1,3)");
             conn.createStatement().execute("upsert into " + tableName + " values (101,2,4)");
             conn.createStatement().execute("upsert into " + tableName + " values (102,2,4)");
@@ -554,12 +594,12 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
             // value set in config which is true.
             ResultSet rs = conn.createStatement().executeQuery(sql);
             // stats are being used for parallelization. So number of scans is higher.
-            assertEquals(14, rs.unwrap(PhoenixResultSet.class).getStatement().getQueryPlan()
+            assertEquals(11, rs.unwrap(PhoenixResultSet.class).getStatement().getQueryPlan()
                     .getScans().get(0).size());
             assertTrue(rs.next());
             assertEquals(10, rs.getInt(1));
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 10l, info.getEstimatedRows());
+            assertEquals((Long) 10L, info.getEstimatedRows());
             assertTrue(info.getEstimateInfoTs() > 0);
 
             // Now, let's disable USE_STATS_FOR_PARALLELIZATION on the table
@@ -572,7 +612,7 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
             assertTrue(rs.next());
             assertEquals(10, rs.getInt(1));
             info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 10l, info.getEstimatedRows());
+            assertEquals((Long) 10L, info.getEstimatedRows());
             assertTrue(info.getEstimateInfoTs() > 0);
 
             // assert that the aggregate query on view also works correctly
@@ -587,7 +627,7 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
             assertTrue(rs.next());
             assertEquals(10, rs.getInt(1));
             info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 10l, info.getEstimatedRows());
+            assertEquals((Long) 10L, info.getEstimatedRows());
             assertTrue(info.getEstimateInfoTs() > 0);
 
             // Now let's make sure that when using stats for parallelization, our estimates
@@ -598,12 +638,12 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
             // query the table
             rs = conn.createStatement().executeQuery(sql);
             // stats are being used for parallelization. So number of scans is higher.
-            assertEquals(14, rs.unwrap(PhoenixResultSet.class).getStatement().getQueryPlan()
+            assertEquals(11, rs.unwrap(PhoenixResultSet.class).getStatement().getQueryPlan()
                     .getScans().get(0).size());
             assertTrue(rs.next());
             assertEquals(10, rs.getInt(1));
             info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 10l, info.getEstimatedRows());
+            assertEquals((Long) 10L, info.getEstimatedRows());
             assertTrue(info.getEstimateInfoTs() > 0);
 
             conn.createStatement()
@@ -612,12 +652,12 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
             // query the view
             rs = conn.createStatement().executeQuery(sql);
             // stats are not being used for parallelization. So number of scans is higher.
-            assertEquals(14, rs.unwrap(PhoenixResultSet.class).getStatement().getQueryPlan()
+            assertEquals(11, rs.unwrap(PhoenixResultSet.class).getStatement().getQueryPlan()
                     .getScans().get(0).size());
             assertTrue(rs.next());
             assertEquals(10, rs.getInt(1));
             info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 10l, info.getEstimatedRows());
+            assertEquals((Long) 10L, info.getEstimatedRows());
             assertTrue(info.getEstimateInfoTs() > 0);
         }
     }
@@ -639,10 +679,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
             String ddl =
                     "CREATE TABLE " + tableName + " (k INTEGER PRIMARY KEY, a bigint, b bigint)"
                             + " GUIDE_POSTS_WIDTH=" + guidePostWidth
-                            + ", USE_STATS_FOR_PARALLELIZATION=" + useStatsForParallelization;
-            byte[][] splits =
-                    new byte[][] { Bytes.toBytes(102), Bytes.toBytes(105), Bytes.toBytes(108) };
-            BaseTest.createTestTable(getUrl(), ddl, splits, null);
+                            + ", USE_STATS_FOR_PARALLELIZATION=" + useStatsForParallelization + " SPLIT ON (102, 105, 108)";
+            conn.createStatement().execute(ddl);
             conn.createStatement().execute("upsert into " + tableName + " values (100,100,3)");
             conn.createStatement().execute("upsert into " + tableName + " values (101,101,4)");
             conn.createStatement().execute("upsert into " + tableName + " values (102,102,4)");
@@ -669,8 +707,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
             }
             assertEquals(numRows, i);
             Estimate info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 10l, info.getEstimatedRows());
-            assertEquals((Long) 930l, info.getEstimatedBytes());
+            assertEquals((Long) 10L, info.getEstimatedRows());
+            assertEquals((Long) 720L, info.getEstimatedBytes());
             assertTrue(info.getEstimateInfoTs() > 0);
 
             // query whose start key is after any data
@@ -678,8 +716,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
             rs = conn.createStatement().executeQuery(sql);
             assertFalse(rs.next());
             info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 0l, info.getEstimatedRows());
-            assertEquals((Long) 0l, info.getEstimatedBytes());
+            assertEquals((Long) 0L, info.getEstimatedRows());
+            assertEquals((Long) 0L, info.getEstimatedBytes());
             assertTrue(info.getEstimateInfoTs() > 0);
 
             // Query whose end key is before any data
@@ -687,8 +725,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
             rs = conn.createStatement().executeQuery(sql);
             assertFalse(rs.next());
             info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 0l, info.getEstimatedRows());
-            assertEquals((Long) 0l, info.getEstimatedBytes());
+            assertEquals((Long) 0L, info.getEstimatedRows());
+            assertEquals((Long) 0L, info.getEstimatedBytes());
             assertTrue(info.getEstimateInfoTs() > 0);
 
             // Query whose end key is after any data. In this case, we return the estimate as
@@ -703,19 +741,18 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
             }
             assertEquals(numRows, i);
             info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 10l, info.getEstimatedRows());
-            assertEquals((Long) 930l, info.getEstimatedBytes());
+            assertEquals((Long) 10L, info.getEstimatedRows());
+            assertEquals((Long) 720L, info.getEstimatedBytes());
             assertTrue(info.getEstimateInfoTs() > 0);
 
-            // Query whose start key and end key is before any data. In this case, we return the
-            // estimate as
-            // scanning the first guide post
+            // Query whose start key and end key are both before any data. In this case,
+            // we return the estimate as scanning the first guide post
             sql = "SELECT a FROM " + tableName + " WHERE K <= 90 AND K >= 80";
             rs = conn.createStatement().executeQuery(sql);
             assertFalse(rs.next());
             info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 0l, info.getEstimatedRows());
-            assertEquals((Long) 0l, info.getEstimatedBytes());
+            assertEquals((Long) 0L, info.getEstimatedRows());
+            assertEquals((Long) 0L, info.getEstimatedBytes());
             assertTrue(info.getEstimateInfoTs() > 0);
 
             // Query whose start key and end key is after any data. In this case, we return the
@@ -725,13 +762,12 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
             rs = conn.createStatement().executeQuery(sql);
             assertFalse(rs.next());
             info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 0l, info.getEstimatedRows());
-            assertEquals((Long) 0l, info.getEstimatedBytes());
+            assertEquals((Long) 0L, info.getEstimatedRows());
+            assertEquals((Long) 0L, info.getEstimatedBytes());
             assertTrue(info.getEstimateInfoTs() > 0);
 
             // Query whose start key is before and end key is between data. In this case, we return
-            // the estimate as
-            // scanning no guide post
+            // the estimate as scanning no guide post
             sql = "SELECT a FROM " + tableName + " WHERE K <= 102 AND K >= 90";
             rs = conn.createStatement().executeQuery(sql);
             i = 0;
@@ -743,8 +779,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
             info = getByteRowEstimates(conn, sql, binds);
             // Depending on the guidepost boundary, this estimate
             // can be slightly off. It's called estimate for a reason.
-            assertEquals((Long) 4l, info.getEstimatedRows());
-            assertEquals((Long) 330l, info.getEstimatedBytes());
+            assertEquals((Long) 3L, info.getEstimatedRows());
+            assertEquals((Long) 160L, info.getEstimatedBytes());
             assertTrue(info.getEstimateInfoTs() > 0);
             // Query whose start key is between and end key is after data.
             sql = "SELECT a FROM " + tableName + " WHERE K <= 120 AND K >= 100";
@@ -758,8 +794,8 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
             info = getByteRowEstimates(conn, sql, binds);
             // Depending on the guidepost boundary, this estimate
             // can be slightly off. It's called estimate for a reason.
-            assertEquals((Long) 9l, info.getEstimatedRows());
-            assertEquals((Long) 900l, info.getEstimatedBytes());
+            assertEquals((Long) 10L, info.getEstimatedRows());
+            assertEquals((Long) 720L, info.getEstimatedBytes());
             assertTrue(info.getEstimateInfoTs() > 0);
             // Query whose start key and end key are both between data.
             sql = "SELECT a FROM " + tableName + " WHERE K <= 109 AND K >= 100";
@@ -773,28 +809,36 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
             info = getByteRowEstimates(conn, sql, binds);
             // Depending on the guidepost boundary, this estimate
             // can be slightly off. It's called estimate for a reason.
-            assertEquals((Long) 9l, info.getEstimatedRows());
-            assertEquals((Long) 900l, info.getEstimatedBytes());
+            assertEquals((Long) 10L, info.getEstimatedRows());
+            assertEquals((Long) 720L, info.getEstimatedBytes());
             assertTrue(info.getEstimateInfoTs() > 0);
         }
     }
 
     private static void createMultitenantTableAndViews(String tenant1View, String tenant2View,
-            String tenant3View, String tenant1, String tenant2, String tenant3,
-            String multiTenantTable, MyClock clock) throws SQLException {
+            String tenant3View, String tenant4View, String tenant1, String tenant2, String tenant3, String tenant4,
+            String multiTenantTable, MyClock clock) throws Exception {
         byte[][] splits =
-                new byte[][] { Bytes.toBytes(tenant1), Bytes.toBytes(tenant2),
-                        Bytes.toBytes(tenant3) };
+                new byte[][] {
+                    ByteUtil.concat(Bytes.toBytes(tenant1),PInteger.INSTANCE.toBytes(1)),
+                    ByteUtil.concat(Bytes.toBytes(tenant2),PInteger.INSTANCE.toBytes(1)),
+                    ByteUtil.concat(Bytes.toBytes(tenant3),PInteger.INSTANCE.toBytes(1)),
+                    ByteUtil.concat(Bytes.toBytes(tenant4),PInteger.INSTANCE.toBytes(6)),
+                    };
         String ddl =
                 "CREATE TABLE " + multiTenantTable
-                        + " (orgId CHAR(15) NOT NULL, pk2 integer NOT NULL, c1.a bigint, c2.b bigint CONSTRAINT PK PRIMARY KEY "
+                        + " (orgId CHAR(7) NOT NULL, pk2 integer NOT NULL, c1.a bigint, c2.b bigint CONSTRAINT PK PRIMARY KEY "
                         + "(ORGID, PK2)) MULTI_TENANT=true, GUIDE_POSTS_WIDTH=2";
         // Use our own clock to get rows created with our controlled timestamp
         try {
             EnvironmentEdgeManager.injectEdge(clock);
-            createTestTable(getUrl(), ddl, splits, null);
-            clock.advanceTime(1000);
             try (Connection conn = DriverManager.getConnection(getUrl())) {
+                PreparedStatement stmt = conn.prepareStatement(ddl + " SPLIT ON (?,?,?,?)");
+                for (int i = 0; i < splits.length; i++) {
+                    stmt.setBytes(i+1, splits[i]);
+                }
+                stmt.executeUpdate();
+                clock.advanceTime(1000);
                 /**
                  * Insert 2 rows each for tenant1 and tenant2 and 6 rows for tenant3
                  */
@@ -803,35 +847,41 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
                 conn.createStatement().execute(
                     "upsert into " + multiTenantTable + " values ('" + tenant1 + "',2,2,2)");
                 conn.createStatement().execute(
-                    "upsert into " + multiTenantTable + " values ('" + tenant2 + "',3,3,3)");
+                    "upsert into " + multiTenantTable + " values ('" + tenant2 + "',1,3,3)");
                 conn.createStatement().execute(
-                    "upsert into " + multiTenantTable + " values ('" + tenant2 + "',4,4,4)");
+                    "upsert into " + multiTenantTable + " values ('" + tenant2 + "',2,4,4)");
                 conn.createStatement().execute(
-                    "upsert into " + multiTenantTable + " values ('" + tenant3 + "',5,5,5)");
+                    "upsert into " + multiTenantTable + " values ('" + tenant3 + "',1,5,5)");
                 conn.createStatement().execute(
-                    "upsert into " + multiTenantTable + " values ('" + tenant3 + "',6,6,6)");
+                    "upsert into " + multiTenantTable + " values ('" + tenant3 + "',2,6,6)");
                 conn.createStatement().execute(
-                    "upsert into " + multiTenantTable + " values ('" + tenant3 + "',7,7,7)");
+                    "upsert into " + multiTenantTable + " values ('" + tenant3 + "',3,7,7)");
                 conn.createStatement().execute(
-                    "upsert into " + multiTenantTable + " values ('" + tenant3 + "',8,8,8)");
+                    "upsert into " + multiTenantTable + " values ('" + tenant3 + "',4,8,8)");
                 conn.createStatement().execute(
-                    "upsert into " + multiTenantTable + " values ('" + tenant3 + "',9,9,9)");
+                    "upsert into " + multiTenantTable + " values ('" + tenant3 + "',5,9,9)");
                 conn.createStatement().execute(
-                    "upsert into " + multiTenantTable + " values ('" + tenant3 + "',10,10,10)");
+                    "upsert into " + multiTenantTable + " values ('" + tenant3 + "',6,10,10)");
                 conn.commit();
-                conn.createStatement().execute("UPDATE STATISTICS " + multiTenantTable);
             }
             try (Connection conn = getTenantConnection(tenant1)) {
                 conn.createStatement().execute(
                     "CREATE VIEW " + tenant1View + " AS SELECT * FROM " + multiTenantTable);
+                conn.createStatement().execute("UPDATE STATISTICS " + tenant1View);
             }
             try (Connection conn = getTenantConnection(tenant2)) {
                 conn.createStatement().execute(
                     "CREATE VIEW " + tenant2View + " AS SELECT * FROM " + multiTenantTable);
+                conn.createStatement().execute("UPDATE STATISTICS " + tenant2View);
             }
             try (Connection conn = getTenantConnection(tenant3)) {
                 conn.createStatement().execute(
                     "CREATE VIEW " + tenant3View + " AS SELECT * FROM " + multiTenantTable);
+                conn.createStatement().execute("UPDATE STATISTICS " + tenant3View);
+            }
+            try (Connection conn = getTenantConnection(tenant4)) {
+                conn.createStatement().execute(
+                    "CREATE VIEW " + tenant4View + " AS SELECT * FROM " + multiTenantTable);
             }
         } finally {
             EnvironmentEdgeManager.reset();
@@ -937,7 +987,7 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
              * Because we ran update stats only for tenant1View, there is only partial guidepost
              * info available for tenant2View.
              */
-            assertEquals((Long) 1l, info.estimatedRows);
+            assertEquals((Long) 1L, info.estimatedRows);
             // ok now run update stats for tenant2 view
             conn.createStatement().execute("UPDATE STATISTICS " + tenant2View);
             /*
@@ -945,7 +995,7 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT {
              * available now.
              */
             info = getByteRowEstimates(conn, sql, binds);
-            assertEquals((Long) 6l, info.estimatedRows);
+            assertEquals((Long) 6L, info.estimatedRows);
         }
     }
 

http://git-wip-us.apache.org/repos/asf/phoenix/blob/4b3eb2b9/phoenix-core/src/main/java/org/apache/phoenix/iterate/BaseResultIterators.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/iterate/BaseResultIterators.java b/phoenix-core/src/main/java/org/apache/phoenix/iterate/BaseResultIterators.java
index 0484748..682d1ed 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/iterate/BaseResultIterators.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/iterate/BaseResultIterators.java
@@ -591,9 +591,21 @@ public abstract class BaseResultIterators extends ExplainTable implements Result
         return context.getConnection().getQueryServices().getTableStats(key);
     }
 
+    private static void updateEstimates(GuidePostsInfo gps, int guideIndex, GuidePostEstimate estimate) {
+        estimate.rowsEstimate += gps.getRowCounts()[guideIndex];
+        estimate.bytesEstimate += gps.getByteCounts()[guideIndex];
+        /*
+         * It is possible that the timestamp of guideposts could be different.
+         * So we report the time at which stats information was collected as the
+         * minimum of timestamp of the guideposts that we will be going over.
+         */
+        estimate.lastUpdated =
+                Math.min(estimate.lastUpdated,
+                    gps.getGuidePostTimestamps()[guideIndex]);
+    }
+    
     private List<Scan> addNewScan(List<List<Scan>> parallelScans, List<Scan> scans, Scan scan,
-            byte[] startKey, boolean crossedRegionBoundary, HRegionLocation regionLocation,
-            GuidePostEstimate estimate, Long gpsRows, Long gpsBytes) {
+            byte[] startKey, boolean crossedRegionBoundary, HRegionLocation regionLocation) {
         boolean startNewScan = scanGrouper.shouldStartNewScan(plan, scans, startKey, crossedRegionBoundary);
         if (scan != null) {
             if (regionLocation.getServerName() != null) {
@@ -602,12 +614,6 @@ public abstract class BaseResultIterators extends ExplainTable implements Result
             if (useStatsForParallelization || crossedRegionBoundary) {
                 scans.add(scan);
             }
-            if (estimate != null && gpsRows != null) {
-                estimate.rowsEstimate += gpsRows;
-            }
-            if (estimate != null && gpsBytes != null) {
-                estimate.bytesEstimate += gpsBytes;
-            }
         }
         if (startNewScan && !scans.isEmpty()) {
             parallelScans.add(scans);
@@ -669,7 +675,7 @@ public abstract class BaseResultIterators extends ExplainTable implements Result
                     newScan.setStopRow(regionInfo.getEndKey());
                 }
             }
-            scans = addNewScan(parallelScans, scans, newScan, endKey, true, regionLocation, null, null, null);
+            scans = addNewScan(parallelScans, scans, newScan, endKey, true, regionLocation);
             regionIndex++;
         }
         if (!scans.isEmpty()) { // Add any remaining scans
@@ -681,6 +687,7 @@ public abstract class BaseResultIterators extends ExplainTable implements Result
     private static class GuidePostEstimate {
         private long bytesEstimate;
         private long rowsEstimate;
+        private long lastUpdated = Long.MAX_VALUE;
     }
 
     private int computeColumnsInCommon() {
@@ -854,6 +861,21 @@ public abstract class BaseResultIterators extends ExplainTable implements Result
      * Compute the list of parallel scans to run for a given query. The inner scans
      * may be concatenated together directly, while the other ones may need to be
      * merge sorted, depending on the query.
+     * Also computes an estimated bytes scanned, rows scanned, and last update time
+     * of statistics. To compute correctly, we need to handle a couple of edge cases:
+     * 1) if a guidepost is equal to the start key of the scan.
+     * 2) If a guidepost is equal to the end region key.
+     * In both cases, we set a flag (delayAddingEst) which indicates that the previous
+     * gp should be used in our stats calculation. The normal case is that a gp is
+     * encountered which is in the scan range in which case it is simply added to
+     * our calculation.
+     * For the last update time, we use the min timestamp of the gp that are in
+     * range of the scans that will be issued. If we find no gp in the range, we use
+     * the gp in the first or last region of the scan. If we encounter a region with
+     * no gp, then we return a null value as an indication that we don't know with
+     * certainty when the stats were updated last. This handles the case of a split
+     * occurring for a large ingest with stats never having been calculated for the
+     * new region.
      * @return list of parallel scans to run for a given query.
      * @throws SQLException
      */
@@ -902,9 +924,10 @@ public abstract class BaseResultIterators extends ExplainTable implements Result
         }
         
         int regionIndex = 0;
+        int startRegionIndex = 0;
         int stopIndex = regionBoundaries.size();
         if (startRegionBoundaryKey.length > 0) {
-            regionIndex = getIndexContainingInclusive(regionBoundaries, startRegionBoundaryKey);
+            startRegionIndex = regionIndex = getIndexContainingInclusive(regionBoundaries, startRegionBoundaryKey);
         }
         if (stopRegionBoundaryKey.length > 0) {
             stopIndex = Math.min(stopIndex, regionIndex + getIndexContainingExclusive(regionBoundaries.subList(regionIndex, stopIndex), stopRegionBoundaryKey));
@@ -927,31 +950,55 @@ public abstract class BaseResultIterators extends ExplainTable implements Result
         PrefixByteDecoder decoder = null;
         int guideIndex = 0;
         GuidePostEstimate estimates = new GuidePostEstimate();
-        long estimateTs = Long.MAX_VALUE;
-        long minGuidePostTimestamp = Long.MAX_VALUE;
+        boolean gpsForFirstRegion = false;
+        boolean intersectWithGuidePosts = true;
+        // Maintain min ts for gps in first or last region outside of
+        // gps that are in the scan range. We'll use this if we find
+        // no gps in range.
+        long fallbackTs = Long.MAX_VALUE;
+        // Determination of whether or not we found a guidepost in
+        // every region between the start and stop key. If not, then
+        // we cannot definitively say at what time the guideposts
+        // were collected.
+        boolean gpsAvailableForAllRegions = true;
         try {
+            boolean delayAddingEst = false;
+            ImmutableBytesWritable firstRegionStartKey = null;
             if (gpsSize > 0) {
                 stream = new ByteArrayInputStream(guidePosts.get(), guidePosts.getOffset(), guidePosts.getLength());
                 input = new DataInputStream(stream);
                 decoder = new PrefixByteDecoder(gps.getMaxLength());
+                firstRegionStartKey = new ImmutableBytesWritable(regionLocations.get(regionIndex).getRegionInfo().getStartKey());
                 try {
-                    while (currentKey.compareTo(currentGuidePost = PrefixByteCodec.decode(decoder, input)) >= 0
-                            && currentKey.getLength() != 0) {
-                        minGuidePostTimestamp = Math.min(estimateTs,
-                            gps.getGuidePostTimestamps()[guideIndex]);
+                    int c;
+                    // Continue walking guideposts until we get past the currentKey
+                    while ((c=currentKey.compareTo(currentGuidePost = PrefixByteCodec.decode(decoder, input))) >= 0) {
+                        // Detect if we found a guidepost that might be in the first region. This
+                        // is for the case where the start key may be past the only guidepost in
+                        // the first region.
+                        if (!gpsForFirstRegion && firstRegionStartKey.compareTo(currentGuidePost) <= 0) {
+                            gpsForFirstRegion = true;
+                        }
+                        // While we have gps in the region (but outside of start/stop key), track
+                        // the min ts as a fallback for the time at which stats were calculated.
+                        if (gpsForFirstRegion) {
+                            fallbackTs =
+                                    Math.min(fallbackTs,
+                                        gps.getGuidePostTimestamps()[guideIndex]);
+                        }
+                        // Special case for gp == startKey in which case we want to
+                        // count this gp (if it's in range) though we go past it.
+                        delayAddingEst = (c == 0);
                         guideIndex++;
                     }
                 } catch (EOFException e) {
                     // expected. Thrown when we have decoded all guide posts.
+                    intersectWithGuidePosts = false;
                 }
             }
+            byte[] endRegionKey = regionLocations.get(stopIndex).getRegionInfo().getEndKey();
             byte[] currentKeyBytes = currentKey.copyBytes();
-            boolean intersectWithGuidePosts = guideIndex < gpsSize;
-            if (!intersectWithGuidePosts) {
-                // If there are no guide posts within the query range, we use the estimateInfoTimestamp
-                // as the minimum time across all guideposts
-                estimateTs = minGuidePostTimestamp;
-            }
+            intersectWithGuidePosts &= guideIndex < gpsSize;
             // Merge bisect with guideposts for all but the last region
             while (regionIndex <= stopIndex) {
                 HRegionLocation regionLocation = regionLocations.get(regionIndex);
@@ -982,36 +1029,41 @@ public abstract class BaseResultIterators extends ExplainTable implements Result
                     keyOffset = ScanUtil.getRowKeyOffset(regionInfo.getStartKey(), regionInfo.getEndKey());
                 }
                 byte[] initialKeyBytes = currentKeyBytes;
-                while (intersectWithGuidePosts && (endKey.length == 0 || currentGuidePost.compareTo(endKey) <= 0)) {
+                int gpsComparedToEndKey = -1;
+                boolean everNotDelayed = false;
+                while (intersectWithGuidePosts && (endKey.length == 0 || (gpsComparedToEndKey=currentGuidePost.compareTo(endKey)) <= 0)) {
                     Scan newScan = scanRanges.intersectScan(scan, currentKeyBytes, currentGuidePostBytes, keyOffset,
                         false);
                     if (newScan != null) {
                         ScanUtil.setLocalIndexAttributes(newScan, keyOffset,
                             regionInfo.getStartKey(), regionInfo.getEndKey(),
                             newScan.getStartRow(), newScan.getStopRow());
+                        // If we've been delaying adding estimates, add the previous
+                        // gp estimates now that we know they are in range.
+                        if (delayAddingEst) {
+                            updateEstimates(gps, guideIndex-1, estimates);
+                        }
+                        // If we're not delaying adding estimates, add the
+                        // current gp estimates.
+                        if (! (delayAddingEst = gpsComparedToEndKey == 0) ) {
+                            updateEstimates(gps, guideIndex, estimates);
+                        }
+                    } else {
+                        delayAddingEst = false;
                     }
-                    scans =
-                            addNewScan(parallelScans, scans, newScan, currentGuidePostBytes, false,
-                                regionLocation, estimates, gps.getRowCounts()[guideIndex],
-                                gps.getByteCounts()[guideIndex]);
+                    everNotDelayed |= !delayAddingEst;
+                    scans = addNewScan(parallelScans, scans, newScan, currentGuidePostBytes, false, regionLocation);
                     currentKeyBytes = currentGuidePostBytes;
                     try {
                         currentGuidePost = PrefixByteCodec.decode(decoder, input);
                         currentGuidePostBytes = currentGuidePost.copyBytes();
-                        /*
-                         * It is possible that the timestamp of guideposts could be different.
-                         * So we report the time at which stats information was collected as the
-                         * minimum of timestamp of the guideposts that we will be going over.
-                         */
-                        estimateTs =
-                                Math.min(estimateTs,
-                                    gps.getGuidePostTimestamps()[guideIndex]);
                         guideIndex++;
                     } catch (EOFException e) {
                         // We have read all guide posts
                         intersectWithGuidePosts = false;
                     }
                 }
+                boolean gpsInThisRegion = initialKeyBytes != currentKeyBytes;
                 if (!useStatsForParallelization) {
                     /*
                      * If we are not using stats for generating parallel scans, we need to reset the
@@ -1023,15 +1075,40 @@ public abstract class BaseResultIterators extends ExplainTable implements Result
                 if(newScan != null) {
                     ScanUtil.setLocalIndexAttributes(newScan, keyOffset, regionInfo.getStartKey(),
                         regionInfo.getEndKey(), newScan.getStartRow(), newScan.getStopRow());
+                    // Boundary case of no GP in region after delaying adding of estimates
+                    if (!gpsInThisRegion && delayAddingEst) {
+                        updateEstimates(gps, guideIndex-1, estimates);
+                        gpsInThisRegion = true;
+                        delayAddingEst = false;
+                    }
+                } else if (!gpsInThisRegion) {
+                    delayAddingEst = false;
                 }
-                scans = addNewScan(parallelScans, scans, newScan, endKey, true, regionLocation, null, null, null);
+                scans = addNewScan(parallelScans, scans, newScan, endKey, true, regionLocation);
                 currentKeyBytes = endKey;
+                // We have a guide post in the region if the above loop was entered
+                // or if the current key is less than the region end key (since the loop
+                // may not have been entered if our scan end key is smaller than the
+                // first guide post in that region).
+                boolean gpsAfterStopKey = false;
+                gpsAvailableForAllRegions &= 
+                    ( gpsInThisRegion && everNotDelayed) || // GP in this region
+                    ( regionIndex == startRegionIndex && gpsForFirstRegion ) || // GP in first region (before start key)
+                    ( gpsAfterStopKey = ( regionIndex == stopIndex && intersectWithGuidePosts && // GP in last region (after stop key)
+                            ( endRegionKey.length == 0 || // then check if gp is in the region
+                            currentGuidePost.compareTo(endRegionKey) < 0)  ) );            
+                if (gpsAfterStopKey) {
+                    // If gp after stop key, but still in last region, track min ts as fallback 
+                    fallbackTs =
+                            Math.min(fallbackTs,
+                                gps.getGuidePostTimestamps()[guideIndex]);
+                }
                 regionIndex++;
             }
             if (scanRanges.isPointLookup()) {
                 this.estimatedRows = Long.valueOf(scanRanges.getPointLookupCount());
                 this.estimatedSize = this.estimatedRows * SchemaUtil.estimateRowSize(table);
-                this.estimateInfoTimestamp = EnvironmentEdgeManager.currentTimeMillis();
+                this.estimateInfoTimestamp = computeMinTimestamp(gpsAvailableForAllRegions, estimates, fallbackTs);
             } else if (emptyGuidePost) {
                 // In case of an empty guide post, we estimate the number of rows scanned by
                 // using the estimated row size
@@ -1041,7 +1118,7 @@ public abstract class BaseResultIterators extends ExplainTable implements Result
             } else if (hasGuidePosts) {
                 this.estimatedRows = estimates.rowsEstimate;
                 this.estimatedSize = estimates.bytesEstimate;
-                this.estimateInfoTimestamp = estimateTs;
+                this.estimateInfoTimestamp = computeMinTimestamp(gpsAvailableForAllRegions, estimates, fallbackTs);
             } else {
                 this.estimatedRows = null;
                 this.estimatedSize = null;
@@ -1057,6 +1134,20 @@ public abstract class BaseResultIterators extends ExplainTable implements Result
         return parallelScans;
     }
 
+    private static Long computeMinTimestamp(boolean gpsAvailableForAllRegions, 
+            GuidePostEstimate estimates,
+            long fallbackTs) {
+        if (gpsAvailableForAllRegions) {
+            if (estimates.lastUpdated < Long.MAX_VALUE) {
+                return estimates.lastUpdated;
+            }
+            if (fallbackTs < Long.MAX_VALUE) {
+                return fallbackTs;
+            }
+        }
+        return null;
+    }
+
     /**
      * Loop through List<List<Scan>> parallelScans object, 
      * rolling dice on each scan based on startRowKey.


[3/3] phoenix git commit: PHOENIX-4628 Allow min time between update stats to be configurable separately from stats cache TTL

Posted by ja...@apache.org.
PHOENIX-4628 Allow min time between update stats to be configurable separately from stats cache TTL


Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/1da8e860
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/1da8e860
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/1da8e860

Branch: refs/heads/4.x-cdh5.11.2
Commit: 1da8e8608df50a631405de2dfd8e99788f4361a7
Parents: a2a56ad
Author: James Taylor <jt...@salesforce.com>
Authored: Tue Feb 27 12:24:06 2018 -0800
Committer: James Taylor <jt...@salesforce.com>
Committed: Thu Mar 1 10:42:41 2018 -0800

----------------------------------------------------------------------
 .../main/java/org/apache/phoenix/query/QueryServicesOptions.java | 3 +++
 .../src/main/java/org/apache/phoenix/schema/MetaDataClient.java  | 4 +---
 2 files changed, 4 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/phoenix/blob/1da8e860/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
index 6905fc3..8dadbb0 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
@@ -235,6 +235,8 @@ public class QueryServicesOptions {
     public static final int DEFAULT_STATS_POOL_SIZE = 4;
     // Maximum size (in bytes) that cached table stats should take upm
     public static final long DEFAULT_STATS_MAX_CACHE_SIZE = 256 * 1024 * 1024;
+    // Allow stats collection to be initiated by client multiple times immediately
+    public static final int DEFAULT_MIN_STATS_UPDATE_FREQ_MS = 0;
 
     public static final boolean DEFAULT_USE_REVERSE_SCAN = true;
 
@@ -381,6 +383,7 @@ public class QueryServicesOptions {
             .setIfUnset(DATE_FORMAT_ATTRIB, DEFAULT_DATE_FORMAT)
             .setIfUnset(DATE_FORMAT_TIMEZONE_ATTRIB, DEFAULT_DATE_FORMAT_TIMEZONE)
             .setIfUnset(STATS_UPDATE_FREQ_MS_ATTRIB, DEFAULT_STATS_UPDATE_FREQ_MS)
+            .setIfUnset(MIN_STATS_UPDATE_FREQ_MS_ATTRIB, DEFAULT_MIN_STATS_UPDATE_FREQ_MS)
             .setIfUnset(CALL_QUEUE_ROUND_ROBIN_ATTRIB, DEFAULT_CALL_QUEUE_ROUND_ROBIN)
             .setIfUnset(MAX_MUTATION_SIZE_ATTRIB, DEFAULT_MAX_MUTATION_SIZE)
             .setIfUnset(ROW_KEY_ORDER_SALTED_TABLE_ATTRIB, DEFAULT_FORCE_ROW_KEY_ORDER)

http://git-wip-us.apache.org/repos/asf/phoenix/blob/1da8e860/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java b/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java
index 8be3dcf..e7f7795 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java
@@ -1165,9 +1165,7 @@ public class MetaDataClient {
     private long updateStatisticsInternal(PName physicalName, PTable logicalTable, Map<String, Object> statsProps, List<byte[]> cfs, boolean checkLastStatsUpdateTime) throws SQLException {
         ReadOnlyProps props = connection.getQueryServices().getProps();
         final long msMinBetweenUpdates = props
-                .getLong(QueryServices.MIN_STATS_UPDATE_FREQ_MS_ATTRIB,
-                        props.getLong(QueryServices.STATS_UPDATE_FREQ_MS_ATTRIB,
-                                QueryServicesOptions.DEFAULT_STATS_UPDATE_FREQ_MS) / 2);
+                .getLong(QueryServices.MIN_STATS_UPDATE_FREQ_MS_ATTRIB, QueryServicesOptions.DEFAULT_MIN_STATS_UPDATE_FREQ_MS);
         Long scn = connection.getSCN();
         // Always invalidate the cache
         long clientTimeStamp = connection.getSCN() == null ? HConstants.LATEST_TIMESTAMP : scn;


[2/3] phoenix git commit: PHOENIX-4626 Increase time allowed for partial index rebuild to complete

Posted by ja...@apache.org.
PHOENIX-4626 Increase time allowed for partial index rebuild to complete


Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/a2a56ad8
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/a2a56ad8
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/a2a56ad8

Branch: refs/heads/4.x-cdh5.11.2
Commit: a2a56ad8021091ba9903fc683671d02f58a0a1fa
Parents: 4b3eb2b
Author: James Taylor <jt...@salesforce.com>
Authored: Mon Feb 26 15:34:26 2018 -0800
Committer: James Taylor <jt...@salesforce.com>
Committed: Thu Mar 1 10:42:24 2018 -0800

----------------------------------------------------------------------
 .../main/java/org/apache/phoenix/query/QueryServicesOptions.java   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/phoenix/blob/a2a56ad8/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
index d749433..6905fc3 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
@@ -197,7 +197,7 @@ public class QueryServicesOptions {
     public static final long DEFAULT_INDEX_REBUILD_RPC_TIMEOUT = 30000 * 60; // 30 mins
     public static final long DEFAULT_INDEX_REBUILD_CLIENT_SCANNER_TIMEOUT = 30000 * 60; // 30 mins
     public static final int DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER = 5; // 5 total tries at rpc level
-    public static final int DEFAULT_INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD = 30000 * 60; // 30 mins
+    public static final int DEFAULT_INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD = 60000 * 60 * 24; // 24 hrs
     public static final long DEFAULT_INDEX_PENDING_DISABLE_THRESHOLD = 30000; // 30 secs
 
     /**