You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@phoenix.apache.org by GitBox <gi...@apache.org> on 2019/12/27 18:36:31 UTC

[GitHub] [phoenix] gokceni opened a new pull request #666: Phoenix 5652

gokceni opened a new pull request #666: Phoenix 5652
URL: https://github.com/apache/phoenix/pull/666
 
 
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r365364321
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolPerformanceIT.java
 ##########
 @@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
+ * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
+ * for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.SchemaUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+
+import static org.apache.phoenix.mapreduce.index.PhoenixScrutinyJobCounters.INVALID_ROW_COUNT;
+import static org.apache.phoenix.mapreduce.index.PhoenixScrutinyJobCounters.VALID_ROW_COUNT;
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Tests for the {@link IndexScrutinyTool}
+ */
+@Category(NeedsOwnMiniClusterTest.class)
+@RunWith(Parameterized.class)
+public class IndexScrutinyToolPerformanceIT extends IndexScrutinyToolBaseIT {
+    private String dataTableDdl;
+    private String indexTableDdl;
+    private String viewDdl;
+
+    private static final String UPSERT_SQL = "UPSERT INTO %s VALUES(?,?,?,?)";
+
+    private String schemaName;
+    private String dataTableName;
+    private String dataTableFullName;
+    private String indexTableName;
+    private String indexTableFullName;
+    private String viewName;
+    private String viewFullName;
+
+    private Connection conn;
+
+    private PreparedStatement dataTableUpsertStmt;
+    private PreparedStatement viewUpsertStmt;
+
+    private long testTime;
+    private Properties props;
+
+    private static final Log LOGGER = LogFactory.getLog(IndexScrutinyToolPerformanceIT.class);
+    @Parameterized.Parameters public static Collection<Object[]> data() {
+        List<Object[]> list = Lists.newArrayListWithExpectedSize(15);
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, ID, ZIP, EMPLOYER, EMPLOY_DATE)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, ZIP, EMPLOY_DATE, EMPLOYER)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) SALT_BUCKETS=2",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+                null, "CREATE INDEX %s ON %s (NAME) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID, NAME)) MULTI_TENANT=true",
+                null, "CREATE INDEX %s ON %s (ZIP) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+                null, "CREATE INDEX %s  ON %s (NAME, ID, ZIP) " });
+        return list;
+    }
+
+    public IndexScrutinyToolPerformanceIT(String dataTableDdl, String viewDdl, String indexTableDdl) {
+        this.dataTableDdl = dataTableDdl;
+        this.viewDdl = viewDdl;
+        this.indexTableDdl = indexTableDdl;
+    }
+
+    /**
+     * Create the test data and index tables
+     */
+    @Before
+    public void setup() throws SQLException {
+        generateUniqueTableNames();
+        createTestTable(getUrl(), String.format(dataTableDdl, dataTableFullName));
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            createTestTable(getUrl(), String.format(viewDdl, viewFullName, dataTableFullName));
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, viewFullName));
+        } else {
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, dataTableFullName));
+        }
+        props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+        conn = DriverManager.getConnection(getUrl(), props);
+        String dataTableUpsert = String.format(UPSERT_SQL, dataTableFullName);
+        dataTableUpsertStmt = conn.prepareStatement(dataTableUpsert);
+        String viewUpsert = String.format(UPSERT_SQL, viewFullName);
+        viewUpsertStmt = conn.prepareStatement(viewUpsert);
+        conn.setAutoCommit(false);
+        testTime = EnvironmentEdgeManager.currentTimeMillis() - 1000;
+
+    }
+
+    @After
+    public void teardown() throws SQLException {
+        if (conn != null) {
+            conn.close();
+        }
+    }
+
+    @Test
+    public void testSkipScan() throws Exception {
+        String baseTableName = dataTableName;
+        PreparedStatement upsertStmt = dataTableUpsertStmt;
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            baseTableName = viewName;
+            upsertStmt = viewUpsertStmt;
+        }
+
+        for (int i=0; i < 2; i++) {
+            if (dataTableDdl.contains("MULTI_TENANT")) {
+                upsertRowMultiTenant(upsertStmt, "val"+i, i,"name-" + i, 19999);
+            } else {
+                upsertRow(upsertStmt, i, "name-" + i, 19999);
+            }
+            conn.commit();
+        }
+
+        List<Job> completedJobs = IndexScrutinyToolIT.runScrutiny(schemaName, baseTableName,
+                indexTableName, null, SourceTable.DATA_TABLE_SOURCE);
+        PreparedStatement targetStmt = IndexScrutinyTool.getTargetTableQueryForScrutinyTool();
 
 Review comment:
   What's the goal of this test? If it's just to validate that we're using skip scan filters when appropriate, is there a point to running the scrutiny? (Or at least, to running it for every test case?) If there is, as I asked above, are there scrutiny test runs elsewhere that are no longer necessary?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r361734934
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolPerformanceIT.java
 ##########
 @@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
+ * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
+ * for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.SchemaUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Tests for the {@link IndexScrutinyTool}
+ */
+@Category(NeedsOwnMiniClusterTest.class)
+@RunWith(Parameterized.class)
+public class IndexScrutinyToolPerformanceIT extends IndexScrutinyToolBaseIT {
+    private String dataTableDdl;
+    private String indexTableDdl;
+    private String viewDdl;
+
+    private static final String UPSERT_SQL = "UPSERT INTO %s VALUES(?,?,?,?)";
+
+    private String schemaName;
+    private String dataTableName;
+    private String dataTableFullName;
+    private String indexTableName;
+    private String indexTableFullName;
+    private String viewName;
+    private String viewFullName;
+
+    private Connection conn;
+
+    private PreparedStatement dataTableUpsertStmt;
+    private PreparedStatement viewUpsertStmt;
+
+    private long testTime;
+    private Properties props;
+
+    private static final Log LOG = LogFactory.getLog(IndexScrutinyToolPerformanceIT.class);
+    @Parameterized.Parameters public static Collection<Object[]> data() {
+        List<Object[]> list = Lists.newArrayListWithExpectedSize(15);
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) SALT_BUCKETS=2",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+                null, "CREATE INDEX %s ON %s (NAME) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+//        list.add(new Object[] {
+//                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+//                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+        return list;
+    }
+
+    public IndexScrutinyToolPerformanceIT(String dataTableDdl, String viewDdl, String indexTableDdl) {
+        this.dataTableDdl = dataTableDdl;
+        this.viewDdl = viewDdl;
+        this.indexTableDdl = indexTableDdl;
+    }
+
+    /**
+     * Create the test data and index tables
+     */
+    @Before public void setup() throws SQLException {
 
 Review comment:
   nit: start the access modifiers on the new line

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r361732655
 
 

 ##########
 File path: phoenix-core/src/main/java/org/apache/phoenix/mapreduce/util/IndexColumnNames.java
 ##########
 @@ -252,4 +288,61 @@ public String getQualifiedIndexTableName() {
     public List<String> getIndexPkColNames() {
         return indexPkColNames;
     }
+
+    public List<String> getDataColNamesForSkipScan() {
+        if (pdataTable.isMultiTenant()) {
+            return dataColNames;
+        } else {
+            return dataColNamesForSkipScan;
+        }
+ //       return dataColNamesForSkipScan;
+    }
+
+    public List<String> getIndexPkColNamesForSkipScan() {
+        if (pdataTable.isMultiTenant()) {
+            return indexPkColNames;
+        } else {
+            return indexPkColNamesForSkipScan;
+        }
+   //     return indexPkColNamesForSkipScan;
 
 Review comment:
   nit: please remove the commented code.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r361739074
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolPerformanceIT.java
 ##########
 @@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
+ * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
+ * for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.SchemaUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Tests for the {@link IndexScrutinyTool}
+ */
+@Category(NeedsOwnMiniClusterTest.class)
+@RunWith(Parameterized.class)
+public class IndexScrutinyToolPerformanceIT extends IndexScrutinyToolBaseIT {
+    private String dataTableDdl;
+    private String indexTableDdl;
+    private String viewDdl;
+
+    private static final String UPSERT_SQL = "UPSERT INTO %s VALUES(?,?,?,?)";
+
+    private String schemaName;
+    private String dataTableName;
+    private String dataTableFullName;
+    private String indexTableName;
+    private String indexTableFullName;
+    private String viewName;
+    private String viewFullName;
+
+    private Connection conn;
+
+    private PreparedStatement dataTableUpsertStmt;
+    private PreparedStatement viewUpsertStmt;
+
+    private long testTime;
+    private Properties props;
+
+    private static final Log LOG = LogFactory.getLog(IndexScrutinyToolPerformanceIT.class);
+    @Parameterized.Parameters public static Collection<Object[]> data() {
+        List<Object[]> list = Lists.newArrayListWithExpectedSize(15);
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) SALT_BUCKETS=2",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+                null, "CREATE INDEX %s ON %s (NAME) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+//        list.add(new Object[] {
+//                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+//                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+        return list;
+    }
+
+    public IndexScrutinyToolPerformanceIT(String dataTableDdl, String viewDdl, String indexTableDdl) {
+        this.dataTableDdl = dataTableDdl;
+        this.viewDdl = viewDdl;
+        this.indexTableDdl = indexTableDdl;
+    }
+
+    /**
+     * Create the test data and index tables
+     */
+    @Before public void setup() throws SQLException {
+        generateUniqueTableNames();
+        createTestTable(getUrl(), String.format(dataTableDdl, dataTableFullName));
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            createTestTable(getUrl(), String.format(viewDdl, viewFullName, dataTableFullName));
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, viewFullName));
+        } else {
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, dataTableFullName));
+        }
+        props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+        conn = DriverManager.getConnection(getUrl(), props);
+        String dataTableUpsert = String.format(UPSERT_SQL, dataTableFullName);
+        dataTableUpsertStmt = conn.prepareStatement(dataTableUpsert);
+        String viewUpsert = String.format(UPSERT_SQL, viewFullName);
+        viewUpsertStmt = conn.prepareStatement(viewUpsert);
+        conn.setAutoCommit(false);
+        testTime = EnvironmentEdgeManager.currentTimeMillis() - 1000;
+
+    }
+
+    @After public void teardown() throws SQLException {
+        if (conn != null) {
+            conn.close();
+        }
+    }
+
+    @Test
+    public void testSkipScan() throws Exception {
+        String baseTableName = dataTableName;
+        PreparedStatement upsertStmt = dataTableUpsertStmt;
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            baseTableName = viewName;
+            upsertStmt = viewUpsertStmt;
+        }
+
+        for (int i=0; i < 2; i++) {
+            if (dataTableDdl.contains("MULTI_TENANT")) {
+                upsertRowMultiTenant(upsertStmt, "val"+i, i,"name-" + i, 19999);
+            } else {
+                upsertRow(upsertStmt, i, "name-" + i, 19999);
+            }
+            conn.commit();
+        }
+
+        IndexScrutinyToolIT.runScrutiny(schemaName, baseTableName,
+                indexTableName, null, SourceTable.INDEX_TABLE_SOURCE);
+        PreparedStatement targetStmt = IndexScrutinyTool.getTargetTableQueryForScrutinyTool();
+        assertSkipScanFilter(targetStmt);
+
+        IndexScrutinyToolIT.runScrutiny(schemaName, baseTableName,
+                indexTableName, null, SourceTable.DATA_TABLE_SOURCE);
+        targetStmt = IndexScrutinyTool.getTargetTableQueryForScrutinyTool();
+        assertSkipScanFilter(targetStmt);
+    }
+
+    private void assertSkipScanFilter(PreparedStatement targetStmt) throws SQLException {
+        QueryPlan plan = targetStmt.unwrap(PhoenixStatement.class).getQueryPlan();
+        Scan scan = plan.getScans().get(0).get(0);
+        Filter filter = scan.getFilter();
+        if (filter instanceof FilterList) {
+            FilterList filterList = (FilterList) scan.getFilter();
+
+            boolean isSkipScanUsed = false;
+            for (Filter f : filterList.getFilters()) {
+                if (f instanceof SkipScanFilter) {
+                    isSkipScanUsed = true;
+                    break;
+                }
+            }
+            assertEquals(true, isSkipScanUsed);
+        } else {
+            assertEquals(SkipScanFilter.class, filter.getClass());
+        }
+    }
+    
+    private void generateUniqueTableNames() {
+        schemaName = "S_" + generateUniqueName();
+        dataTableName = "D_" + generateUniqueName();
+        dataTableFullName = SchemaUtil.getTableName(schemaName, dataTableName);
+        viewName = "V_" + generateUniqueName();
+        viewFullName = SchemaUtil.getTableName(schemaName, viewName);
+        indexTableName = "I_" + generateUniqueName();
+        indexTableFullName = SchemaUtil.getTableName(schemaName, indexTableName);
+    }
+
+    private void upsertRow(PreparedStatement stmt, int id, String name, int zip) throws SQLException {
+        int index = 1;
+        // insert row
+        stmt.setInt(index++, id);
+        stmt.setString(index++, name);
+        stmt.setInt(index++, zip);
+        stmt.setTimestamp(index++, new Timestamp(testTime));
+        stmt.executeUpdate();
+    }
+
+    private void upsertRowMultiTenant(PreparedStatement stmt, String col, int id, String name, int zip) throws SQLException {
 
 Review comment:
   Can this be moved to IndexScrutinyToolBaseIT and reused in both ITs, this and IndexScrutinyToolIT?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r365362702
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolPerformanceIT.java
 ##########
 @@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
+ * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
+ * for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.SchemaUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+
+import static org.apache.phoenix.mapreduce.index.PhoenixScrutinyJobCounters.INVALID_ROW_COUNT;
+import static org.apache.phoenix.mapreduce.index.PhoenixScrutinyJobCounters.VALID_ROW_COUNT;
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Tests for the {@link IndexScrutinyTool}
+ */
+@Category(NeedsOwnMiniClusterTest.class)
+@RunWith(Parameterized.class)
+public class IndexScrutinyToolPerformanceIT extends IndexScrutinyToolBaseIT {
+    private String dataTableDdl;
+    private String indexTableDdl;
+    private String viewDdl;
+
+    private static final String UPSERT_SQL = "UPSERT INTO %s VALUES(?,?,?,?)";
+
+    private String schemaName;
+    private String dataTableName;
+    private String dataTableFullName;
+    private String indexTableName;
+    private String indexTableFullName;
+    private String viewName;
+    private String viewFullName;
+
+    private Connection conn;
+
+    private PreparedStatement dataTableUpsertStmt;
+    private PreparedStatement viewUpsertStmt;
+
+    private long testTime;
+    private Properties props;
+
+    private static final Log LOGGER = LogFactory.getLog(IndexScrutinyToolPerformanceIT.class);
+    @Parameterized.Parameters public static Collection<Object[]> data() {
+        List<Object[]> list = Lists.newArrayListWithExpectedSize(15);
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
 
 Review comment:
   Some comments about what each parameter is testing would be useful, because it's not always immediately obvious. 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r361741681
 
 

 ##########
 File path: phoenix-core/src/main/java/org/apache/phoenix/mapreduce/util/IndexColumnNames.java
 ##########
 @@ -252,4 +288,61 @@ public String getQualifiedIndexTableName() {
     public List<String> getIndexPkColNames() {
         return indexPkColNames;
     }
+
+    public List<String> getDataColNamesForSkipScan() {
+        if (pdataTable.isMultiTenant()) {
+            return dataColNames;
+        } else {
+            return dataColNamesForSkipScan;
+        }
+ //       return dataColNamesForSkipScan;
+    }
+
+    public List<String> getIndexPkColNamesForSkipScan() {
+        if (pdataTable.isMultiTenant()) {
+            return indexPkColNames;
+        } else {
+            return indexPkColNamesForSkipScan;
+        }
+   //     return indexPkColNamesForSkipScan;
+    }
+
+    public List<String> getIndexColNamesForSkipScan() {
+        if (pdataTable.isMultiTenant()) {
+            return indexColNames;
+        } else {
+            return indexColNamesForSkipScan;
+        }
+ //      return indexColNamesForSkipScan;
+    }
+
+    public List<String> getDynamicIndexColsForSkipScan() {
+        if (pdataTable.isMultiTenant()) {
+            return getDynamicIndexCols();
+        }
+        // don't want the column family for dynamic columns
+        List<String> indexSqlTypes = Lists.newArrayList();
+
+        for (String colName : indexColNamesForSkipScan) {
+            indexSqlTypes.add(indexColSqlTypeNamesForSkipScan.get(colName));
+        }
+
+        return getDynamicCols(getUnqualifiedColNames(indexColNamesForSkipScan), indexSqlTypes);
+    }
+
+
+    public List<String> getDynamicDataColsForSkipScan() {
 
 Review comment:
   good to have a test on dynamic columns as well. 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r365369370
 
 

 ##########
 File path: phoenix-core/src/main/java/org/apache/phoenix/mapreduce/index/IndexScrutinyTool.java
 ##########
 @@ -259,18 +266,20 @@ public Job createSubmittableJob(String schemaName, String indexTable, String dat
 
             // set CURRENT_SCN for our scan so that incoming writes don't throw off scrutiny
             configuration.set(PhoenixConfigurationUtil.CURRENT_SCN_VALUE, Long.toString(ts));
+            String tenantId = configuration.get(PhoenixRuntime.TENANT_ID_ATTRIB);
 
             // set the source table to either data or index table
             SourceTargetColumnNames columnNames =
                     SourceTable.DATA_TABLE_SOURCE.equals(sourceTable)
                             ? new SourceTargetColumnNames.DataSourceColNames(pdataTable,
-                                    pindexTable)
+                                    pindexTable, tenantId)
                             : new SourceTargetColumnNames.IndexSourceColNames(pdataTable,
-                                    pindexTable);
+                                    pindexTable, tenantId);
             String qSourceTable = columnNames.getQualifiedSourceTableName();
-            List<String> sourceColumnNames = columnNames.getSourceColNames();
-            List<String> sourceDynamicCols = columnNames.getSourceDynamicCols();
-            List<String> targetDynamicCols = columnNames.getTargetDynamicCols();
+
+            List<String> sourceColumnNames = columnNames.getSourceColNamesForSkipScan();
 
 Review comment:
   Seems like we're "leaking" implementation details here...is there a reason why the job creator function needs to know the Scan's going to be a skip scan? Why should it need to change in the future if we decide to use a different Scan filter?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gokceni commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gokceni commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r364917952
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolPerformanceIT.java
 ##########
 @@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
+ * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
+ * for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.SchemaUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Tests for the {@link IndexScrutinyTool}
+ */
+@Category(NeedsOwnMiniClusterTest.class)
+@RunWith(Parameterized.class)
+public class IndexScrutinyToolPerformanceIT extends IndexScrutinyToolBaseIT {
+    private String dataTableDdl;
+    private String indexTableDdl;
+    private String viewDdl;
+
+    private static final String UPSERT_SQL = "UPSERT INTO %s VALUES(?,?,?,?)";
+
+    private String schemaName;
+    private String dataTableName;
+    private String dataTableFullName;
+    private String indexTableName;
+    private String indexTableFullName;
+    private String viewName;
+    private String viewFullName;
+
+    private Connection conn;
+
+    private PreparedStatement dataTableUpsertStmt;
+    private PreparedStatement viewUpsertStmt;
+
+    private long testTime;
+    private Properties props;
+
+    private static final Log LOG = LogFactory.getLog(IndexScrutinyToolPerformanceIT.class);
+    @Parameterized.Parameters public static Collection<Object[]> data() {
+        List<Object[]> list = Lists.newArrayListWithExpectedSize(15);
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) SALT_BUCKETS=2",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+                null, "CREATE INDEX %s ON %s (NAME) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+//        list.add(new Object[] {
+//                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+//                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+        return list;
+    }
+
+    public IndexScrutinyToolPerformanceIT(String dataTableDdl, String viewDdl, String indexTableDdl) {
+        this.dataTableDdl = dataTableDdl;
+        this.viewDdl = viewDdl;
+        this.indexTableDdl = indexTableDdl;
+    }
+
+    /**
+     * Create the test data and index tables
+     */
+    @Before public void setup() throws SQLException {
+        generateUniqueTableNames();
+        createTestTable(getUrl(), String.format(dataTableDdl, dataTableFullName));
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            createTestTable(getUrl(), String.format(viewDdl, viewFullName, dataTableFullName));
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, viewFullName));
+        } else {
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, dataTableFullName));
+        }
+        props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+        conn = DriverManager.getConnection(getUrl(), props);
+        String dataTableUpsert = String.format(UPSERT_SQL, dataTableFullName);
+        dataTableUpsertStmt = conn.prepareStatement(dataTableUpsert);
+        String viewUpsert = String.format(UPSERT_SQL, viewFullName);
+        viewUpsertStmt = conn.prepareStatement(viewUpsert);
+        conn.setAutoCommit(false);
+        testTime = EnvironmentEdgeManager.currentTimeMillis() - 1000;
+
+    }
+
+    @After public void teardown() throws SQLException {
+        if (conn != null) {
+            conn.close();
+        }
+    }
+
+    @Test
+    public void testSkipScan() throws Exception {
+        String baseTableName = dataTableName;
+        PreparedStatement upsertStmt = dataTableUpsertStmt;
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            baseTableName = viewName;
+            upsertStmt = viewUpsertStmt;
+        }
+
+        for (int i=0; i < 2; i++) {
+            if (dataTableDdl.contains("MULTI_TENANT")) {
+                upsertRowMultiTenant(upsertStmt, "val"+i, i,"name-" + i, 19999);
+            } else {
+                upsertRow(upsertStmt, i, "name-" + i, 19999);
 
 Review comment:
   yes, we just check if the filter is used or not.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r365360247
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolIT.java
 ##########
 @@ -188,13 +194,15 @@ public IndexScrutinyToolIT(String dataTableDdl, String indexTableDdl) {
             assertEquals(0, getCounterValue(counters, INVALID_ROW_COUNT));
 
             // Now insert a different varbinary row
-            upsertRow(upsertDataStmt, 3, "name-3", new byte[] {1, 1, 1, 1});
+            upsertRow(upsertDataStmt, 3, "name-3", new byte[] { 1, 1, 1, 1 });
             conn.commit();
 
-            PreparedStatement upsertIndexStmt = conn.prepareStatement(String.format(upsertIndex, indexTableName));
+            PreparedStatement
+                    upsertIndexStmt =
+                    conn.prepareStatement(String.format(upsertIndex, indexTableName));
             upsertIndexStmt.setString(1, "name-3");
             upsertIndexStmt.setInt(2, 3);
-            upsertIndexStmt.setBytes(3, new byte[] {0, 0, 0, 1});
+            upsertIndexStmt.setBytes(3, new byte[] { 0, 0, 0, 1 });
 
 Review comment:
   nit: unnecessary space

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r361732636
 
 

 ##########
 File path: phoenix-core/src/main/java/org/apache/phoenix/mapreduce/util/IndexColumnNames.java
 ##########
 @@ -252,4 +288,61 @@ public String getQualifiedIndexTableName() {
     public List<String> getIndexPkColNames() {
         return indexPkColNames;
     }
+
+    public List<String> getDataColNamesForSkipScan() {
+        if (pdataTable.isMultiTenant()) {
+            return dataColNames;
+        } else {
+            return dataColNamesForSkipScan;
+        }
+ //       return dataColNamesForSkipScan;
+    }
+
+    public List<String> getIndexPkColNamesForSkipScan() {
+        if (pdataTable.isMultiTenant()) {
+            return indexPkColNames;
+        } else {
+            return indexPkColNamesForSkipScan;
+        }
+   //     return indexPkColNamesForSkipScan;
+    }
+
+    public List<String> getIndexColNamesForSkipScan() {
+        if (pdataTable.isMultiTenant()) {
+            return indexColNames;
+        } else {
+            return indexColNamesForSkipScan;
+        }
+ //      return indexColNamesForSkipScan;
 
 Review comment:
   nit: please remove the commented code.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gokceni commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gokceni commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r364918376
 
 

 ##########
 File path: phoenix-core/src/main/java/org/apache/phoenix/mapreduce/util/IndexColumnNames.java
 ##########
 @@ -79,6 +85,8 @@ public IndexColumnNames(final PTable pdataTable, final PTable pindexTable) {
                 dataColSqlTypeNames.add(getDataTypeString(dPkCol));
                 indexPkColNames.add(indexColumnName);
                 indexColSqlTypeNames.add(getDataTypeString(indexCol));
+                indexColSqlTypeNamesForSkipScan.put(indexColumnName, getDataTypeString(dPkCol));
 
 Review comment:
   It is not optional but I need these both. 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r365365829
 
 

 ##########
 File path: phoenix-core/src/main/java/org/apache/phoenix/mapreduce/util/IndexColumnNames.java
 ##########
 @@ -61,8 +67,13 @@ public IndexColumnNames(final PTable pdataTable, final PTable pindexTable) {
         if (pindexTable.getViewIndexId() != null) {
             offset++;
         }
-        if (pindexTable.isMultiTenant() && pindexTable.getViewIndexId() != null) {
-            offset++;
+
+        if (tenantId != null) {
+            // For multi tenant view indexes, in order for skipscan to be used, we need tenant column.
+            // For tenant connections this column is not visible.
+            if (pindexTable.isMultiTenant() && pindexTable.getViewIndexId() != null) {
 
 Review comment:
   @gokceni  Wouldn't you also need this for multi-tenant global indexes not on a view?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r365359818
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolIT.java
 ##########
 @@ -165,19 +167,23 @@ public IndexScrutinyToolIT(String dataTableDdl, String indexTableDdl) {
     @Test public void testScrutinyOnArrayTypes() throws Exception {
         String dataTableName = generateUniqueName();
         String indexTableName = generateUniqueName();
-        String dataTableDDL = "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, VB VARBINARY)";
+        String
+                dataTableDDL =
 
 Review comment:
   nit: any reason dataTableDDL is on its own line?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r365360073
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolIT.java
 ##########
 @@ -188,13 +194,15 @@ public IndexScrutinyToolIT(String dataTableDdl, String indexTableDdl) {
             assertEquals(0, getCounterValue(counters, INVALID_ROW_COUNT));
 
             // Now insert a different varbinary row
-            upsertRow(upsertDataStmt, 3, "name-3", new byte[] {1, 1, 1, 1});
+            upsertRow(upsertDataStmt, 3, "name-3", new byte[] { 1, 1, 1, 1 });
 
 Review comment:
   nit: unnecessary space

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r365364774
 
 

 ##########
 File path: phoenix-core/src/main/java/org/apache/phoenix/mapreduce/index/IndexScrutinyMapper.java
 ##########
 @@ -54,6 +58,8 @@
 
 import com.google.common.base.Joiner;
 
+import static org.junit.Assert.assertEquals;
 
 Review comment:
   JUnit classes generally shouldn't be imported into non-test code, plus this looks unused.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r361732670
 
 

 ##########
 File path: phoenix-core/src/main/java/org/apache/phoenix/mapreduce/util/IndexColumnNames.java
 ##########
 @@ -252,4 +288,61 @@ public String getQualifiedIndexTableName() {
     public List<String> getIndexPkColNames() {
         return indexPkColNames;
     }
+
+    public List<String> getDataColNamesForSkipScan() {
+        if (pdataTable.isMultiTenant()) {
+            return dataColNames;
+        } else {
+            return dataColNamesForSkipScan;
+        }
+ //       return dataColNamesForSkipScan;
 
 Review comment:
   nit: please remove the commented code.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r361742134
 
 

 ##########
 File path: phoenix-core/src/main/java/org/apache/phoenix/mapreduce/util/IndexColumnNames.java
 ##########
 @@ -79,6 +85,8 @@ public IndexColumnNames(final PTable pdataTable, final PTable pindexTable) {
                 dataColSqlTypeNames.add(getDataTypeString(dPkCol));
                 indexPkColNames.add(indexColumnName);
                 indexColSqlTypeNames.add(getDataTypeString(indexCol));
+                indexColSqlTypeNamesForSkipScan.put(indexColumnName, getDataTypeString(dPkCol));
 
 Review comment:
   Is using skipscan for indexscrutiny optional? If not, can the above two lists be merged into?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r361742134
 
 

 ##########
 File path: phoenix-core/src/main/java/org/apache/phoenix/mapreduce/util/IndexColumnNames.java
 ##########
 @@ -79,6 +85,8 @@ public IndexColumnNames(final PTable pdataTable, final PTable pindexTable) {
                 dataColSqlTypeNames.add(getDataTypeString(dPkCol));
                 indexPkColNames.add(indexColumnName);
                 indexColSqlTypeNames.add(getDataTypeString(indexCol));
+                indexColSqlTypeNamesForSkipScan.put(indexColumnName, getDataTypeString(dPkCol));
 
 Review comment:
   Is using skipscan for indexscrutiny optional? If not, can the above two lists be merged into one?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r365360184
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolIT.java
 ##########
 @@ -188,13 +194,15 @@ public IndexScrutinyToolIT(String dataTableDdl, String indexTableDdl) {
             assertEquals(0, getCounterValue(counters, INVALID_ROW_COUNT));
 
             // Now insert a different varbinary row
-            upsertRow(upsertDataStmt, 3, "name-3", new byte[] {1, 1, 1, 1});
+            upsertRow(upsertDataStmt, 3, "name-3", new byte[] { 1, 1, 1, 1 });
             conn.commit();
 
-            PreparedStatement upsertIndexStmt = conn.prepareStatement(String.format(upsertIndex, indexTableName));
+            PreparedStatement
+                    upsertIndexStmt =
 
 Review comment:
   nit: ditto upsertIndexStmt

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r365359999
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolIT.java
 ##########
 @@ -165,19 +167,23 @@ public IndexScrutinyToolIT(String dataTableDdl, String indexTableDdl) {
     @Test public void testScrutinyOnArrayTypes() throws Exception {
         String dataTableName = generateUniqueName();
         String indexTableName = generateUniqueName();
-        String dataTableDDL = "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, VB VARBINARY)";
+        String
+                dataTableDDL =
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, VB VARBINARY)";
         String indexTableDDL = "CREATE INDEX %s ON %s (NAME) INCLUDE (VB)";
         String upsertData = "UPSERT INTO %s VALUES (?, ?, ?)";
         String upsertIndex = "UPSERT INTO %s (\"0:NAME\", \":ID\", \"0:VB\") values (?,?,?)";
 
-        try (Connection conn =
-                DriverManager.getConnection(getUrl(), PropertiesUtil.deepCopy(TEST_PROPERTIES))) {
+        try (Connection conn = DriverManager.getConnection(getUrl(), PropertiesUtil.deepCopy(TEST_PROPERTIES))) {
             conn.createStatement().execute(String.format(dataTableDDL, dataTableName));
-            conn.createStatement().execute(String.format(indexTableDDL, indexTableName, dataTableName));
+            conn.createStatement()
+                    .execute(String.format(indexTableDDL, indexTableName, dataTableName));
             // insert two rows
-            PreparedStatement upsertDataStmt = conn.prepareStatement(String.format(upsertData, dataTableName));
-            upsertRow(upsertDataStmt, 1, "name-1", new byte[] {127, 0, 0, 1});
-            upsertRow(upsertDataStmt, 2, "name-2", new byte[] {127, 1, 0, 5});
+            PreparedStatement
+                    upsertDataStmt =
 
 Review comment:
   ditto upsertDataStmt?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gokceni closed pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gokceni closed pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666
 
 
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r361738202
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolPerformanceIT.java
 ##########
 @@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
+ * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
+ * for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.SchemaUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Tests for the {@link IndexScrutinyTool}
+ */
+@Category(NeedsOwnMiniClusterTest.class)
+@RunWith(Parameterized.class)
+public class IndexScrutinyToolPerformanceIT extends IndexScrutinyToolBaseIT {
+    private String dataTableDdl;
+    private String indexTableDdl;
+    private String viewDdl;
+
+    private static final String UPSERT_SQL = "UPSERT INTO %s VALUES(?,?,?,?)";
+
+    private String schemaName;
+    private String dataTableName;
+    private String dataTableFullName;
+    private String indexTableName;
+    private String indexTableFullName;
+    private String viewName;
+    private String viewFullName;
+
+    private Connection conn;
+
+    private PreparedStatement dataTableUpsertStmt;
+    private PreparedStatement viewUpsertStmt;
+
+    private long testTime;
+    private Properties props;
+
+    private static final Log LOG = LogFactory.getLog(IndexScrutinyToolPerformanceIT.class);
+    @Parameterized.Parameters public static Collection<Object[]> data() {
+        List<Object[]> list = Lists.newArrayListWithExpectedSize(15);
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) SALT_BUCKETS=2",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+                null, "CREATE INDEX %s ON %s (NAME) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+//        list.add(new Object[] {
+//                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+//                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+        return list;
+    }
+
+    public IndexScrutinyToolPerformanceIT(String dataTableDdl, String viewDdl, String indexTableDdl) {
+        this.dataTableDdl = dataTableDdl;
+        this.viewDdl = viewDdl;
+        this.indexTableDdl = indexTableDdl;
+    }
+
+    /**
+     * Create the test data and index tables
+     */
+    @Before public void setup() throws SQLException {
+        generateUniqueTableNames();
+        createTestTable(getUrl(), String.format(dataTableDdl, dataTableFullName));
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            createTestTable(getUrl(), String.format(viewDdl, viewFullName, dataTableFullName));
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, viewFullName));
+        } else {
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, dataTableFullName));
+        }
+        props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+        conn = DriverManager.getConnection(getUrl(), props);
+        String dataTableUpsert = String.format(UPSERT_SQL, dataTableFullName);
+        dataTableUpsertStmt = conn.prepareStatement(dataTableUpsert);
+        String viewUpsert = String.format(UPSERT_SQL, viewFullName);
+        viewUpsertStmt = conn.prepareStatement(viewUpsert);
+        conn.setAutoCommit(false);
+        testTime = EnvironmentEdgeManager.currentTimeMillis() - 1000;
+
+    }
+
+    @After public void teardown() throws SQLException {
+        if (conn != null) {
+            conn.close();
+        }
+    }
+
+    @Test
+    public void testSkipScan() throws Exception {
+        String baseTableName = dataTableName;
+        PreparedStatement upsertStmt = dataTableUpsertStmt;
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            baseTableName = viewName;
+            upsertStmt = viewUpsertStmt;
+        }
+
+        for (int i=0; i < 2; i++) {
+            if (dataTableDdl.contains("MULTI_TENANT")) {
+                upsertRowMultiTenant(upsertStmt, "val"+i, i,"name-" + i, 19999);
+            } else {
+                upsertRow(upsertStmt, i, "name-" + i, 19999);
+            }
+            conn.commit();
+        }
+
+        IndexScrutinyToolIT.runScrutiny(schemaName, baseTableName,
+                indexTableName, null, SourceTable.INDEX_TABLE_SOURCE);
+        PreparedStatement targetStmt = IndexScrutinyTool.getTargetTableQueryForScrutinyTool();
+        assertSkipScanFilter(targetStmt);
+
+        IndexScrutinyToolIT.runScrutiny(schemaName, baseTableName,
+                indexTableName, null, SourceTable.DATA_TABLE_SOURCE);
+        targetStmt = IndexScrutinyTool.getTargetTableQueryForScrutinyTool();
+        assertSkipScanFilter(targetStmt);
+    }
+
+    private void assertSkipScanFilter(PreparedStatement targetStmt) throws SQLException {
 
 Review comment:
   Shall we have a test for measuring performance as well?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gokceni commented on issue #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gokceni commented on issue #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#issuecomment-576835754
 
 
   Closing this PR because we did not see that much improvement in performance as we were hoping to see. It is still faster than current version but not fast enough. The -b 1 is much faster than this option

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r361736289
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolPerformanceIT.java
 ##########
 @@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
+ * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
+ * for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.SchemaUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Tests for the {@link IndexScrutinyTool}
+ */
+@Category(NeedsOwnMiniClusterTest.class)
+@RunWith(Parameterized.class)
+public class IndexScrutinyToolPerformanceIT extends IndexScrutinyToolBaseIT {
+    private String dataTableDdl;
+    private String indexTableDdl;
+    private String viewDdl;
+
+    private static final String UPSERT_SQL = "UPSERT INTO %s VALUES(?,?,?,?)";
+
+    private String schemaName;
+    private String dataTableName;
+    private String dataTableFullName;
+    private String indexTableName;
+    private String indexTableFullName;
+    private String viewName;
+    private String viewFullName;
+
+    private Connection conn;
+
+    private PreparedStatement dataTableUpsertStmt;
+    private PreparedStatement viewUpsertStmt;
+
+    private long testTime;
+    private Properties props;
+
+    private static final Log LOG = LogFactory.getLog(IndexScrutinyToolPerformanceIT.class);
 
 Review comment:
   nit: Let's be consistent with using LOGGER and also the corresponding import

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r365366703
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolPerformanceIT.java
 ##########
 @@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
+ * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
+ * for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.SchemaUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+
+import static org.apache.phoenix.mapreduce.index.PhoenixScrutinyJobCounters.INVALID_ROW_COUNT;
+import static org.apache.phoenix.mapreduce.index.PhoenixScrutinyJobCounters.VALID_ROW_COUNT;
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Tests for the {@link IndexScrutinyTool}
+ */
+@Category(NeedsOwnMiniClusterTest.class)
+@RunWith(Parameterized.class)
+public class IndexScrutinyToolPerformanceIT extends IndexScrutinyToolBaseIT {
+    private String dataTableDdl;
+    private String indexTableDdl;
+    private String viewDdl;
+
+    private static final String UPSERT_SQL = "UPSERT INTO %s VALUES(?,?,?,?)";
+
+    private String schemaName;
+    private String dataTableName;
+    private String dataTableFullName;
+    private String indexTableName;
+    private String indexTableFullName;
+    private String viewName;
+    private String viewFullName;
+
+    private Connection conn;
+
+    private PreparedStatement dataTableUpsertStmt;
+    private PreparedStatement viewUpsertStmt;
+
+    private long testTime;
+    private Properties props;
+
+    private static final Log LOGGER = LogFactory.getLog(IndexScrutinyToolPerformanceIT.class);
+    @Parameterized.Parameters public static Collection<Object[]> data() {
+        List<Object[]> list = Lists.newArrayListWithExpectedSize(15);
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, ID, ZIP, EMPLOYER, EMPLOY_DATE)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, ZIP, EMPLOY_DATE, EMPLOYER)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) SALT_BUCKETS=2",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+                null, "CREATE INDEX %s ON %s (NAME) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID, NAME)) MULTI_TENANT=true",
+                null, "CREATE INDEX %s ON %s (ZIP) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+                null, "CREATE INDEX %s  ON %s (NAME, ID, ZIP) " });
+        return list;
+    }
+
+    public IndexScrutinyToolPerformanceIT(String dataTableDdl, String viewDdl, String indexTableDdl) {
+        this.dataTableDdl = dataTableDdl;
+        this.viewDdl = viewDdl;
+        this.indexTableDdl = indexTableDdl;
+    }
+
+    /**
+     * Create the test data and index tables
+     */
+    @Before
+    public void setup() throws SQLException {
+        generateUniqueTableNames();
+        createTestTable(getUrl(), String.format(dataTableDdl, dataTableFullName));
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            createTestTable(getUrl(), String.format(viewDdl, viewFullName, dataTableFullName));
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, viewFullName));
+        } else {
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, dataTableFullName));
+        }
+        props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+        conn = DriverManager.getConnection(getUrl(), props);
+        String dataTableUpsert = String.format(UPSERT_SQL, dataTableFullName);
+        dataTableUpsertStmt = conn.prepareStatement(dataTableUpsert);
+        String viewUpsert = String.format(UPSERT_SQL, viewFullName);
+        viewUpsertStmt = conn.prepareStatement(viewUpsert);
+        conn.setAutoCommit(false);
+        testTime = EnvironmentEdgeManager.currentTimeMillis() - 1000;
+
+    }
+
+    @After
+    public void teardown() throws SQLException {
+        if (conn != null) {
+            conn.close();
+        }
+    }
+
+    @Test
+    public void testSkipScan() throws Exception {
+        String baseTableName = dataTableName;
+        PreparedStatement upsertStmt = dataTableUpsertStmt;
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            baseTableName = viewName;
+            upsertStmt = viewUpsertStmt;
+        }
+
+        for (int i=0; i < 2; i++) {
+            if (dataTableDdl.contains("MULTI_TENANT")) {
+                upsertRowMultiTenant(upsertStmt, "val"+i, i,"name-" + i, 19999);
+            } else {
+                upsertRow(upsertStmt, i, "name-" + i, 19999);
+            }
+            conn.commit();
+        }
+
+        List<Job> completedJobs = IndexScrutinyToolIT.runScrutiny(schemaName, baseTableName,
+                indexTableName, null, SourceTable.DATA_TABLE_SOURCE);
+        PreparedStatement targetStmt = IndexScrutinyTool.getTargetTableQueryForScrutinyTool();
 
 Review comment:
   If we're just running scrutiny to generate the statement we're looking up, should there be a dry run mode that just generates that statement but doesn't actually run the job?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r361732176
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/index/GlobalIndexCheckerIT.java
 ##########
 @@ -300,7 +300,6 @@ public void testOnePhaseOverwiteFollowingTwoPhaseWrite() throws Exception {
             verifyTableHealth(conn, dataTableName, indexTableName);
         }
     }
-
 
 Review comment:
   nit: unnecessary diff

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r361736460
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolPerformanceIT.java
 ##########
 @@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
+ * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
+ * for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.SchemaUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Tests for the {@link IndexScrutinyTool}
+ */
+@Category(NeedsOwnMiniClusterTest.class)
+@RunWith(Parameterized.class)
+public class IndexScrutinyToolPerformanceIT extends IndexScrutinyToolBaseIT {
+    private String dataTableDdl;
+    private String indexTableDdl;
+    private String viewDdl;
+
+    private static final String UPSERT_SQL = "UPSERT INTO %s VALUES(?,?,?,?)";
+
+    private String schemaName;
+    private String dataTableName;
+    private String dataTableFullName;
+    private String indexTableName;
+    private String indexTableFullName;
+    private String viewName;
+    private String viewFullName;
+
+    private Connection conn;
+
+    private PreparedStatement dataTableUpsertStmt;
+    private PreparedStatement viewUpsertStmt;
+
+    private long testTime;
+    private Properties props;
+
+    private static final Log LOG = LogFactory.getLog(IndexScrutinyToolPerformanceIT.class);
+    @Parameterized.Parameters public static Collection<Object[]> data() {
+        List<Object[]> list = Lists.newArrayListWithExpectedSize(15);
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) SALT_BUCKETS=2",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+                null, "CREATE INDEX %s ON %s (NAME) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+//        list.add(new Object[] {
+//                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+//                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+        return list;
+    }
+
+    public IndexScrutinyToolPerformanceIT(String dataTableDdl, String viewDdl, String indexTableDdl) {
+        this.dataTableDdl = dataTableDdl;
+        this.viewDdl = viewDdl;
+        this.indexTableDdl = indexTableDdl;
+    }
+
+    /**
+     * Create the test data and index tables
+     */
+    @Before public void setup() throws SQLException {
+        generateUniqueTableNames();
+        createTestTable(getUrl(), String.format(dataTableDdl, dataTableFullName));
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            createTestTable(getUrl(), String.format(viewDdl, viewFullName, dataTableFullName));
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, viewFullName));
+        } else {
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, dataTableFullName));
+        }
+        props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+        conn = DriverManager.getConnection(getUrl(), props);
+        String dataTableUpsert = String.format(UPSERT_SQL, dataTableFullName);
+        dataTableUpsertStmt = conn.prepareStatement(dataTableUpsert);
+        String viewUpsert = String.format(UPSERT_SQL, viewFullName);
+        viewUpsertStmt = conn.prepareStatement(viewUpsert);
+        conn.setAutoCommit(false);
+        testTime = EnvironmentEdgeManager.currentTimeMillis() - 1000;
+
+    }
+
+    @After public void teardown() throws SQLException {
+        if (conn != null) {
+            conn.close();
+        }
+    }
+
+    @Test
+    public void testSkipScan() throws Exception {
+        String baseTableName = dataTableName;
+        PreparedStatement upsertStmt = dataTableUpsertStmt;
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            baseTableName = viewName;
+            upsertStmt = viewUpsertStmt;
+        }
+
+        for (int i=0; i < 2; i++) {
+            if (dataTableDdl.contains("MULTI_TENANT")) {
+                upsertRowMultiTenant(upsertStmt, "val"+i, i,"name-" + i, 19999);
+            } else {
+                upsertRow(upsertStmt, i, "name-" + i, 19999);
 
 Review comment:
   if I understand this correctly, this ll only insert 2 rows?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gokceni commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gokceni commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r364922170
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolPerformanceIT.java
 ##########
 @@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
+ * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
+ * for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.SchemaUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Tests for the {@link IndexScrutinyTool}
+ */
+@Category(NeedsOwnMiniClusterTest.class)
+@RunWith(Parameterized.class)
+public class IndexScrutinyToolPerformanceIT extends IndexScrutinyToolBaseIT {
+    private String dataTableDdl;
+    private String indexTableDdl;
+    private String viewDdl;
+
+    private static final String UPSERT_SQL = "UPSERT INTO %s VALUES(?,?,?,?)";
+
+    private String schemaName;
+    private String dataTableName;
+    private String dataTableFullName;
+    private String indexTableName;
+    private String indexTableFullName;
+    private String viewName;
+    private String viewFullName;
+
+    private Connection conn;
+
+    private PreparedStatement dataTableUpsertStmt;
+    private PreparedStatement viewUpsertStmt;
+
+    private long testTime;
+    private Properties props;
+
+    private static final Log LOG = LogFactory.getLog(IndexScrutinyToolPerformanceIT.class);
+    @Parameterized.Parameters public static Collection<Object[]> data() {
+        List<Object[]> list = Lists.newArrayListWithExpectedSize(15);
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) SALT_BUCKETS=2",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+                null, "CREATE INDEX %s ON %s (NAME) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+//        list.add(new Object[] {
+//                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+//                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+        return list;
+    }
+
+    public IndexScrutinyToolPerformanceIT(String dataTableDdl, String viewDdl, String indexTableDdl) {
+        this.dataTableDdl = dataTableDdl;
+        this.viewDdl = viewDdl;
+        this.indexTableDdl = indexTableDdl;
+    }
+
+    /**
+     * Create the test data and index tables
+     */
+    @Before public void setup() throws SQLException {
+        generateUniqueTableNames();
+        createTestTable(getUrl(), String.format(dataTableDdl, dataTableFullName));
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            createTestTable(getUrl(), String.format(viewDdl, viewFullName, dataTableFullName));
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, viewFullName));
+        } else {
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, dataTableFullName));
+        }
+        props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+        conn = DriverManager.getConnection(getUrl(), props);
+        String dataTableUpsert = String.format(UPSERT_SQL, dataTableFullName);
+        dataTableUpsertStmt = conn.prepareStatement(dataTableUpsert);
+        String viewUpsert = String.format(UPSERT_SQL, viewFullName);
+        viewUpsertStmt = conn.prepareStatement(viewUpsert);
+        conn.setAutoCommit(false);
+        testTime = EnvironmentEdgeManager.currentTimeMillis() - 1000;
+
+    }
+
+    @After public void teardown() throws SQLException {
+        if (conn != null) {
+            conn.close();
+        }
+    }
+
+    @Test
+    public void testSkipScan() throws Exception {
+        String baseTableName = dataTableName;
+        PreparedStatement upsertStmt = dataTableUpsertStmt;
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            baseTableName = viewName;
+            upsertStmt = viewUpsertStmt;
+        }
+
+        for (int i=0; i < 2; i++) {
+            if (dataTableDdl.contains("MULTI_TENANT")) {
+                upsertRowMultiTenant(upsertStmt, "val"+i, i,"name-" + i, 19999);
+            } else {
+                upsertRow(upsertStmt, i, "name-" + i, 19999);
+            }
+            conn.commit();
+        }
+
+        IndexScrutinyToolIT.runScrutiny(schemaName, baseTableName,
+                indexTableName, null, SourceTable.INDEX_TABLE_SOURCE);
+        PreparedStatement targetStmt = IndexScrutinyTool.getTargetTableQueryForScrutinyTool();
+        assertSkipScanFilter(targetStmt);
+
+        IndexScrutinyToolIT.runScrutiny(schemaName, baseTableName,
+                indexTableName, null, SourceTable.DATA_TABLE_SOURCE);
+        targetStmt = IndexScrutinyTool.getTargetTableQueryForScrutinyTool();
+        assertSkipScanFilter(targetStmt);
+    }
+
+    private void assertSkipScanFilter(PreparedStatement targetStmt) throws SQLException {
+        QueryPlan plan = targetStmt.unwrap(PhoenixStatement.class).getQueryPlan();
+        Scan scan = plan.getScans().get(0).get(0);
+        Filter filter = scan.getFilter();
+        if (filter instanceof FilterList) {
+            FilterList filterList = (FilterList) scan.getFilter();
+
+            boolean isSkipScanUsed = false;
+            for (Filter f : filterList.getFilters()) {
+                if (f instanceof SkipScanFilter) {
+                    isSkipScanUsed = true;
+                    break;
+                }
+            }
+            assertEquals(true, isSkipScanUsed);
+        } else {
+            assertEquals(SkipScanFilter.class, filter.getClass());
+        }
+    }
+    
+    private void generateUniqueTableNames() {
+        schemaName = "S_" + generateUniqueName();
+        dataTableName = "D_" + generateUniqueName();
+        dataTableFullName = SchemaUtil.getTableName(schemaName, dataTableName);
+        viewName = "V_" + generateUniqueName();
+        viewFullName = SchemaUtil.getTableName(schemaName, viewName);
+        indexTableName = "I_" + generateUniqueName();
+        indexTableFullName = SchemaUtil.getTableName(schemaName, indexTableName);
+    }
+
+    private void upsertRow(PreparedStatement stmt, int id, String name, int zip) throws SQLException {
+        int index = 1;
+        // insert row
+        stmt.setInt(index++, id);
+        stmt.setString(index++, name);
+        stmt.setInt(index++, zip);
+        stmt.setTimestamp(index++, new Timestamp(testTime));
+        stmt.executeUpdate();
+    }
+
+    private void upsertRowMultiTenant(PreparedStatement stmt, String col, int id, String name, int zip) throws SQLException {
 
 Review comment:
   IndexScrutinyToolIT doesn't have multi tenant test. There is a separate test file for multi tenant but it is a bigger change. It doesn't use prepared statement and uses tenant connections and global connections.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r361732082
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolPerformanceIT.java
 ##########
 @@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
+ * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
+ * for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.SchemaUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Tests for the {@link IndexScrutinyTool}
+ */
+@Category(NeedsOwnMiniClusterTest.class)
+@RunWith(Parameterized.class)
+public class IndexScrutinyToolPerformanceIT extends IndexScrutinyToolBaseIT {
+    private String dataTableDdl;
+    private String indexTableDdl;
+    private String viewDdl;
+
+    private static final String UPSERT_SQL = "UPSERT INTO %s VALUES(?,?,?,?)";
+
+    private String schemaName;
+    private String dataTableName;
+    private String dataTableFullName;
+    private String indexTableName;
+    private String indexTableFullName;
+    private String viewName;
+    private String viewFullName;
+
+    private Connection conn;
+
+    private PreparedStatement dataTableUpsertStmt;
+    private PreparedStatement viewUpsertStmt;
+
+    private long testTime;
+    private Properties props;
+
+    private static final Log LOG = LogFactory.getLog(IndexScrutinyToolPerformanceIT.class);
+    @Parameterized.Parameters public static Collection<Object[]> data() {
+        List<Object[]> list = Lists.newArrayListWithExpectedSize(15);
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) SALT_BUCKETS=2",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+                null, "CREATE INDEX %s ON %s (NAME) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+//        list.add(new Object[] {
 
 Review comment:
   nit: please remove the commented code.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r365361835
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolPerformanceIT.java
 ##########
 @@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
+ * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
+ * for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.SchemaUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+
+import static org.apache.phoenix.mapreduce.index.PhoenixScrutinyJobCounters.INVALID_ROW_COUNT;
+import static org.apache.phoenix.mapreduce.index.PhoenixScrutinyJobCounters.VALID_ROW_COUNT;
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Tests for the {@link IndexScrutinyTool}
+ */
+@Category(NeedsOwnMiniClusterTest.class)
+@RunWith(Parameterized.class)
+public class IndexScrutinyToolPerformanceIT extends IndexScrutinyToolBaseIT {
+    private String dataTableDdl;
+    private String indexTableDdl;
+    private String viewDdl;
+
+    private static final String UPSERT_SQL = "UPSERT INTO %s VALUES(?,?,?,?)";
+
+    private String schemaName;
+    private String dataTableName;
+    private String dataTableFullName;
+    private String indexTableName;
+    private String indexTableFullName;
+    private String viewName;
+    private String viewFullName;
+
+    private Connection conn;
+
+    private PreparedStatement dataTableUpsertStmt;
+    private PreparedStatement viewUpsertStmt;
+
+    private long testTime;
+    private Properties props;
+
+    private static final Log LOGGER = LogFactory.getLog(IndexScrutinyToolPerformanceIT.class);
+    @Parameterized.Parameters public static Collection<Object[]> data() {
+        List<Object[]> list = Lists.newArrayListWithExpectedSize(15);
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, ID, ZIP, EMPLOYER, EMPLOY_DATE)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, ZIP, EMPLOY_DATE, EMPLOYER)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE) INCLUDE (ZIP)" });
 
 Review comment:
   nit: normally I don't flag long lines warnings very often -- but these lines are really long. 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
swaroopak commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r361736092
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolIT.java
 ##########
 @@ -694,7 +702,6 @@ private void upsertRow(PreparedStatement stmt, int id, String name, byte[] val)
         stmt.setInt(index++, id);
         stmt.setString(index++, name);
         stmt.setBytes(index++, val);
-        stmt.executeUpdate();
 
 Review comment:
   why was this removed?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r365363332
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolPerformanceIT.java
 ##########
 @@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
+ * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
+ * for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.SchemaUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+
+import static org.apache.phoenix.mapreduce.index.PhoenixScrutinyJobCounters.INVALID_ROW_COUNT;
+import static org.apache.phoenix.mapreduce.index.PhoenixScrutinyJobCounters.VALID_ROW_COUNT;
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Tests for the {@link IndexScrutinyTool}
+ */
+@Category(NeedsOwnMiniClusterTest.class)
+@RunWith(Parameterized.class)
+public class IndexScrutinyToolPerformanceIT extends IndexScrutinyToolBaseIT {
+    private String dataTableDdl;
+    private String indexTableDdl;
+    private String viewDdl;
+
+    private static final String UPSERT_SQL = "UPSERT INTO %s VALUES(?,?,?,?)";
+
+    private String schemaName;
+    private String dataTableName;
+    private String dataTableFullName;
+    private String indexTableName;
+    private String indexTableFullName;
+    private String viewName;
+    private String viewFullName;
+
+    private Connection conn;
+
+    private PreparedStatement dataTableUpsertStmt;
+    private PreparedStatement viewUpsertStmt;
+
+    private long testTime;
+    private Properties props;
+
+    private static final Log LOGGER = LogFactory.getLog(IndexScrutinyToolPerformanceIT.class);
+    @Parameterized.Parameters public static Collection<Object[]> data() {
+        List<Object[]> list = Lists.newArrayListWithExpectedSize(15);
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
 
 Review comment:
   Also, given that we're adding so many cases here, are there redundancies between this and the other IndexScrutinyIT and related tests that can be consolidated? No need to test the same scenarios twice. 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gokceni commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gokceni commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r364918053
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolPerformanceIT.java
 ##########
 @@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
+ * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
+ * for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.SchemaUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Tests for the {@link IndexScrutinyTool}
+ */
+@Category(NeedsOwnMiniClusterTest.class)
+@RunWith(Parameterized.class)
+public class IndexScrutinyToolPerformanceIT extends IndexScrutinyToolBaseIT {
+    private String dataTableDdl;
+    private String indexTableDdl;
+    private String viewDdl;
+
+    private static final String UPSERT_SQL = "UPSERT INTO %s VALUES(?,?,?,?)";
+
+    private String schemaName;
+    private String dataTableName;
+    private String dataTableFullName;
+    private String indexTableName;
+    private String indexTableFullName;
+    private String viewName;
+    private String viewFullName;
+
+    private Connection conn;
+
+    private PreparedStatement dataTableUpsertStmt;
+    private PreparedStatement viewUpsertStmt;
+
+    private long testTime;
+    private Properties props;
+
+    private static final Log LOG = LogFactory.getLog(IndexScrutinyToolPerformanceIT.class);
+    @Parameterized.Parameters public static Collection<Object[]> data() {
+        List<Object[]> list = Lists.newArrayListWithExpectedSize(15);
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) SALT_BUCKETS=2",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+                null, "CREATE INDEX %s ON %s (NAME) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+//        list.add(new Object[] {
+//                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+//                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+        return list;
+    }
+
+    public IndexScrutinyToolPerformanceIT(String dataTableDdl, String viewDdl, String indexTableDdl) {
+        this.dataTableDdl = dataTableDdl;
+        this.viewDdl = viewDdl;
+        this.indexTableDdl = indexTableDdl;
+    }
+
+    /**
+     * Create the test data and index tables
+     */
+    @Before public void setup() throws SQLException {
+        generateUniqueTableNames();
+        createTestTable(getUrl(), String.format(dataTableDdl, dataTableFullName));
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            createTestTable(getUrl(), String.format(viewDdl, viewFullName, dataTableFullName));
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, viewFullName));
+        } else {
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, dataTableFullName));
+        }
+        props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+        conn = DriverManager.getConnection(getUrl(), props);
+        String dataTableUpsert = String.format(UPSERT_SQL, dataTableFullName);
+        dataTableUpsertStmt = conn.prepareStatement(dataTableUpsert);
+        String viewUpsert = String.format(UPSERT_SQL, viewFullName);
+        viewUpsertStmt = conn.prepareStatement(viewUpsert);
+        conn.setAutoCommit(false);
+        testTime = EnvironmentEdgeManager.currentTimeMillis() - 1000;
+
+    }
+
+    @After public void teardown() throws SQLException {
+        if (conn != null) {
+            conn.close();
+        }
+    }
+
+    @Test
+    public void testSkipScan() throws Exception {
+        String baseTableName = dataTableName;
+        PreparedStatement upsertStmt = dataTableUpsertStmt;
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            baseTableName = viewName;
+            upsertStmt = viewUpsertStmt;
+        }
+
+        for (int i=0; i < 2; i++) {
+            if (dataTableDdl.contains("MULTI_TENANT")) {
+                upsertRowMultiTenant(upsertStmt, "val"+i, i,"name-" + i, 19999);
+            } else {
+                upsertRow(upsertStmt, i, "name-" + i, 19999);
+            }
+            conn.commit();
+        }
+
+        IndexScrutinyToolIT.runScrutiny(schemaName, baseTableName,
+                indexTableName, null, SourceTable.INDEX_TABLE_SOURCE);
+        PreparedStatement targetStmt = IndexScrutinyTool.getTargetTableQueryForScrutinyTool();
+        assertSkipScanFilter(targetStmt);
+
+        IndexScrutinyToolIT.runScrutiny(schemaName, baseTableName,
+                indexTableName, null, SourceTable.DATA_TABLE_SOURCE);
+        targetStmt = IndexScrutinyTool.getTargetTableQueryForScrutinyTool();
+        assertSkipScanFilter(targetStmt);
+    }
+
+    private void assertSkipScanFilter(PreparedStatement targetStmt) throws SQLException {
 
 Review comment:
   Not sure how we can do that

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [phoenix] gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries

Posted by GitBox <gi...@apache.org>.
gjacoby126 commented on a change in pull request #666: Phoenix-5652 Improve IndexScrutinyTool perf by using SkipScan in index table queries
URL: https://github.com/apache/phoenix/pull/666#discussion_r365367168
 
 

 ##########
 File path: phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolPerformanceIT.java
 ##########
 @@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
+ * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
+ * for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.jdbc.PhoenixStatement;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool;
+import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.PropertiesUtil;
+import org.apache.phoenix.util.SchemaUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+
+import static org.apache.phoenix.mapreduce.index.PhoenixScrutinyJobCounters.INVALID_ROW_COUNT;
+import static org.apache.phoenix.mapreduce.index.PhoenixScrutinyJobCounters.VALID_ROW_COUNT;
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Tests for the {@link IndexScrutinyTool}
+ */
+@Category(NeedsOwnMiniClusterTest.class)
+@RunWith(Parameterized.class)
+public class IndexScrutinyToolPerformanceIT extends IndexScrutinyToolBaseIT {
+    private String dataTableDdl;
+    private String indexTableDdl;
+    private String viewDdl;
+
+    private static final String UPSERT_SQL = "UPSERT INTO %s VALUES(?,?,?,?)";
+
+    private String schemaName;
+    private String dataTableName;
+    private String dataTableFullName;
+    private String indexTableName;
+    private String indexTableFullName;
+    private String viewName;
+    private String viewFullName;
+
+    private Connection conn;
+
+    private PreparedStatement dataTableUpsertStmt;
+    private PreparedStatement viewUpsertStmt;
+
+    private long testTime;
+    private Properties props;
+
+    private static final Log LOGGER = LogFactory.getLog(IndexScrutinyToolPerformanceIT.class);
+    @Parameterized.Parameters public static Collection<Object[]> data() {
+        List<Object[]> list = Lists.newArrayListWithExpectedSize(15);
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, ID, ZIP, EMPLOYER, EMPLOY_DATE)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                null, "CREATE INDEX %s ON %s (NAME, ZIP, EMPLOY_DATE, EMPLOYER)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE) INCLUDE (ZIP)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) ",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER)" });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR CONSTRAINT PK_1 PRIMARY KEY (ID, NAME)) SALT_BUCKETS=2",
+                null, "CREATE INDEX %s ON %s (EMPLOY_DATE, ZIP) INCLUDE (EMPLOYER) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+                null, "CREATE INDEX %s ON %s (NAME) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR NOT NULL, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID, NAME)) MULTI_TENANT=true",
+                null, "CREATE INDEX %s ON %s (ZIP) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR)",
+                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+                "CREATE VIEW %s AS SELECT * FROM %s", "CREATE INDEX %s ON %s (NAME) " });
+        list.add(new Object[] {
+                "CREATE TABLE %s (COL1 VARCHAR(15) NOT NULL,ID INTEGER NOT NULL, NAME VARCHAR, ZIP INTEGER CONSTRAINT PK_1 PRIMARY KEY (COL1, ID)) MULTI_TENANT=true",
+                null, "CREATE INDEX %s  ON %s (NAME, ID, ZIP) " });
+        return list;
+    }
+
+    public IndexScrutinyToolPerformanceIT(String dataTableDdl, String viewDdl, String indexTableDdl) {
+        this.dataTableDdl = dataTableDdl;
+        this.viewDdl = viewDdl;
+        this.indexTableDdl = indexTableDdl;
+    }
+
+    /**
+     * Create the test data and index tables
+     */
+    @Before
+    public void setup() throws SQLException {
+        generateUniqueTableNames();
+        createTestTable(getUrl(), String.format(dataTableDdl, dataTableFullName));
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            createTestTable(getUrl(), String.format(viewDdl, viewFullName, dataTableFullName));
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, viewFullName));
+        } else {
+            createTestTable(getUrl(), String.format(indexTableDdl, indexTableName, dataTableFullName));
+        }
+        props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+        conn = DriverManager.getConnection(getUrl(), props);
+        String dataTableUpsert = String.format(UPSERT_SQL, dataTableFullName);
+        dataTableUpsertStmt = conn.prepareStatement(dataTableUpsert);
+        String viewUpsert = String.format(UPSERT_SQL, viewFullName);
+        viewUpsertStmt = conn.prepareStatement(viewUpsert);
+        conn.setAutoCommit(false);
+        testTime = EnvironmentEdgeManager.currentTimeMillis() - 1000;
+
+    }
+
+    @After
+    public void teardown() throws SQLException {
+        if (conn != null) {
+            conn.close();
+        }
+    }
+
+    @Test
+    public void testSkipScan() throws Exception {
+        String baseTableName = dataTableName;
+        PreparedStatement upsertStmt = dataTableUpsertStmt;
+        if (!Strings.isNullOrEmpty(viewDdl)) {
+            baseTableName = viewName;
+            upsertStmt = viewUpsertStmt;
+        }
+
+        for (int i=0; i < 2; i++) {
+            if (dataTableDdl.contains("MULTI_TENANT")) {
+                upsertRowMultiTenant(upsertStmt, "val"+i, i,"name-" + i, 19999);
+            } else {
+                upsertRow(upsertStmt, i, "name-" + i, 19999);
+            }
+            conn.commit();
+        }
+
+        List<Job> completedJobs = IndexScrutinyToolIT.runScrutiny(schemaName, baseTableName,
+                indexTableName, null, SourceTable.DATA_TABLE_SOURCE);
+        PreparedStatement targetStmt = IndexScrutinyTool.getTargetTableQueryForScrutinyTool();
+        assertSkipScanFilter(targetStmt);
+        Job job = completedJobs.get(0);
+        Counters counters = job.getCounters();
+        assertEquals(2, getCounterValue(counters, VALID_ROW_COUNT));
+        assertEquals(0, getCounterValue(counters, INVALID_ROW_COUNT));
+
+        completedJobs = IndexScrutinyToolIT.runScrutiny(schemaName, baseTableName,
+                indexTableName, null, SourceTable.INDEX_TABLE_SOURCE);
+        targetStmt = IndexScrutinyTool.getTargetTableQueryForScrutinyTool();
+        assertSkipScanFilter(targetStmt);
+
+        job = completedJobs.get(0);
+        counters = job.getCounters();
+        assertEquals(2, getCounterValue(counters, VALID_ROW_COUNT));
+        assertEquals(0, getCounterValue(counters, INVALID_ROW_COUNT));
+    }
+
+    private void assertSkipScanFilter(PreparedStatement targetStmt) throws SQLException {
+        QueryPlan plan = targetStmt.unwrap(PhoenixStatement.class).getQueryPlan();
+        Scan scan = plan.getScans().get(0).get(0);
+        Filter filter = scan.getFilter();
 
 Review comment:
   Is the presence of a SkipScan filter sufficient to guarantee correctness, or is there some other additional information we need, like what's being filtered?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services