You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@phoenix.apache.org by "Viraj Jasani (Jira)" <ji...@apache.org> on 2020/12/07 07:25:00 UTC

[jira] [Updated] (PHOENIX-5265) [UMBRELLA] Phoenix Test should use object based Plan for result comparison instead of using hard-coded comparison

     [ https://issues.apache.org/jira/browse/PHOENIX-5265?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Viraj Jasani updated PHOENIX-5265:
----------------------------------
    Summary: [UMBRELLA] Phoenix Test should use object based Plan for result comparison instead of using hard-coded comparison  (was: [UMBRELLA] Phoenix Test should use gold files for result comparison instead of using hard-coded comparison.)

> [UMBRELLA] Phoenix Test should use object based Plan for result comparison instead of using hard-coded comparison
> ------------------------------------------------------------------------------------------------------------------
>
>                 Key: PHOENIX-5265
>                 URL: https://issues.apache.org/jira/browse/PHOENIX-5265
>             Project: Phoenix
>          Issue Type: Improvement
>         Environment: {code:java}
> // code placeholder
> @Test
> public void testWithMultiCF() throws Exception {
>     int nRows = 20;
>     Connection conn = getConnection(0);
>     PreparedStatement stmt;
>     conn.createStatement().execute(
>             "CREATE TABLE " + fullTableName
>                     + "(k VARCHAR PRIMARY KEY, a.v INTEGER, b.v INTEGER, c.v INTEGER NULL, d.v INTEGER NULL) "
>                     + tableDDLOptions );
>     stmt = conn.prepareStatement("UPSERT INTO " + fullTableName + " VALUES(?,?, ?, ?, ?)");
>     byte[] val = new byte[250];
>     for (int i = 0; i < nRows; i++) {
>         stmt.setString(1, Character.toString((char)('a' + i)) + Bytes.toString(val));
>         stmt.setInt(2, i);
>         stmt.setInt(3, i);
>         stmt.setInt(4, i);
>         stmt.setInt(5, i);
>         stmt.executeUpdate();
>     }
>     conn.commit();
>     stmt = conn.prepareStatement("UPSERT INTO " + fullTableName + "(k, c.v, d.v) VALUES(?,?,?)");
>     for (int i = 0; i < 5; i++) {
>         stmt.setString(1, Character.toString((char)('a' + 'z' + i)) + Bytes.toString(val));
>         stmt.setInt(2, i);
>         stmt.setInt(3, i);
>         stmt.executeUpdate();
>     }
>     conn.commit();
>     ResultSet rs;
>     String actualExplainPlan;
>     collectStatistics(conn, fullTableName);
>     List<KeyRange> keyRanges = getAllSplits(conn, fullTableName);
>     assertEquals(26, keyRanges.size());
>     rs = conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " + fullTableName);
>     actualExplainPlan = QueryUtil.getExplainPlan(rs);
>     assertEquals(
>             "CLIENT 26-CHUNK 25 ROWS " + (columnEncoded ? ( mutable ? "12530" : "14190" ) : (TransactionFactory.Provider.OMID.name().equals(transactionProvider)) ? "25320" : "12420") +
>                     " BYTES PARALLEL 1-WAY FULL SCAN OVER " + physicalTableName,
>             actualExplainPlan);
>     ConnectionQueryServices services = conn.unwrap(PhoenixConnection.class).getQueryServices();
>     List<HRegionLocation> regions = services.getAllTableRegions(Bytes.toBytes(physicalTableName));
>     assertEquals(1, regions.size());
>     collectStatistics(conn, fullTableName, Long.toString(1000));
>     keyRanges = getAllSplits(conn, fullTableName);
>     boolean oneCellPerColFamilyStorageScheme = !mutable && columnEncoded;
>     boolean hasShadowCells = TransactionFactory.Provider.OMID.name().equals(transactionProvider);
>     assertEquals(oneCellPerColFamilyStorageScheme ? 14 : hasShadowCells ? 24 : 13, keyRanges.size());
>     rs = conn
>             .createStatement()
>             .executeQuery(
>                     "SELECT COLUMN_FAMILY,SUM(GUIDE_POSTS_ROW_COUNT),SUM(GUIDE_POSTS_WIDTH),COUNT(*) from \"SYSTEM\".STATS where PHYSICAL_NAME = '"
>                             + physicalTableName + "' GROUP BY COLUMN_FAMILY ORDER BY COLUMN_FAMILY");
>     assertTrue(rs.next());
>     assertEquals("A", rs.getString(1));
>     assertEquals(25, rs.getInt(2));
>     assertEquals(columnEncoded ? ( mutable ? 12530 : 14190 ) : hasShadowCells ? 25320 : 12420, rs.getInt(3));
>     assertEquals(oneCellPerColFamilyStorageScheme ? 13 : hasShadowCells ? 23 : 12, rs.getInt(4));
>     assertTrue(rs.next());
>     assertEquals("B", rs.getString(1));
>     assertEquals(oneCellPerColFamilyStorageScheme ? 25 : 20, rs.getInt(2));
>     assertEquals(columnEncoded ? ( mutable ? 5600 : 7260 ) : hasShadowCells ? 11260 : 5540, rs.getInt(3));
>     assertEquals(oneCellPerColFamilyStorageScheme ? 7 : hasShadowCells ? 10 : 5, rs.getInt(4));
>     assertTrue(rs.next());
>     assertEquals("C", rs.getString(1));
>     assertEquals(25, rs.getInt(2));
>     assertEquals(columnEncoded ? ( mutable ? 7005 : 7280 ) : hasShadowCells ? 14085 : 6930, rs.getInt(3));
>     assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4));
>     assertTrue(rs.next());
>     assertEquals("D", rs.getString(1));
>     assertEquals(25, rs.getInt(2));
>     assertEquals(columnEncoded ? ( mutable ? 7005 : 7280 ) : hasShadowCells ? 14085 : 6930, rs.getInt(3));
>     assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4));
>     assertFalse(rs.next());
>     // Disable stats
>     conn.createStatement().execute("ALTER TABLE " + fullTableName + 
>             " SET " + PhoenixDatabaseMetaData.GUIDE_POSTS_WIDTH + "=0");
>     collectStatistics(conn, fullTableName);
>     // Assert that there are no more guideposts
>     rs = conn.createStatement().executeQuery("SELECT count(1) FROM " + PhoenixDatabaseMetaData.SYSTEM_STATS_NAME + 
>             " WHERE " + PhoenixDatabaseMetaData.PHYSICAL_NAME + "='" + physicalTableName + "' AND " + PhoenixDatabaseMetaData.COLUMN_FAMILY + " IS NOT NULL");
>     assertTrue(rs.next());
>     assertEquals(0, rs.getLong(1));
>     assertFalse(rs.next());
>     rs = conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " + fullTableName);
>     actualExplainPlan = QueryUtil.getExplainPlan(rs);
>     assertEquals("CLIENT 1-CHUNK PARALLEL 1-WAY FULL SCAN OVER " + physicalTableName, actualExplainPlan);
> }
> {code}
>            Reporter: Bin Shi
>            Assignee: Viraj Jasani
>            Priority: Major
>              Labels: hardening, phoenix-hardening
>
> Currently, in Phoenix Test, the comparison of the returned result and the expected result is always hard-coded, which is especially widely used by E2E and Integration test for comparing the query result, including the result of EXPLAIN query plan. This has significantly impaired the productivity and efficiency of working on Phoenix.
> The right approach is, each test case should write the generated result to a file under a new directory, then compares this file with a gold file under "gold" directory. After we make some code changes and make sure the change is correct by manually verifying several specific test cases, we can safely replace the whole "gold" directory with the directory which contains all new generated test files during test running. In this way, we can easily rebase all the tests.
> For example, in BaseStatsCollectorIT.java, we verify the estimated row size in the returned result of EXPLAIN query plan, the row size is decided by many factors, like the column is encoded or not, it is mutable or not, it uses transaction provider or not, it uses TEPHRA or OMID as transaction provider, etc. The code snippet "testWithMultiCF" shows a typical test case. The comparisons of the returned result and the expected result are hard-coded in those asserts. Now imagine, if we change the way collecting stats or we change column encoding scheme or we change the cell storage format for TEPHRA or OMID, which is very likely to happen, then we need to manually change all those hard-coded comparisons everywhere, and it isn't trivial to re-calculate all expected row sizes with the different conditions. Today you might need one or two weeks to rebase all the tests, which is a huge waste. We should use "gold" files here, so that we can rebase the test very easily.
>  BTW, the new generated test result files and gold files should be in XML or JSON. The result of "EXPLAIN" query should be in XML or JSON too, because the file format matches the structure of a query plan.
> {code:java}
> // code placeholder
> @Test
> public void testWithMultiCF() throws Exception {
>     int nRows = 20;
>     Connection conn = getConnection(0);
>     PreparedStatement stmt;
>     conn.createStatement().execute(
>             "CREATE TABLE " + fullTableName
>                     + "(k VARCHAR PRIMARY KEY, a.v INTEGER, b.v INTEGER, c.v INTEGER NULL, d.v INTEGER NULL) "
>                     + tableDDLOptions );
>     stmt = conn.prepareStatement("UPSERT INTO " + fullTableName + " VALUES(?,?, ?, ?, ?)");
>     byte[] val = new byte[250];
>     for (int i = 0; i < nRows; i++) {
>         stmt.setString(1, Character.toString((char)('a' + i)) + Bytes.toString(val));
>         stmt.setInt(2, i);
>         stmt.setInt(3, i);
>         stmt.setInt(4, i);
>         stmt.setInt(5, i);
>         stmt.executeUpdate();
>     }
>     conn.commit();
>     stmt = conn.prepareStatement("UPSERT INTO " + fullTableName + "(k, c.v, d.v) VALUES(?,?,?)");
>     for (int i = 0; i < 5; i++) {
>         stmt.setString(1, Character.toString((char)('a' + 'z' + i)) + Bytes.toString(val));
>         stmt.setInt(2, i);
>         stmt.setInt(3, i);
>         stmt.executeUpdate();
>     }
>     conn.commit();
>     ResultSet rs;
>     String actualExplainPlan;
>     collectStatistics(conn, fullTableName);
>     List<KeyRange> keyRanges = getAllSplits(conn, fullTableName);
>     assertEquals(26, keyRanges.size());
>     rs = conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " + fullTableName);
>     actualExplainPlan = QueryUtil.getExplainPlan(rs);
>     assertEquals(
>             "CLIENT 26-CHUNK 25 ROWS " + (columnEncoded ? ( mutable ? "12530" : "14190" ) : (TransactionFactory.Provider.OMID.name().equals(transactionProvider)) ? "25320" : "12420") +
>                     " BYTES PARALLEL 1-WAY FULL SCAN OVER " + physicalTableName,
>             actualExplainPlan);
>     ConnectionQueryServices services = conn.unwrap(PhoenixConnection.class).getQueryServices();
>     List<HRegionLocation> regions = services.getAllTableRegions(Bytes.toBytes(physicalTableName));
>     assertEquals(1, regions.size());
>     collectStatistics(conn, fullTableName, Long.toString(1000));
>     keyRanges = getAllSplits(conn, fullTableName);
>     boolean oneCellPerColFamilyStorageScheme = !mutable && columnEncoded;
>     boolean hasShadowCells = TransactionFactory.Provider.OMID.name().equals(transactionProvider);
>     assertEquals(oneCellPerColFamilyStorageScheme ? 14 : hasShadowCells ? 24 : 13, keyRanges.size());
>     rs = conn
>             .createStatement()
>             .executeQuery(
>                     "SELECT COLUMN_FAMILY,SUM(GUIDE_POSTS_ROW_COUNT),SUM(GUIDE_POSTS_WIDTH),COUNT(*) from \"SYSTEM\".STATS where PHYSICAL_NAME = '"
>                             + physicalTableName + "' GROUP BY COLUMN_FAMILY ORDER BY COLUMN_FAMILY");
>     assertTrue(rs.next());
>     assertEquals("A", rs.getString(1));
>     assertEquals(25, rs.getInt(2));
>     assertEquals(columnEncoded ? ( mutable ? 12530 : 14190 ) : hasShadowCells ? 25320 : 12420, rs.getInt(3));
>     assertEquals(oneCellPerColFamilyStorageScheme ? 13 : hasShadowCells ? 23 : 12, rs.getInt(4));
>     assertTrue(rs.next());
>     assertEquals("B", rs.getString(1));
>     assertEquals(oneCellPerColFamilyStorageScheme ? 25 : 20, rs.getInt(2));
>     assertEquals(columnEncoded ? ( mutable ? 5600 : 7260 ) : hasShadowCells ? 11260 : 5540, rs.getInt(3));
>     assertEquals(oneCellPerColFamilyStorageScheme ? 7 : hasShadowCells ? 10 : 5, rs.getInt(4));
>     assertTrue(rs.next());
>     assertEquals("C", rs.getString(1));
>     assertEquals(25, rs.getInt(2));
>     assertEquals(columnEncoded ? ( mutable ? 7005 : 7280 ) : hasShadowCells ? 14085 : 6930, rs.getInt(3));
>     assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4));
>     assertTrue(rs.next());
>     assertEquals("D", rs.getString(1));
>     assertEquals(25, rs.getInt(2));
>     assertEquals(columnEncoded ? ( mutable ? 7005 : 7280 ) : hasShadowCells ? 14085 : 6930, rs.getInt(3));
>     assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4));
>     assertFalse(rs.next());
>     // Disable stats
>     conn.createStatement().execute("ALTER TABLE " + fullTableName + 
>             " SET " + PhoenixDatabaseMetaData.GUIDE_POSTS_WIDTH + "=0");
>     collectStatistics(conn, fullTableName);
>     // Assert that there are no more guideposts
>     rs = conn.createStatement().executeQuery("SELECT count(1) FROM " + PhoenixDatabaseMetaData.SYSTEM_STATS_NAME + 
>             " WHERE " + PhoenixDatabaseMetaData.PHYSICAL_NAME + "='" + physicalTableName + "' AND " + PhoenixDatabaseMetaData.COLUMN_FAMILY + " IS NOT NULL");
>     assertTrue(rs.next());
>     assertEquals(0, rs.getLong(1));
>     assertFalse(rs.next());
>     rs = conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " + fullTableName);
>     actualExplainPlan = QueryUtil.getExplainPlan(rs);
>     assertEquals("CLIENT 1-CHUNK PARALLEL 1-WAY FULL SCAN OVER " + physicalTableName, actualExplainPlan);
> }
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)