Posted to commits@phoenix.apache.org by an...@apache.org on 2019/07/11 01:45:35 UTC
[phoenix] branch 4.x-HBase-1.4 updated: PHOENIX-3541 Bulk Data Loading - Can't use table name by small letter
This is an automated email from the ASF dual-hosted git repository.
ankit pushed a commit to branch 4.x-HBase-1.4
in repository https://gitbox.apache.org/repos/asf/phoenix.git
The following commit(s) were added to refs/heads/4.x-HBase-1.4 by this push:
new 685fb07 PHOENIX-3541 Bulk Data Loading - Can't use table name by small letter
685fb07 is described below
commit 685fb0756dc469c7eaaf1085e3409c3046b3e1d2
Author: Karthik Palanisamy <kp...@cloudera.com>
AuthorDate: Wed Jul 10 18:37:01 2019 -0700
PHOENIX-3541 Bulk Data Loading - Can't use table name by small letter
---
.../apache/phoenix/end2end/CsvBulkLoadToolIT.java | 68 ++++++++++++++++++++++
.../phoenix/mapreduce/AbstractBulkLoadTool.java | 43 ++++++++++++--
2 files changed, 105 insertions(+), 6 deletions(-)
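
The patch below lets the bulk load tools accept case-sensitive (double-quoted) schema and table names. Per the comment in the patch, commons-cli cannot parse a fully quoted argument such as "t" (CLI-275), so a case-sensitive name is passed with two pairs of quotes (""t"") and the tool strips one pair before handing the identifier to Phoenix. A minimal sketch of an invocation, modeled on the new integration tests below; the class name, input path, and ZooKeeper quorum are placeholders, not values taken from the patch:

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.phoenix.mapreduce.CsvBulkLoadTool;

    public class BulkLoadQuotedNameExample {
        public static void main(String[] args) throws Exception {
            CsvBulkLoadTool tool = new CsvBulkLoadTool();
            tool.setConf(HBaseConfiguration.create());
            int exitCode = tool.run(new String[] {
                    "--input", "/tmp/input1.csv",
                    "--schema", "S",
                    // Lower-case table name: the Java literal "\"\"t\"\"" yields ""t"",
                    // which loadData() trims to the quoted identifier "t".
                    "--table", "\"\"t\"\"",
                    "--zookeeper", "localhost:2181"});
            System.exit(exitCode);
        }
    }

From a shell, the same argument is typically quoted so that the doubled double quotes reach the tool intact (for example --table '""t""' in bash); exact escaping depends on the shell.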
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/CsvBulkLoadToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/CsvBulkLoadToolIT.java
index f91956c..b301263 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/CsvBulkLoadToolIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/CsvBulkLoadToolIT.java
@@ -536,4 +536,72 @@ public class CsvBulkLoadToolIT extends BaseOwnClusterIT {
}
}
}
+
+ @Test
+ public void testImportWithUpperCaseSchemaNameAndLowerCaseTableName() throws Exception {
+ Statement stmt = conn.createStatement();
+ stmt.execute("CREATE TABLE S.\"t\" (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, " +
+ "T DATE) SPLIT ON (1,2)");
+ FileSystem fs = FileSystem.get(getUtility().getConfiguration());
+ FSDataOutputStream outputStream = fs.create(new Path("/tmp/input1.csv"));
+ PrintWriter printWriter = new PrintWriter(outputStream);
+ printWriter.println("1,Name 1,1970/01/01");
+ printWriter.println("2,Name 2,1970/01/02");
+ printWriter.close();
+ CsvBulkLoadTool csvBulkLoadTool = new CsvBulkLoadTool();
+ csvBulkLoadTool.setConf(new Configuration(getUtility().getConfiguration()));
+ csvBulkLoadTool.getConf().set(DATE_FORMAT_ATTRIB,"yyyy/MM/dd");
+ int exitCode = csvBulkLoadTool.run(new String[] {
+ "--input", "/tmp/input1.csv",
+ "--table", "\"\"t\"\"",
+ "--schema", "S",
+ "--zookeeper", zkQuorum});
+ assertEquals(0, exitCode);
+ ResultSet rs = stmt.executeQuery("SELECT id, name, t FROM S.\"t\" ORDER BY id");
+ assertTrue(rs.next());
+ assertEquals(1, rs.getInt(1));
+ assertEquals("Name 1", rs.getString(2));
+ assertEquals(DateUtil.parseDate("1970-01-01"), rs.getDate(3));
+ assertTrue(rs.next());
+ assertEquals(2, rs.getInt(1));
+ assertEquals("Name 2", rs.getString(2));
+ assertEquals(DateUtil.parseDate("1970-01-02"), rs.getDate(3));
+ assertFalse(rs.next());
+ rs.close();
+ stmt.close();
+ }
+
+ @Test
+ public void testImportWithLowerCaseSchemaNameAndUpperCaseTableName() throws Exception {
+ Statement stmt = conn.createStatement();
+ stmt.execute("CREATE TABLE \"s\".T (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, " +
+ "T DATE) SPLIT ON (1,2)");
+ FileSystem fs = FileSystem.get(getUtility().getConfiguration());
+ FSDataOutputStream outputStream = fs.create(new Path("/tmp/input1.csv"));
+ PrintWriter printWriter = new PrintWriter(outputStream);
+ printWriter.println("1,Name 1,1970/01/01");
+ printWriter.println("2,Name 2,1970/01/02");
+ printWriter.close();
+ CsvBulkLoadTool csvBulkLoadTool = new CsvBulkLoadTool();
+ csvBulkLoadTool.setConf(new Configuration(getUtility().getConfiguration()));
+ csvBulkLoadTool.getConf().set(DATE_FORMAT_ATTRIB,"yyyy/MM/dd");
+ int exitCode = csvBulkLoadTool.run(new String[] {
+ "--input", "/tmp/input1.csv",
+ "--table", "T",
+ "--schema", "\"\"s\"\"",
+ "--zookeeper", zkQuorum});
+ assertEquals(0, exitCode);
+ ResultSet rs = stmt.executeQuery("SELECT id, name, t FROM \"s\".T ORDER BY id");
+ assertTrue(rs.next());
+ assertEquals(1, rs.getInt(1));
+ assertEquals("Name 1", rs.getString(2));
+ assertEquals(DateUtil.parseDate("1970-01-01"), rs.getDate(3));
+ assertTrue(rs.next());
+ assertEquals(2, rs.getInt(1));
+ assertEquals("Name 2", rs.getString(2));
+ assertEquals(DateUtil.parseDate("1970-01-02"), rs.getDate(3));
+ assertFalse(rs.next());
+ rs.close();
+ stmt.close();
+ }
}
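
The AbstractBulkLoadTool change below strips one pair of the doubled quotes and then normalizes the identifiers; SchemaUtil.normalizeIdentifier preserves the case of quoted names and upper-cases unquoted ones. A minimal standalone sketch of that behavior (the class name is invented for illustration; this is not code from the patch):

    import org.apache.phoenix.util.SchemaUtil;

    public class QuotedNameHandlingSketch {
        public static void main(String[] args) {
            // Per the patch comment, ""t"" arrives from commons-cli with both quote pairs intact.
            String tableName = "\"\"t\"\"";

            // Mirror the new check in loadData(): drop one pair of quotes, ""t"" -> "t".
            if (tableName.length() > 3 && tableName.startsWith("\"\"") && tableName.endsWith("\"\"")) {
                tableName = tableName.substring(1, tableName.length() - 1);
            }

            // Quoted identifiers keep their case; unquoted ones are upper-cased.
            System.out.println(SchemaUtil.normalizeIdentifier(tableName)); // t
            System.out.println(SchemaUtil.normalizeIdentifier("T"));       // T
            System.out.println(SchemaUtil.normalizeIdentifier("t"));       // T
        }
    }

In the patch itself, the normalized names are used for validateTable, while the MapReduce job still receives the escaped, fully qualified table name via SchemaUtil.getEscapedFullTableName.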
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/AbstractBulkLoadTool.java b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/AbstractBulkLoadTool.java
index c5482e7..396bf02 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/AbstractBulkLoadTool.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/AbstractBulkLoadTool.java
@@ -178,16 +178,45 @@ public abstract class AbstractBulkLoadTool extends Configured implements Tool {
return loadData(conf, cmdLine);
}
+ /**
+ * Checks whether a schema or table name starts with two double quotes, e.g. ""t"" -> true
+ */
+ private boolean isStartWithTwoDoubleQuotes (String name) {
+ boolean start = false;
+ boolean end = false;
+ if (name != null && name.length() > 1) {
+ int length = name.length();
+ start = name.substring(0,2).equals("\"\"");
+ end = name.substring(length-2, length).equals("\"\"");
+ if (start && !end) {
+ throw new IllegalArgumentException("Invalid table/schema name " + name +
+ ". Please check that the name ends with two double quotes.");
+ }
+ }
+ return start;
+ }
+
+
private int loadData(Configuration conf, CommandLine cmdLine) throws Exception {
String tableName = cmdLine.getOptionValue(TABLE_NAME_OPT.getOpt());
String schemaName = cmdLine.getOptionValue(SCHEMA_NAME_OPT.getOpt());
String indexTableName = cmdLine.getOptionValue(INDEX_TABLE_NAME_OPT.getOpt());
+ boolean quotedTableName = isStartWithTwoDoubleQuotes(tableName);
+ if (quotedTableName) {
+ // Commons-cli cannot parse a fully quoted argument such as "t" (CLI-275).
+ // If \"\"t\"\" is passed, both pairs of quotes are left intact as ""t"",
+ // so strip one pair of quotes from the table name: ""t"" -> "t".
+ tableName = tableName.substring(1, tableName.length() - 1);
+ }
+ boolean quotedSchemaName = isStartWithTwoDoubleQuotes(schemaName);
+ if (quotedSchemaName) {
+ schemaName = schemaName.substring(1,schemaName.length() - 1);
+ }
String qualifiedTableName = SchemaUtil.getQualifiedTableName(schemaName, tableName);
String qualifiedIndexTableName = null;
if (indexTableName != null){
qualifiedIndexTableName = SchemaUtil.getQualifiedTableName(schemaName, indexTableName);
}
-
if (cmdLine.hasOption(ZK_QUORUM_OPT.getOpt())) {
// ZK_QUORUM_OPT is optional, but if it's there, use it for both the conn and the job.
String zkQuorum = cmdLine.getOptionValue(ZK_QUORUM_OPT.getOpt());
@@ -216,12 +245,14 @@ public abstract class AbstractBulkLoadTool extends Configured implements Tool {
FormatToBytesWritableMapper.configureColumnInfoList(conf, importColumns);
boolean ignoreInvalidRows = cmdLine.hasOption(IGNORE_ERRORS_OPT.getOpt());
conf.setBoolean(FormatToBytesWritableMapper.IGNORE_INVALID_ROW_CONFKEY, ignoreInvalidRows);
- conf.set(FormatToBytesWritableMapper.TABLE_NAME_CONFKEY, qualifiedTableName);
-
+ conf.set(FormatToBytesWritableMapper.TABLE_NAME_CONFKEY,
+ SchemaUtil.getEscapedFullTableName(qualifiedTableName));
// give subclasses their hook
configureOptions(cmdLine, importColumns, conf);
+ String sName = SchemaUtil.normalizeIdentifier(schemaName);
+ String tName = SchemaUtil.normalizeIdentifier(tableName);
try {
- validateTable(conn, schemaName, tableName);
+ validateTable(conn, sName, tName);
} finally {
conn.close();
}
@@ -247,7 +278,7 @@ public abstract class AbstractBulkLoadTool extends Configured implements Tool {
}
}
// using conn after it's been closed... o.O
- tablesToBeLoaded.addAll(getIndexTables(conn, schemaName, qualifiedTableName));
+ tablesToBeLoaded.addAll(getIndexTables(conn, qualifiedTableName));
// When loading a single index table, check index table name is correct
if (qualifiedIndexTableName != null){
@@ -410,7 +441,7 @@ public abstract class AbstractBulkLoadTool extends Configured implements Tool {
* Get the index tables of current data table
* @throws java.sql.SQLException
*/
- private List<TargetTableRef> getIndexTables(Connection conn, String schemaName, String qualifiedTableName)
+ private List<TargetTableRef> getIndexTables(Connection conn, String qualifiedTableName)
throws SQLException {
PTable table = PhoenixRuntime.getTable(conn, qualifiedTableName);
List<TargetTableRef> indexTables = new ArrayList<TargetTableRef>();