You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@phoenix.apache.org by ChinmaySKulkarni <gi...@git.apache.org> on 2018/12/01 00:06:13 UTC
[GitHub] phoenix pull request #404: PHOENIX-5025 Tool to clean up orphan views
Github user ChinmaySKulkarni commented on a diff in the pull request:
https://github.com/apache/phoenix/pull/404#discussion_r238037787
--- Diff: phoenix-core/src/main/java/org/apache/phoenix/mapreduce/OrphanViewTool.java ---
@@ -0,0 +1,812 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.mapreduce;
+
+import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.COLUMN_FAMILY;
+import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.COLUMN_NAME;
+import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.LINK_TYPE;
+import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.SYSTEM_CATALOG_NAME;
+import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.SYSTEM_CHILD_LINK_NAME;
+import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.TABLE_NAME;
+import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.TABLE_SCHEM;
+import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.TABLE_TYPE;
+import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.TENANT_ID;
+import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.VIEW_TYPE;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Properties;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.apache.commons.lang.exception.ExceptionUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.phoenix.jdbc.PhoenixConnection;
+import org.apache.phoenix.mapreduce.util.ConnectionUtil;
+import org.apache.phoenix.parse.DropTableStatement;
+import org.apache.phoenix.query.QueryServices;
+import org.apache.phoenix.query.QueryServicesOptions;
+import org.apache.phoenix.schema.MetaDataClient;
+import org.apache.phoenix.schema.PTable;
+import org.apache.phoenix.schema.PTableType;
+import org.apache.phoenix.schema.TableNotFoundException;
+import org.apache.phoenix.util.PhoenixRuntime;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A tool to identify orphan views and links, and drop them
+ *
+ */
+public class OrphanViewTool extends Configured implements Tool {
+ private static final Logger LOG = LoggerFactory.getLogger(OrphanViewTool.class);
+ // Query all the views that are not "MAPPED" views
+ private static final String viewQuery = "SELECT " +
+ TENANT_ID + ", " +
+ TABLE_SCHEM + "," +
+ TABLE_NAME +
+ " FROM " + SYSTEM_CATALOG_NAME +
+ " WHERE "+ TABLE_TYPE + " = '" + PTableType.VIEW.getSerializedValue() +"' AND NOT " +
+ VIEW_TYPE + " = " + PTable.ViewType.MAPPED.getSerializedValue();
+ // Query all physical links
+ private static final String physicalLinkQuery = "SELECT " +
+ TENANT_ID + ", " +
+ TABLE_SCHEM + ", " +
+ TABLE_NAME + ", " +
+ COLUMN_NAME + " AS PHYSICAL_TABLE_TENANT_ID, " +
+ COLUMN_FAMILY + " AS PHYSICAL_TABLE_FULL_NAME " +
+ " FROM " + SYSTEM_CATALOG_NAME +
+ " WHERE "+ LINK_TYPE + " = " +
+ PTable.LinkType.PHYSICAL_TABLE.getSerializedValue();
+ // Query all child-parent links
+ private static final String childParentLinkQuery = "SELECT " +
+ TENANT_ID + ", " +
+ TABLE_SCHEM + ", " +
+ TABLE_NAME + ", " +
+ COLUMN_NAME + " AS PARENT_VIEW_TENANT_ID, " +
+ COLUMN_FAMILY + " AS PARENT_VIEW_FULL_NAME " +
+ " FROM " + SYSTEM_CATALOG_NAME +
+ " WHERE "+ LINK_TYPE + " = " +
+ PTable.LinkType.PARENT_TABLE.getSerializedValue();
+ // Query all parent-child links
+ private static final String parentChildLinkQuery = "SELECT " +
+ TENANT_ID + ", " +
+ TABLE_SCHEM + ", " +
+ TABLE_NAME + ", " +
+ COLUMN_NAME + " AS CHILD_VIEW_TENANT_ID, " +
+ COLUMN_FAMILY + " AS CHILD_VIEW_FULL_NAME " +
+ " FROM " + SYSTEM_CHILD_LINK_NAME +
+ " WHERE "+ LINK_TYPE + " = " +
+ PTable.LinkType.CHILD_TABLE.getSerializedValue();
+
+ // Query all the tables that can be a base table
+ private static final String candidateBaseTableQuery = "SELECT " +
+ TENANT_ID + ", " +
+ TABLE_SCHEM + ", " +
+ TABLE_NAME +
+ " FROM " + SYSTEM_CATALOG_NAME +
+ " WHERE "+ " NOT " + TABLE_TYPE + " = '" + PTableType.VIEW.getSerializedValue() + "'";
+
+ private String outputPath;
+ private String inputPath;
+ private boolean clean = false;
+ private int maxViewLevel = 0;
+ private static final long defaultAge = 24*60*60*1000; // 1 day
+ private long age = 0;
+
+ private static final byte VIEW = 0;
+ private static final byte PHYSICAL_TABLE_LINK = 1;
+ private static final byte PARENT_TABLE_LINK = 2;
+ private static final byte CHILD_TABLE_LINK = 3;
+ private static final byte ORPHAN_TYPE_COUNT = 4;
+
+ BufferedWriter writer[] = new BufferedWriter[ORPHAN_TYPE_COUNT];
+ BufferedReader reader[] = new BufferedReader[ORPHAN_TYPE_COUNT];
+ HashMap<Key, View> orphanViewSet = new HashMap<>();
+ List<HashMap<Key, View>> viewSetArray = new ArrayList<HashMap<Key, View>>();
+ HashMap<Key, Base> baseSet = new HashMap<>();
+ HashSet<Link> orphanLinkSet = new HashSet<>();
+
+ private static final String fileName[] = {"OrphanView.txt", "OrphanPhysicalTableLink.txt", "OrphanParentTableLink", "OrphanChildTableLink"};
+ private static final Option OUTPUT_PATH_OPTION = new Option("op", "output-path", true,
+ "Output path where the files listing orphan views and links are written");
+ private static final Option INPUT_PATH_OPTION = new Option("ip", "input-path", true,
+ "Input path where the files listing orphan views and links are read");
+ private static final Option CLEAN_ORPHAN_VIEWS_OPTION = new Option("c", "clean", false,
+ "If specified, cleans orphan views and links");
+ private static final Option IDENTIFY_ORPHAN_VIEWS_OPTION = new Option("i", "identify", false,
+ "If specified, identifies orphan views and links");
+ private static final Option AGE_OPTION = new Option("a", "age", true,
+ "The minimum age (in milliseconds) for the views (default value is " + Long.toString(defaultAge) + ", i.e. 1 day)");
+ private static final Option HELP_OPTION = new Option("h", "help", false, "Help");
+
+ private Options getOptions() {
+ final Options options = new Options();
+ options.addOption(OUTPUT_PATH_OPTION);
+ options.addOption(INPUT_PATH_OPTION);
+ options.addOption(CLEAN_ORPHAN_VIEWS_OPTION);
+ options.addOption(IDENTIFY_ORPHAN_VIEWS_OPTION);
+ options.addOption(AGE_OPTION);
+ options.addOption(HELP_OPTION);
+ return options;
+ }
+
+ /**
+ * Parses the commandline arguments, throws IllegalStateException if mandatory arguments are
+ * missing.
+ * @param args supplied command line arguments
+ */
+ private void parseOptions(String[] args) throws Exception {
+
+ final Options options = getOptions();
+
+ CommandLineParser parser = new PosixParser();
+ CommandLine cmdLine = null;
+ try {
+ cmdLine = parser.parse(options, args);
+ } catch (ParseException e) {
+ printHelpAndExit("Error parsing command line options: " + e.getMessage(), options);
+ }
+ if (cmdLine.hasOption(HELP_OPTION.getOpt())) {
+ printHelpAndExit(options, 0);
+ }
+ if (cmdLine.hasOption(OUTPUT_PATH_OPTION.getOpt()) && cmdLine.hasOption(INPUT_PATH_OPTION.getOpt())) {
+ throw new IllegalStateException("Specify either " + OUTPUT_PATH_OPTION.getLongOpt() + " or "
+ + INPUT_PATH_OPTION.getOpt());
+ }
+ if (cmdLine.hasOption(INPUT_PATH_OPTION.getOpt()) && !cmdLine.hasOption(CLEAN_ORPHAN_VIEWS_OPTION.getOpt())) {
+ throw new IllegalStateException(INPUT_PATH_OPTION.getLongOpt() + " is only used with "
+ + IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt());
+ }
+ if (cmdLine.hasOption(IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt()) && cmdLine.hasOption(CLEAN_ORPHAN_VIEWS_OPTION.getOpt())) {
+ throw new IllegalStateException("Specify either " + IDENTIFY_ORPHAN_VIEWS_OPTION.getLongOpt() + " or "
+ + IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt());
+ }
+ if (cmdLine.hasOption(OUTPUT_PATH_OPTION.getOpt()) && (!cmdLine.hasOption(IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt()) &&
+ !cmdLine.hasOption(CLEAN_ORPHAN_VIEWS_OPTION.getOpt()))) {
+ throw new IllegalStateException(OUTPUT_PATH_OPTION.getLongOpt() + " requires either " +
+ IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt() + " or " + CLEAN_ORPHAN_VIEWS_OPTION.getOpt());
+ }
+ if (cmdLine.hasOption(CLEAN_ORPHAN_VIEWS_OPTION.getOpt())) {
+ clean = true;
+ }
+ else if (!cmdLine.hasOption(IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt())) {
+ throw new IllegalStateException("Specify either " +
+ IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt() + " or " + CLEAN_ORPHAN_VIEWS_OPTION.getOpt());
+ }
+ if (cmdLine.hasOption(AGE_OPTION.getOpt())) {
+ age = Long.valueOf(cmdLine.getOptionValue(AGE_OPTION.getOpt()));
+ }
+
+ outputPath = cmdLine.getOptionValue(OUTPUT_PATH_OPTION.getOpt());
+ inputPath = cmdLine.getOptionValue(INPUT_PATH_OPTION.getOpt());
+ }
+
+ private void printHelpAndExit(String errorMessage, Options options) {
+ System.err.println(errorMessage);
+ printHelpAndExit(options, 1);
+ }
+
+ private void printHelpAndExit(Options options, int exitCode) {
+ HelpFormatter formatter = new HelpFormatter();
+ formatter.printHelp("help", options);
+ System.exit(exitCode);
+ }
+
+ private class Key {
+ private String tenantId;
+ private String schemaName;
+ private String tableName;
+
+ public Key (String tenantId, String schemaName, String tableName) {
+ this.tenantId = tenantId;
+ this.schemaName = schemaName;
+ this.tableName = tableName;
+ }
+
+ public Key (String tenantId, String fullTableName) {
+ this.tenantId = tenantId;
+ String[] columns = fullTableName.split("\\.");
+ if (columns.length == 1) {
+ this.schemaName = null;
+ this.tableName = fullTableName;
+ } else {
+ this.schemaName = columns[0];
+ this.tableName = columns[1];
+ }
+ }
+
+ public Key (String serializedKey) {
+ String[] columns = serializedKey.split(",");
+ tenantId = columns[0].compareTo("") == 0 ? null : columns[0];
+ schemaName = columns[1].compareTo("") == 0 ? null : columns[1];
+ tableName = columns[2];
+ }
+
+ public String serialize () {
+ return (tenantId != null ? tenantId + "," : ",") +
+ (schemaName != null ? schemaName + "," : ",") +
+ tableName;
+ }
+ @Override
+ public int hashCode() {
+ return Objects.hash(serialize());
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (getClass() != obj.getClass())
+ return false;
+ Key other = (Key) obj;
+ if (this.serialize().compareTo(other.serialize()) != 0)
+ return false;
+ return true;
+ }
+ }
+
+ private abstract class Table {
+ protected Key key;
+ protected List<Key> childViews;
+
+ public void addChild(Key childView) {
+ if (childViews == null) {
+ childViews = new LinkedList<>();
+ }
+ childViews.add(childView);
+ }
+
+ public boolean isParent() {
+ if (childViews == null || childViews.isEmpty()) {
+ return false;
+ }
+ return true;
+ }
+ }
+
+ private class Base extends Table {
+ public Base (Key key) {
+ this.key = key;
+ }
+ }
+
+ private class View extends Table {
+ Key parent;
+ Key base;
+
+ public View (Key key) {
+ this.key = key;
+ }
+
+ public void setParent(Key parent) {
+ this.parent = parent;
+ }
+
+ public void setBase(Key base) {
+ this.base = base;
+ }
+ }
+
+ private class Link {
+ Key src;
+ Key dst;
+ PTable.LinkType type;
+
+ public Link(Key src, Key dst, PTable.LinkType type) {
+ this.src = src;
+ this.dst = dst;
+ this.type = type;
+ }
+
+ public String serialize () {
+ return src.serialize() + "," + dst.serialize() + "," + type.toString();
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(serialize());
+ }
+ }
+
+ private void gracefullyDropView(PhoenixConnection phoenixConnection, Configuration configuration,
+ Key key) throws Exception {
+ PhoenixConnection tenantConnection;
+ if (key.tenantId != null) {
+ Properties tenantProps = new Properties();
+ tenantProps.setProperty(PhoenixRuntime.TENANT_ID_ATTRIB, key.tenantId);
+ tenantConnection = ConnectionUtil.getInputConnection(configuration, tenantProps).
+ unwrap(PhoenixConnection.class);
+ } else {
+ tenantConnection = phoenixConnection;
+ }
+
+ MetaDataClient client = new MetaDataClient(tenantConnection);
+ org.apache.phoenix.parse.TableName pTableName = org.apache.phoenix.parse.TableName
+ .create(key.schemaName, key.tableName);
+ try {
+ client.dropTable(
+ new DropTableStatement(pTableName, PTableType.VIEW, false, true, true));
+ }
+ catch (TableNotFoundException e) {
+ LOG.info("Ignoring view " + pTableName + " as it has already been dropped");
+ }
+ }
+
+ private void removeLink(PhoenixConnection phoenixConnection, Key src, Key dst, PTable.LinkType linkType) throws Exception {
+ String deleteQuery = "DELETE FROM " +
+ ((linkType == PTable.LinkType.PHYSICAL_TABLE || linkType == PTable.LinkType.PARENT_TABLE) ? SYSTEM_CATALOG_NAME : SYSTEM_CHILD_LINK_NAME) +
+ " WHERE " + TENANT_ID + (src.tenantId == null ? " IS NULL" : " = '" + src.tenantId + "'") + " AND " +
+ TABLE_SCHEM + (src.schemaName == null ? " IS NULL " : " = '" + src.schemaName + "'") + " AND " +
+ TABLE_NAME + " = '" + src.tableName + "' AND " +
+ COLUMN_NAME + (dst.tenantId == null ? " IS NULL" : " = '" + dst.tenantId + "'") + " AND " +
+ COLUMN_FAMILY + " = '" + (dst.schemaName == null ? dst.tableName : dst.schemaName + "." +
+ dst.tableName) + "'";
+ phoenixConnection.createStatement().execute(deleteQuery);
+ phoenixConnection.commit();
+ }
+
+ private byte getLinkType(PTable.LinkType linkType) {
+ byte type;
+ if (linkType == PTable.LinkType.PHYSICAL_TABLE) {
+ type = PHYSICAL_TABLE_LINK;
+ }
+ else if (linkType == PTable.LinkType.PARENT_TABLE) {
+ type = PARENT_TABLE_LINK;
+ } else if (linkType == PTable.LinkType.CHILD_TABLE) {
+ type = CHILD_TABLE_LINK;
+ }
+ else {
+ throw new AssertionError("Unknown Link Type");
+ }
+ return type;
+ }
+
+ private PTable.LinkType getLinkType(byte linkType) {
+ PTable.LinkType type;
+ if (linkType == PHYSICAL_TABLE_LINK) {
+ type = PTable.LinkType.PHYSICAL_TABLE;
+ }
+ else if (linkType == PARENT_TABLE_LINK) {
+ type = PTable.LinkType.PARENT_TABLE;
+ } else if (linkType == CHILD_TABLE_LINK) {
+ type = PTable.LinkType.CHILD_TABLE;
+ }
+ else {
+ throw new AssertionError("Unknown Link Type");
+ }
+ return type;
+ }
+
+ private void removeOrLogOrphanLinks(PhoenixConnection phoenixConnection) {
+ for (Link link : orphanLinkSet) {
+ try {
+ byte linkType = getLinkType(link.type);
+ if (outputPath != null) {
+ writer[linkType].write(link.src.serialize() + "-->" + link.dst.serialize());
+ writer[linkType].newLine();
+ }
+ else if (!clean){
+ System.out.println(link.src.serialize() + "-(" + link.type + ")->" + link.dst.serialize());
+ }
+ if (clean) {
+ removeLink(phoenixConnection, link.src, link.dst, link.type);
+ }
+ } catch (Exception e) {
+ // ignore
+ }
+ }
+ }
+ private void forcefullyDropView(PhoenixConnection phoenixConnection,
+ Key key) throws Exception {
+ String deleteRowsFromCatalog = "DELETE FROM " + SYSTEM_CATALOG_NAME +
+ " WHERE " + TENANT_ID + (key.tenantId == null ? " IS NULL" : " = '" + key.tenantId + "'") + " AND " +
+ TABLE_SCHEM + (key.schemaName == null ? " IS NULL " : " = '" + key.schemaName + "'") + " AND " +
+ TABLE_NAME + " = '" + key.tableName + "'";
+ String deleteRowsFromChildLink = "DELETE FROM " + SYSTEM_CHILD_LINK_NAME +
+ " WHERE " + COLUMN_NAME + (key.tenantId == null ? " IS NULL" : " = '" + key.tenantId + "'") + " AND " +
+ COLUMN_FAMILY + " = '" + (key.schemaName == null ? key.tableName : key.schemaName + "." + key.tableName) + "'";
+ try {
+ phoenixConnection.createStatement().execute(deleteRowsFromCatalog);
+ phoenixConnection.createStatement().execute(deleteRowsFromChildLink);
+ phoenixConnection.commit();
+ } catch (SQLException e) {
+ throw new IOException(e);
+ }
+ }
+
+ private void dropOrLogOrphanViews(PhoenixConnection phoenixConnection, Configuration configuration,
+ Key key) throws Exception {
+ if (outputPath != null) {
+ writer[VIEW].write(key.serialize());
+ writer[VIEW].newLine();
+ }
+ else if (!clean) {
+ System.out.println(key.serialize());
+ return;
+ }
+ if (!clean) {
+ return;
+ }
+ gracefullyDropView(phoenixConnection, configuration, key);
+ }
+
+ /**
+ * Go through all the views in the system catalog table and add them to orphanViewSet
+ * @param phoenixConnection
+ * @throws Exception
+ */
+ private void populateOrphanViewSet(PhoenixConnection phoenixConnection)
+ throws Exception {
+ ResultSet viewRS = phoenixConnection.createStatement().executeQuery(viewQuery);
+ while (viewRS.next()) {
+ String tenantId = viewRS.getString(1);
+ String schemaName = viewRS.getString(2);
+ String tableName = viewRS.getString(3);
+ Key key = new Key(tenantId, schemaName, tableName);
+ View view = new View(key);
+ orphanViewSet.put(key, view);
+ }
+ }
+
+ /**
+ * Go through all the tables in the system catalog table and update baseSet
+ * @param phoenixConnection
+ * @throws Exception
+ */
+ private void populateBaseSet(PhoenixConnection phoenixConnection)
+ throws Exception {
+ ResultSet baseTableRS = phoenixConnection.createStatement().executeQuery(candidateBaseTableQuery);
+ while (baseTableRS.next()) {
+ String tenantId = baseTableRS.getString(1);
+ String schemaName = baseTableRS.getString(2);
+ String tableName = baseTableRS.getString(3);
+ Key key = new Key(tenantId, schemaName, tableName);
+ Base base = new Base(key);
+ baseSet.put(key, base);
+ }
+ }
+
+ /**
+ * Go through all the physical links in the system catalog table and update the base table info of the
+ * view objects in orphanViewSet. If the base or view object does not exist for a given link, then add the link
+ * to orphanLinkSet
+ * @param phoenixConnection
+ * @throws Exception
+ */
+ private void processPhysicalLinks(PhoenixConnection phoenixConnection)
+ throws Exception {
+ ResultSet physicalLinkRS = phoenixConnection.createStatement().executeQuery(physicalLinkQuery);
+ while (physicalLinkRS.next()) {
+ String tenantId = physicalLinkRS.getString(1);
+ String schemaName = physicalLinkRS.getString(2);
+ String tableName = physicalLinkRS.getString(3);
+ Key viewKey = new Key(tenantId, schemaName, tableName);
+ View view = orphanViewSet.get(viewKey);
+
+ String baseTenantId = physicalLinkRS.getString(4);
+ String baseFullTableName = physicalLinkRS.getString(5);
+ Key baseKey = new Key(baseTenantId, baseFullTableName);
+ Base base = baseSet.get(baseKey);
+
+ if (view == null || base == null) {
+ orphanLinkSet.add(new Link(viewKey, baseKey, PTable.LinkType.PHYSICAL_TABLE));
+ }
+ else {
+ view.setBase(baseKey);
+ }
+ }
+ }
+
+ /**
+ * Go through all the child-parent links in the system child-link table and update the parent field of the
--- End diff --
Nit: This comment should say "system catalog table" instead of "child-link", right?
---