Posted to issues@flink.apache.org by GitBox <gi...@apache.org> on 2022/05/07 09:50:26 UTC

[GitHub] [flink-table-store] tsreaper opened a new pull request, #113: [FLINK-27347] Support reading external table store table in Hive

tsreaper opened a new pull request, #113:
URL: https://github.com/apache/flink-table-store/pull/113

   As the first step to support table store in Hive, we should allow Hive external tables to read table store files created by other systems (for example, Flink).




[GitHub] [flink-table-store] JingsongLi commented on a diff in pull request #113: [FLINK-27347] Support reading external table store table in Hive

Posted by GitBox <gi...@apache.org>.
JingsongLi commented on code in PR #113:
URL: https://github.com/apache/flink-table-store/pull/113#discussion_r869942726


##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/mapred/TableStoreInputFormat.java:
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.mapred;
+
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.table.catalog.ObjectIdentifier;
+import org.apache.flink.table.data.binary.BinaryRowData;
+import org.apache.flink.table.store.JobConfWrapper;
+import org.apache.flink.table.store.RowDataContainer;
+import org.apache.flink.table.store.file.FileStore;
+import org.apache.flink.table.store.file.FileStoreImpl;
+import org.apache.flink.table.store.file.FileStoreOptions;
+import org.apache.flink.table.store.file.data.DataFileMeta;
+import org.apache.flink.table.store.file.mergetree.compact.DeduplicateMergeFunction;
+import org.apache.flink.table.store.file.mergetree.compact.MergeFunction;
+import org.apache.flink.table.store.file.mergetree.compact.ValueCountMergeFunction;
+import org.apache.flink.table.store.file.operation.FileStoreRead;
+import org.apache.flink.table.store.file.operation.FileStoreScan;
+import org.apache.flink.table.types.logical.BigIntType;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.RowType;
+import org.apache.flink.util.Preconditions;
+
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * {@link InputFormat} for table store. It divides all files into {@link InputSplit}s (one split per
+ * bucket) and creates {@link RecordReader} for each split.
+ */
+public class TableStoreInputFormat implements InputFormat<Void, RowDataContainer> {
+
+    @Override
+    public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
+        FileStoreImpl store = getFileStore(jobConf);
+        FileStoreScan scan = store.newScan();
+        List<TableStoreInputSplit> result = new ArrayList<>();
+        for (Map.Entry<BinaryRowData, Map<Integer, List<DataFileMeta>>> pe :
+                scan.plan().groupByPartFiles().entrySet()) {
+            for (Map.Entry<Integer, List<DataFileMeta>> be : pe.getValue().entrySet()) {
+                BinaryRowData partition = pe.getKey();
+                int bucket = be.getKey();
+                String bucketPath =
+                        store.pathFactory()
+                                .createDataFilePathFactory(partition, bucket)
+                                .toPath("")
+                                .toString();
+                TableStoreInputSplit split =
+                        new TableStoreInputSplit(
+                                store.partitionType(),
+                                store.keyType(),
+                                store.valueType(),
+                                partition,
+                                bucket,
+                                be.getValue(),
+                                bucketPath);
+                result.add(split);
+            }
+        }
+        return result.toArray(new TableStoreInputSplit[0]);
+    }
+
+    @Override
+    public RecordReader<Void, RowDataContainer> getRecordReader(
+            InputSplit inputSplit, JobConf jobConf, Reporter reporter) throws IOException {
+        FileStore store = getFileStore(jobConf);
+        FileStoreRead read = store.newRead();
+        TableStoreInputSplit split = (TableStoreInputSplit) inputSplit;
+        org.apache.flink.table.store.file.utils.RecordReader wrapped =
+                read.withDropDelete(true)
+                        .createReader(split.partition(), split.bucket(), split.files());
+        long splitLength = split.getLength();
+        return new JobConfWrapper(jobConf).getPrimaryKeyNames().isPresent()
+                ? new TableStorePkRecordReader(wrapped, splitLength)
+                : new TableStoreCountRecordReader(wrapped, splitLength);
+    }
+
+    private FileStoreImpl getFileStore(JobConf jobConf) {
+        JobConfWrapper wrapper = new JobConfWrapper(jobConf);
+
+        String catalogName = wrapper.getCatalogName();
+        String dbName = wrapper.getDbName();
+        String tableName = wrapper.getTableName();
+        ObjectIdentifier identifier = ObjectIdentifier.of(catalogName, dbName, tableName);
+
+        Configuration fileStoreOptions = new Configuration();
+        fileStoreOptions.setString(FileStoreOptions.PATH, wrapper.getLocation());

Review Comment:
   The location should be the table location.
   We can then get rid of `catalogName`, `dbName` and `tableName`.
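
   A minimal sketch of this simplification, assuming `wrapper.getLocation()` already points at the table directory. The helper name `fileStoreOptionsFromLocation` is hypothetical; only calls visible in the quoted diff are used, and imports are the same as in TableStoreInputFormat above.

       private Configuration fileStoreOptionsFromLocation(JobConf jobConf) {
           JobConfWrapper wrapper = new JobConfWrapper(jobConf);
           Configuration fileStoreOptions = new Configuration();
           // the table location alone identifies the store; no catalog / db / table name needed
           fileStoreOptions.setString(FileStoreOptions.PATH, wrapper.getLocation());
           return fileStoreOptions;
       }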





[GitHub] [flink-table-store] JingsongLi merged pull request #113: [FLINK-27347] Support reading external table store table in Hive

Posted by GitBox <gi...@apache.org>.
JingsongLi merged PR #113:
URL: https://github.com/apache/flink-table-store/pull/113




[GitHub] [flink-table-store] JingsongLi commented on a diff in pull request #113: [FLINK-27347] Support reading external table store table in Hive

Posted by GitBox <gi...@apache.org>.
JingsongLi commented on code in PR #113:
URL: https://github.com/apache/flink-table-store/pull/113#discussion_r871069928


##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/TableStoreJobConf.java:
##########
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store;
+
+import org.apache.flink.configuration.ConfigOption;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.table.store.file.FileStoreOptions;
+import org.apache.flink.table.store.hive.HiveTypeUtils;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.utils.LogicalTypeParser;
+import org.apache.flink.util.Preconditions;
+
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.mapred.JobConf;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Properties;
+import java.util.stream.Collectors;
+
+/**
+ * Utility class to convert Hive table property keys and get file store specific configurations from
+ * {@link JobConf}.
+ */
+public class TableStoreJobConf {
+
+    private static final String TBLPROPERTIES_PREFIX = "table-store.";
+    private static final String TBLPROPERTIES_PRIMARY_KEYS = TBLPROPERTIES_PREFIX + "primary-keys";
+    private static final String INTERNAL_TBLPROPERTIES_PREFIX =
+            "table-store.internal.tblproperties.";
+
+    private static final String INTERNAL_DB_NAME = "table-store.internal.db-name";
+    private static final String INTERNAL_TABLE_NAME = "table-store.internal.table-name";
+    private static final String INTERNAL_LOCATION = "table-store.internal.location";
+
+    private static final String INTERNAL_COLUMN_NAMES = "table-store.internal.column-names";
+
+    private static final String INTERNAL_COLUMN_TYPES = "table-store.internal.column-types";
+    private static final String COLUMN_TYPES_SEPARATOR = "\0";
+
+    private static final String INTERNAL_PARTITION_COLUMN_NAMES =
+            "table-store.internal.partition-column-names";
+    private static final String INTERNAL_PRIMARY_KEYS = "table-store.internal.primary-keys";
+
+    private static final String INTERNAL_FILE_STORE_USER = "table-store.internal.file-store.user";
+
+    private final JobConf jobConf;
+
+    public TableStoreJobConf(JobConf jobConf) {
+        this.jobConf = jobConf;
+    }
+
+    public static void update(Properties properties, Map<String, String> map) {

Review Comment:
   Rename to `configureInputJobProperties`? Just keep the full name.
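
   A hedged sketch of the naming this suggests, mirroring the Hive storage handler callback that drives it. The surrounding storage handler class is not shown in this PR excerpt and is only assumed here; `HiveStorageHandler#configureInputJobProperties` and `TableDesc#getProperties` are standard Hive APIs.

       // inside a HiveStorageHandler implementation (assumed context)
       @Override
       public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
           // delegate to the renamed utility so the method name matches the Hive callback it serves
           TableStoreJobConf.configureInputJobProperties(tableDesc.getProperties(), jobProperties);
       }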



##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/TableStoreJobConf.java:
##########
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store;
+
+import org.apache.flink.configuration.ConfigOption;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.table.store.file.FileStoreOptions;
+import org.apache.flink.table.store.hive.HiveTypeUtils;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.utils.LogicalTypeParser;
+import org.apache.flink.util.Preconditions;
+
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.mapred.JobConf;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Properties;
+import java.util.stream.Collectors;
+
+/**
+ * Utility class to convert Hive table property keys and get file store specific configurations from
+ * {@link JobConf}.
+ */
+public class TableStoreJobConf {
+
+    private static final String TBLPROPERTIES_PREFIX = "table-store.";
+    private static final String TBLPROPERTIES_PRIMARY_KEYS = TBLPROPERTIES_PREFIX + "primary-keys";
+    private static final String INTERNAL_TBLPROPERTIES_PREFIX =
+            "table-store.internal.tblproperties.";
+
+    private static final String INTERNAL_DB_NAME = "table-store.internal.db-name";
+    private static final String INTERNAL_TABLE_NAME = "table-store.internal.table-name";
+    private static final String INTERNAL_LOCATION = "table-store.internal.location";
+
+    private static final String INTERNAL_COLUMN_NAMES = "table-store.internal.column-names";
+
+    private static final String INTERNAL_COLUMN_TYPES = "table-store.internal.column-types";
+    private static final String COLUMN_TYPES_SEPARATOR = "\0";
+
+    private static final String INTERNAL_PARTITION_COLUMN_NAMES =
+            "table-store.internal.partition-column-names";
+    private static final String INTERNAL_PRIMARY_KEYS = "table-store.internal.primary-keys";
+
+    private static final String INTERNAL_FILE_STORE_USER = "table-store.internal.file-store.user";
+
+    private final JobConf jobConf;
+
+    public TableStoreJobConf(JobConf jobConf) {
+        this.jobConf = jobConf;
+    }
+
+    public static void update(Properties properties, Map<String, String> map) {
+        String tableNameString = properties.getProperty(hive_metastoreConstants.META_TABLE_NAME);

Review Comment:
   Is `tableNameString` still useful? It seems to be used only for printing.
   Can we remove this usage?





[GitHub] [flink-table-store] JingsongLi commented on a diff in pull request #113: [FLINK-27347] Support reading external table store table in Hive

Posted by GitBox <gi...@apache.org>.
JingsongLi commented on code in PR #113:
URL: https://github.com/apache/flink-table-store/pull/113#discussion_r869926081


##########
flink-table-store-hive/pom.xml:
##########
@@ -0,0 +1,626 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <artifactId>flink-table-store-parent</artifactId>
+        <groupId>org.apache.flink</groupId>
+        <version>0.2-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>flink-table-store-hive</artifactId>
+    <name>Flink Table Store : Hive</name>
+
+    <packaging>jar</packaging>
+
+    <properties>
+        <hiverunner.version>4.0.0</hiverunner.version>
+        <reflections.version>0.9.8</reflections.version>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-table-store-core</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-core</artifactId>
+            <version>${flink.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.apache.flink</groupId>
+                    <artifactId>flink-metrics-core</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.commons</groupId>
+                    <artifactId>commons-lang3</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.esotericsoftware.kryo</groupId>
+                    <artifactId>kryo</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>commons-collections</groupId>
+                    <artifactId>commons-collections</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.commons</groupId>
+                    <artifactId>commons-compress</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-table-common</artifactId>
+            <version>${flink.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>*</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-table-api-java</artifactId>
+            <version>${flink.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>*</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-table-runtime</artifactId>
+            <version>${flink.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>*</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-common</artifactId>
+            <version>${hadoop.version}</version>
+            <scope>provided</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>log4j</groupId>
+                    <artifactId>log4j</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>slf4j-log4j12</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-client</artifactId>
+            <version>${hadoop.version}</version>
+            <scope>provided</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>log4j</groupId>
+                    <artifactId>log4j</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>slf4j-log4j12</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.hive</groupId>
+            <artifactId>hive-exec</artifactId>
+            <version>${hive.version}</version>
+            <scope>provided</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>log4j</groupId>
+                    <artifactId>log4j</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>slf4j-log4j12</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-annotations</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-core</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-databind</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.orc</groupId>
+                    <artifactId>orc-core</artifactId>
+                </exclusion>
+                <!-- this dependency cannot be fetched from central maven repository anymore -->
+                <exclusion>
+                    <groupId>org.pentaho</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <!-- codegen runtime dependencies -->
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-table-store-codegen-loader</artifactId>
+            <version>${project.version}</version>
+            <scope>runtime</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-streaming-java</artifactId>
+            <version>${flink.version}</version>
+            <scope>runtime</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>*</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <!-- filesystem runtime dependencies -->
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-hadoop-fs</artifactId>
+            <version>${flink.version}</version>
+            <scope>runtime</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>*</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <!-- format runtime dependencies -->
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-table-store-format</artifactId>
+            <version>${project.version}</version>
+            <scope>runtime</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-connector-files</artifactId>
+            <version>${flink.version}</version>
+            <scope>runtime</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>*</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <!-- "runtime" runtime dependencies, mainly for quick sorting memory buffer -->
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-runtime</artifactId>
+            <version>${flink.version}</version>
+            <scope>runtime</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>*</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.xerial.snappy</groupId>
+            <artifactId>snappy-java</artifactId>
+            <version>${snappy.version}</version>
+            <scope>runtime</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.lz4</groupId>
+            <artifactId>lz4-java</artifactId>
+            <version>${lz4.version}</version>
+            <scope>runtime</scope>
+        </dependency>
+
+        <!-- other runtime dependencies -->
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-shaded-jackson</artifactId>
+            <version>${flink.shaded.jackson.version}-${flink.shaded.version}</version>
+            <scope>runtime</scope>
+        </dependency>
+
+        <!-- test dependencies -->
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-table-store-core</artifactId>
+            <version>${project.version}</version>
+            <scope>test</scope>
+            <type>test-jar</type>
+        </dependency>
+
+        <!--
+        IDEA reads classes from the same project from target/classes of that module,
+        so even though we've packaged and shaded avro classes into flink-table-store-format.jar
+        we still have to include this test dependency here.
+        -->
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-sql-avro</artifactId>
+            <version>${flink.version}</version>
+            <scope>test</scope>
+        </dependency>
+
+        <!-- dependencies for IT cases -->
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-connector-hive_${scala.binary.version}</artifactId>

Review Comment:
   Which IT case will depend on flink-connector-hive?





[GitHub] [flink-table-store] JingsongLi commented on a diff in pull request #113: [FLINK-27347] Support reading external table store table in Hive

Posted by GitBox <gi...@apache.org>.
JingsongLi commented on code in PR #113:
URL: https://github.com/apache/flink-table-store/pull/113#discussion_r867465627


##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/JobConfWrapper.java:
##########
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store;
+
+import org.apache.flink.configuration.ConfigOption;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.table.store.file.FileStoreOptions;
+import org.apache.flink.table.store.hive.TypeUtils;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.utils.LogicalTypeParser;
+import org.apache.flink.util.Preconditions;
+
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.mapred.JobConf;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Properties;
+import java.util.stream.Collectors;
+
+/**
+ * Utility class to convert Hive table property keys and get file store specific configurations from
+ * {@link JobConf}.
+ */
+public class JobConfWrapper {

Review Comment:
   NIT: rename this to `TableStoreJobConf`?
   `JobConfWrapper` in Flink wraps `JobConf` into a `Serializable` class; that is not the meaning here, so the current name is easy to misunderstand.



##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/hive/objectinspector/TableStoreDecimalObjectInspector.java:
##########
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.hive.objectinspector;
+
+import org.apache.flink.table.data.DecimalData;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+
+/** {@link AbstractPrimitiveJavaObjectInspector} for DECIMAL type. */
+public class TableStoreDecimalObjectInspector extends AbstractPrimitiveJavaObjectInspector
+        implements HiveDecimalObjectInspector {
+
+    public TableStoreDecimalObjectInspector(DecimalTypeInfo typeInfo) {
+        super(typeInfo);
+    }
+
+    @Override
+    public HiveDecimal getPrimitiveJavaObject(Object o) {
+        return o == null ? null : HiveDecimal.create(((DecimalData) o).toBigDecimal());
+    }
+
+    @Override
+    public HiveDecimalWritable getPrimitiveWritableObject(Object o) {
+        HiveDecimal decimal = getPrimitiveJavaObject(o);
+        return decimal == null ? null : new HiveDecimalWritable(decimal);
+    }
+
+    @Override
+    public Object copyObject(Object o) {
+        if (o instanceof DecimalData) {
+            DecimalData decimalData = (DecimalData) o;
+            return DecimalData.fromBigDecimal(

Review Comment:
   `decimalData.copy()`
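
   A short sketch of the suggested change, assuming `DecimalData#copy()` (which Flink's `DecimalData` provides) is what is meant; the fallback branch for non-`DecimalData` inputs is an assumption, not the PR's code.

       @Override
       public Object copyObject(Object o) {
           if (o instanceof DecimalData) {
               // avoid the BigDecimal round-trip; DecimalData already knows how to copy itself
               return ((DecimalData) o).copy();
           }
           return o;
       }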



##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/hive/TableStoreSerDe.java:
##########
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.hive;
+
+import org.apache.flink.table.store.RowDataContainer;
+import org.apache.flink.table.store.hive.objectinspector.TableStoreRowDataObjectInspector;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.Writable;
+
+import javax.annotation.Nullable;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * {@link AbstractSerDe} for table store. It transforms map-reduce values to Hive objects.
+ *
+ * <p>Currently this class only supports deserialization.
+ */
+public class TableStoreSerDe extends AbstractSerDe {
+
+    private TableStoreRowDataObjectInspector inspector;
+
+    @Override
+    public void initialize(@Nullable Configuration configuration, Properties properties)
+            throws SerDeException {
+        String columnNames = properties.getProperty(serdeConstants.LIST_COLUMNS);

Review Comment:
   Add a method `extractHiveSchema`? We can add a TODO here: `extract schema from table store`
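
   One possible shape for the suggested helper, kept as a hedged sketch: the property key and `TypeInfoUtils` call are standard Hive serde APIs already imported in the quoted file, and the TODO records the follow-up of reading the schema from the table store itself.

       private static List<TypeInfo> extractHiveSchema(Properties properties) {
           // TODO: extract schema from table store instead of trusting Hive's table properties
           String columnTypes = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
           return TypeInfoUtils.getTypeInfosFromTypeString(columnTypes);
       }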



##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/hive/TypeUtils.java:
##########
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.hive;
+
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.store.hive.objectinspector.TableStoreCharObjectInspector;
+import org.apache.flink.table.store.hive.objectinspector.TableStoreDateObjectInspector;
+import org.apache.flink.table.store.hive.objectinspector.TableStoreDecimalObjectInspector;
+import org.apache.flink.table.store.hive.objectinspector.TableStoreListObjectInspector;
+import org.apache.flink.table.store.hive.objectinspector.TableStoreMapObjectInspector;
+import org.apache.flink.table.store.hive.objectinspector.TableStoreStringObjectInspector;
+import org.apache.flink.table.store.hive.objectinspector.TableStoreTimestampObjectInspector;
+import org.apache.flink.table.store.hive.objectinspector.TableStoreVarcharObjectInspector;
+import org.apache.flink.table.types.DataType;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+
+/** Utils for converting types related classes between Flink and Hive. */
+public class TypeUtils {

Review Comment:
   `HiveTypeUtils`



##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/mapred/AbstractTableStoreRecordReader.java:
##########
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.mapred;
+
+import org.apache.flink.table.store.RowDataContainer;
+import org.apache.flink.table.store.file.utils.RecordReaderIterator;
+
+import org.apache.hadoop.mapred.RecordReader;
+
+import java.io.IOException;
+
+/**
+ * Base {@link RecordReader} for table store. Reads {@link
+ * org.apache.flink.table.store.file.KeyValue}s from data files and picks out {@link
+ * org.apache.flink.table.data.RowData} for Hive to consume.
+ */
+public abstract class AbstractTableStoreRecordReader
+        implements RecordReader<Void, RowDataContainer> {
+
+    protected final RecordReaderIterator iterator;
+    private final long splitLength;
+
+    protected float progress;
+
+    public AbstractTableStoreRecordReader(
+            org.apache.flink.table.store.file.utils.RecordReader wrapped, long splitLength) {
+        this.iterator = new RecordReaderIterator(wrapped);
+        this.splitLength = splitLength;
+        this.progress = 0;
+    }
+
+    @Override
+    public Void createKey() {
+        return null;
+    }
+
+    @Override
+    public RowDataContainer createValue() {
+        return new RowDataContainer();
+    }
+
+    @Override
+    public long getPos() throws IOException {
+        return (long) (splitLength * getProgress());
+    }
+
+    @Override
+    public void close() throws IOException {
+        try {
+            iterator.close();
+        } catch (Exception e) {
+            throw new IOException(e);
+        }
+    }
+
+    @Override
+    public float getProgress() throws IOException {
+        // TODO make this more precise
+        return progress;

Review Comment:
   Add a comment to document how this progress behaves now. Is it just 0 and 1?
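
   A sketch of the comment being asked for; the behavior described is an assumption based on the TODO above, not confirmed by the PR.

       @Override
       public float getProgress() throws IOException {
           // Coarse progress only: the value stays at 0 while records are being read and is
           // set to 1 once the underlying reader is exhausted, since per-record positions
           // are not exposed yet.
           // TODO make this more precise
           return progress;
       }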



##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/mapred/TableStoreInputFormat.java:
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.mapred;
+
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.table.catalog.ObjectIdentifier;
+import org.apache.flink.table.data.binary.BinaryRowData;
+import org.apache.flink.table.store.JobConfWrapper;
+import org.apache.flink.table.store.RowDataContainer;
+import org.apache.flink.table.store.file.FileStore;
+import org.apache.flink.table.store.file.FileStoreImpl;
+import org.apache.flink.table.store.file.FileStoreOptions;
+import org.apache.flink.table.store.file.data.DataFileMeta;
+import org.apache.flink.table.store.file.mergetree.compact.DeduplicateMergeFunction;
+import org.apache.flink.table.store.file.mergetree.compact.MergeFunction;
+import org.apache.flink.table.store.file.mergetree.compact.ValueCountMergeFunction;
+import org.apache.flink.table.store.file.operation.FileStoreRead;
+import org.apache.flink.table.store.file.operation.FileStoreScan;
+import org.apache.flink.table.types.logical.BigIntType;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.RowType;
+import org.apache.flink.util.Preconditions;
+
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * {@link InputFormat} for table store. It divides all files into {@link InputSplit}s (one split per
+ * bucket) and creates {@link RecordReader} for each split.
+ */
+public class TableStoreInputFormat implements InputFormat<Void, RowDataContainer> {
+
+    @Override
+    public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
+        FileStoreImpl store = getFileStore(jobConf);
+        FileStoreScan scan = store.newScan();
+        List<TableStoreInputSplit> result = new ArrayList<>();
+        for (Map.Entry<BinaryRowData, Map<Integer, List<DataFileMeta>>> pe :
+                scan.plan().groupByPartFiles().entrySet()) {
+            for (Map.Entry<Integer, List<DataFileMeta>> be : pe.getValue().entrySet()) {
+                BinaryRowData partition = pe.getKey();
+                int bucket = be.getKey();
+                String bucketPath =
+                        store.pathFactory()
+                                .createDataFilePathFactory(partition, bucket)
+                                .toPath("")

Review Comment:
   Expose a method in `DataFilePathFactory`: `bucketPath`?
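
   A hedged sketch of what that accessor might look like; the field name `bucketDir` is hypothetical and simply stands for whatever directory `toPath("")` is currently built from.

       // in DataFilePathFactory (sketch)
       public Path bucketPath() {
           // the directory that toPath(fileName) resolves file names against
           return bucketDir;
       }

       // the call site in getSplits(...) would then read:
       String bucketPath =
               store.pathFactory()
                       .createDataFilePathFactory(partition, bucket)
                       .bucketPath()
                       .toString();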



##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/mapred/TableStoreInputSplit.java:
##########
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.mapred;
+
+import org.apache.flink.core.memory.DataInputViewStreamWrapper;
+import org.apache.flink.core.memory.DataOutputViewStreamWrapper;
+import org.apache.flink.table.data.binary.BinaryRowData;
+import org.apache.flink.table.runtime.typeutils.RowDataSerializer;
+import org.apache.flink.table.store.file.data.DataFileMeta;
+import org.apache.flink.table.store.file.data.DataFileMetaSerializer;
+import org.apache.flink.table.types.logical.RowType;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.FileSplit;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * {@link FileSplit} for table store. It contains all files to read from a certain partition and
+ * bucket.
+ */
+public class TableStoreInputSplit extends FileSplit {

Review Comment:
   Do we need to use `FileSplit`?
   Can we use `InputSplit`?
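
   For comparison, a minimal self-contained sketch of the `InputSplit`-only alternative. The single `bucketPath` field stands in for the PR's richer state (partition, bucket, file metas), which would be (de)serialized the same way; all names here are illustrative, not the PR's code.

       import org.apache.hadoop.mapred.InputSplit;

       import java.io.DataInput;
       import java.io.DataOutput;
       import java.io.IOException;

       /** Sketch of a split implementing InputSplit directly instead of extending FileSplit. */
       public class TableStoreInputSplitSketch implements InputSplit {

           private String bucketPath = "";

           @Override
           public long getLength() {
               return 0; // the real split would sum the lengths of its DataFileMeta entries
           }

           @Override
           public String[] getLocations() {
               return new String[0]; // no locality information available
           }

           @Override
           public void write(DataOutput out) throws IOException {
               out.writeUTF(bucketPath);
           }

           @Override
           public void readFields(DataInput in) throws IOException {
               bucketPath = in.readUTF();
           }
       }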



##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/RowDataContainer.java:
##########
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store;
+
+import org.apache.flink.table.data.RowData;
+
+import org.apache.hadoop.io.Writable;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+/** A reusable object to hold {@link RowData}. */
+public class RowDataContainer implements Writable {
+
+    private RowData rowData;
+
+    public RowData get() {
+        return rowData;
+    }
+
+    public void set(RowData rowData) {
+        this.rowData = rowData;
+    }
+
+    @Override
+    public void write(DataOutput dataOutput) throws IOException {
+        throw new UnsupportedOperationException();

Review Comment:
   In what situation will this be reached? Should we add documentation?
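
   A sketch of the documentation being asked for; the stated reason is an assumption about why serialization is unsupported.

       @Override
       public void write(DataOutput dataOutput) throws IOException {
           // This container is only used on the read path to hand RowData to Hive;
           // nothing should ever serialize it back, so fail loudly if that happens.
           throw new UnsupportedOperationException();
       }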



##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/hive/objectinspector/TableStoreTimestampObjectInspector.java:
##########
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.hive.objectinspector;
+
+import org.apache.flink.table.data.TimestampData;
+
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import java.sql.Timestamp;
+
+/** {@link AbstractPrimitiveJavaObjectInspector} for TIMESTAMP type. */
+public class TableStoreTimestampObjectInspector extends AbstractPrimitiveJavaObjectInspector
+        implements TimestampObjectInspector {
+
+    public TableStoreTimestampObjectInspector() {
+        super(TypeInfoFactory.timestampTypeInfo);
+    }
+
+    @Override
+    public Timestamp getPrimitiveJavaObject(Object o) {
+        return o == null ? null : ((TimestampData) o).toTimestamp();
+    }
+
+    @Override
+    public TimestampWritable getPrimitiveWritableObject(Object o) {
+        Timestamp ts = getPrimitiveJavaObject(o);
+        return ts == null ? null : new TimestampWritable(ts);
+    }
+
+    @Override
+    public Object copyObject(Object o) {
+        if (o instanceof TimestampData) {
+            TimestampData timestampData = (TimestampData) o;
+            return TimestampData.fromLocalDateTime(timestampData.toLocalDateTime());

Review Comment:
   `TimestampData` is an immutable class.
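
   Given that, `copyObject` can return the instance unchanged; a sketch of the simplification, assuming no defensive copy is wanted.

       @Override
       public Object copyObject(Object o) {
           // TimestampData is immutable, so sharing the instance is safe
           return o;
       }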



##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/mapred/TableStoreCountRecordReader.java:
##########
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.mapred;
+
+import org.apache.flink.table.store.RowDataContainer;
+import org.apache.flink.table.store.file.KeyValue;
+import org.apache.flink.table.store.file.utils.RecordReader;
+
+import java.io.IOException;
+
+/** An {@link AbstractTableStoreRecordReader} for table without primary keys. */
+public class TableStoreCountRecordReader extends AbstractTableStoreRecordReader {
+
+    private KeyValue current;
+    private long remainingCount;
+
+    public TableStoreCountRecordReader(RecordReader wrapped, long splitLength) {
+        super(wrapped, splitLength);
+        this.current = null;
+        this.remainingCount = 0;
+    }
+
+    @Override
+    public boolean next(Void key, RowDataContainer value) throws IOException {
+        if (remainingCount > 0) {

Review Comment:
   I think this code can be moved into flink-table-store-core to be reused.
   Actually, the connector only needs to see an iterator.
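
   A self-contained sketch of the kind of core-side abstraction this hints at: an iterator that expands each element according to its value count, so a connector reading a table without primary keys only sees plain rows. All names and the generic shape are hypothetical, not the PR's code; negative counts (deletes) are assumed to be dropped upstream.

       import java.util.Iterator;
       import java.util.NoSuchElementException;
       import java.util.function.ToLongFunction;

       /** Repeats each element of a wrapped iterator according to its value count. */
       public class CountExpandingIterator<T> implements Iterator<T> {

           private final Iterator<T> elements;      // one element per distinct row
           private final ToLongFunction<T> countOf; // extracts the value count of an element

           private T current;
           private long remaining;

           public CountExpandingIterator(Iterator<T> elements, ToLongFunction<T> countOf) {
               this.elements = elements;
               this.countOf = countOf;
           }

           @Override
           public boolean hasNext() {
               while (remaining == 0 && elements.hasNext()) {
                   current = elements.next();
                   // deletes are assumed to be filtered out upstream, so clamp at 0
                   remaining = Math.max(0, countOf.applyAsLong(current));
               }
               return remaining > 0;
           }

           @Override
           public T next() {
               if (!hasNext()) {
                   throw new NoSuchElementException();
               }
               remaining--;
               return current;
           }
       }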



##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/mapred/TableStoreInputFormat.java:
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.mapred;
+
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.table.catalog.ObjectIdentifier;
+import org.apache.flink.table.data.binary.BinaryRowData;
+import org.apache.flink.table.store.JobConfWrapper;
+import org.apache.flink.table.store.RowDataContainer;
+import org.apache.flink.table.store.file.FileStore;
+import org.apache.flink.table.store.file.FileStoreImpl;
+import org.apache.flink.table.store.file.FileStoreOptions;
+import org.apache.flink.table.store.file.data.DataFileMeta;
+import org.apache.flink.table.store.file.mergetree.compact.DeduplicateMergeFunction;
+import org.apache.flink.table.store.file.mergetree.compact.MergeFunction;
+import org.apache.flink.table.store.file.mergetree.compact.ValueCountMergeFunction;
+import org.apache.flink.table.store.file.operation.FileStoreRead;
+import org.apache.flink.table.store.file.operation.FileStoreScan;
+import org.apache.flink.table.types.logical.BigIntType;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.RowType;
+import org.apache.flink.util.Preconditions;
+
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * {@link InputFormat} for table store. It divides all files into {@link InputSplit}s (one split per
+ * bucket) and creates {@link RecordReader} for each split.
+ */
+public class TableStoreInputFormat implements InputFormat<Void, RowDataContainer> {
+
+    @Override
+    public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
+        FileStoreImpl store = getFileStore(jobConf);
+        FileStoreScan scan = store.newScan();
+        List<TableStoreInputSplit> result = new ArrayList<>();
+        for (Map.Entry<BinaryRowData, Map<Integer, List<DataFileMeta>>> pe :
+                scan.plan().groupByPartFiles().entrySet()) {
+            for (Map.Entry<Integer, List<DataFileMeta>> be : pe.getValue().entrySet()) {
+                BinaryRowData partition = pe.getKey();
+                int bucket = be.getKey();
+                String bucketPath =
+                        store.pathFactory()
+                                .createDataFilePathFactory(partition, bucket)
+                                .toPath("")
+                                .toString();
+                TableStoreInputSplit split =
+                        new TableStoreInputSplit(
+                                store.partitionType(),
+                                store.keyType(),
+                                store.valueType(),
+                                partition,
+                                bucket,
+                                be.getValue(),
+                                bucketPath);
+                result.add(split);
+            }
+        }
+        return result.toArray(new TableStoreInputSplit[0]);
+    }
+
+    @Override
+    public RecordReader<Void, RowDataContainer> getRecordReader(
+            InputSplit inputSplit, JobConf jobConf, Reporter reporter) throws IOException {
+        FileStore store = getFileStore(jobConf);
+        FileStoreRead read = store.newRead();
+        TableStoreInputSplit split = (TableStoreInputSplit) inputSplit;
+        org.apache.flink.table.store.file.utils.RecordReader wrapped =
+                read.withDropDelete(true)
+                        .createReader(split.partition(), split.bucket(), split.files());
+        long splitLength = split.getLength();
+        return new JobConfWrapper(jobConf).getPrimaryKeyNames().isPresent()
+                ? new TableStorePkRecordReader(wrapped, splitLength)
+                : new TableStoreCountRecordReader(wrapped, splitLength);
+    }
+
+    private FileStoreImpl getFileStore(JobConf jobConf) {

Review Comment:
   NIT: createFileStore



##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/mapred/TableStoreInputFormat.java:
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.mapred;
+
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.table.catalog.ObjectIdentifier;
+import org.apache.flink.table.data.binary.BinaryRowData;
+import org.apache.flink.table.store.JobConfWrapper;
+import org.apache.flink.table.store.RowDataContainer;
+import org.apache.flink.table.store.file.FileStore;
+import org.apache.flink.table.store.file.FileStoreImpl;
+import org.apache.flink.table.store.file.FileStoreOptions;
+import org.apache.flink.table.store.file.data.DataFileMeta;
+import org.apache.flink.table.store.file.mergetree.compact.DeduplicateMergeFunction;
+import org.apache.flink.table.store.file.mergetree.compact.MergeFunction;
+import org.apache.flink.table.store.file.mergetree.compact.ValueCountMergeFunction;
+import org.apache.flink.table.store.file.operation.FileStoreRead;
+import org.apache.flink.table.store.file.operation.FileStoreScan;
+import org.apache.flink.table.types.logical.BigIntType;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.RowType;
+import org.apache.flink.util.Preconditions;
+
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * {@link InputFormat} for table store. It divides all files into {@link InputSplit}s (one split per
+ * bucket) and creates {@link RecordReader} for each split.
+ */
+public class TableStoreInputFormat implements InputFormat<Void, RowDataContainer> {
+
+    @Override
+    public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
+        FileStoreImpl store = getFileStore(jobConf);
+        FileStoreScan scan = store.newScan();
+        List<TableStoreInputSplit> result = new ArrayList<>();
+        for (Map.Entry<BinaryRowData, Map<Integer, List<DataFileMeta>>> pe :
+                scan.plan().groupByPartFiles().entrySet()) {
+            for (Map.Entry<Integer, List<DataFileMeta>> be : pe.getValue().entrySet()) {
+                BinaryRowData partition = pe.getKey();
+                int bucket = be.getKey();
+                String bucketPath =
+                        store.pathFactory()
+                                .createDataFilePathFactory(partition, bucket)
+                                .toPath("")
+                                .toString();
+                TableStoreInputSplit split =
+                        new TableStoreInputSplit(
+                                store.partitionType(),
+                                store.keyType(),
+                                store.valueType(),
+                                partition,
+                                bucket,
+                                be.getValue(),
+                                bucketPath);
+                result.add(split);
+            }
+        }
+        return result.toArray(new TableStoreInputSplit[0]);
+    }
+
+    @Override
+    public RecordReader<Void, RowDataContainer> getRecordReader(
+            InputSplit inputSplit, JobConf jobConf, Reporter reporter) throws IOException {
+        FileStore store = getFileStore(jobConf);
+        FileStoreRead read = store.newRead();
+        TableStoreInputSplit split = (TableStoreInputSplit) inputSplit;
+        org.apache.flink.table.store.file.utils.RecordReader wrapped =
+                read.withDropDelete(true)
+                        .createReader(split.partition(), split.bucket(), split.files());
+        long splitLength = split.getLength();
+        return new JobConfWrapper(jobConf).getPrimaryKeyNames().isPresent()
+                ? new TableStorePkRecordReader(wrapped, splitLength)
+                : new TableStoreCountRecordReader(wrapped, splitLength);
+    }
+
+    private FileStoreImpl getFileStore(JobConf jobConf) {
+        JobConfWrapper wrapper = new JobConfWrapper(jobConf);
+
+        String catalogName = wrapper.getCatalogName();
+        String dbName = wrapper.getDbName();
+        String tableName = wrapper.getTableName();
+        ObjectIdentifier identifier = ObjectIdentifier.of(catalogName, dbName, tableName);
+
+        Configuration fileStoreOptions = new Configuration();
+        fileStoreOptions.setString(FileStoreOptions.PATH, wrapper.getLocation());
+        wrapper.updateFileStoreOptions(fileStoreOptions);
+
+        String user = wrapper.getFileStoreUser();
+
+        List<String> columnNames = wrapper.getColumnNames();
+        List<LogicalType> columnTypes = wrapper.getColumnTypes();
+
+        List<String> partitionColumnNames = wrapper.getPartitionColumnNames();
+
+        RowType rowType =
+                RowType.of(
+                        columnTypes.toArray(new LogicalType[0]),
+                        columnNames.toArray(new String[0]));
+        LogicalType[] partitionLogicalTypes =
+                partitionColumnNames.stream()
+                        .map(s -> columnTypes.get(columnNames.indexOf(s)))
+                        .toArray(LogicalType[]::new);
+        RowType partitionType =
+                RowType.of(partitionLogicalTypes, partitionColumnNames.toArray(new String[0]));
+
+        // same implementation as org.apache.flink.table.store.connector.TableStore#buildFileStore
+        RowType keyType;
+        RowType valueType;
+        MergeFunction mergeFunction;
+        Optional<List<String>> optionalPrimaryKeyNames = wrapper.getPrimaryKeyNames();
+        if (optionalPrimaryKeyNames.isPresent()) {
+            List<String> primaryKeyNames = optionalPrimaryKeyNames.get();

Review Comment:
   I think this logic can live in `flink-table-store-core` too.
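
   A rough sketch of what such a shared helper in `flink-table-store-core` might look like (the class and method names are hypothetical, not from this PR; the derivation rules simply mirror the diff above):

   ```java
   import java.util.List;

   import org.apache.flink.table.types.logical.BigIntType;
   import org.apache.flink.table.types.logical.LogicalType;
   import org.apache.flink.table.types.logical.RowType;

   /** Hypothetical helper, for illustration only. */
   public final class KeyValueTypeUtils {

       /** Key type: the primary key fields if present, otherwise the whole row. */
       public static RowType keyType(RowType rowType, List<String> primaryKeys) {
           if (primaryKeys.isEmpty()) {
               return rowType;
           }
           LogicalType[] types =
                   primaryKeys.stream()
                           .map(name -> rowType.getTypeAt(rowType.getFieldIndex(name)))
                           .toArray(LogicalType[]::new);
           return RowType.of(types, primaryKeys.toArray(new String[0]));
       }

       /** Value type: the whole row if there are primary keys, otherwise a BIGINT count. */
       public static RowType valueType(RowType rowType, List<String> primaryKeys) {
           return primaryKeys.isEmpty()
                   // the "_VALUE_COUNT" field name is arbitrary, chosen only for this sketch
                   ? RowType.of(new LogicalType[] {new BigIntType(false)}, new String[] {"_VALUE_COUNT"})
                   : rowType;
       }
   }
   ```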



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@flink.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [flink-table-store] tsreaper commented on a diff in pull request #113: [FLINK-27347] Support reading external table store table in Hive

Posted by GitBox <gi...@apache.org>.
tsreaper commented on code in PR #113:
URL: https://github.com/apache/flink-table-store/pull/113#discussion_r871087727


##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/TableStoreJobConf.java:
##########
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store;
+
+import org.apache.flink.configuration.ConfigOption;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.table.store.file.FileStoreOptions;
+import org.apache.flink.table.store.hive.HiveTypeUtils;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.utils.LogicalTypeParser;
+import org.apache.flink.util.Preconditions;
+
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.mapred.JobConf;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Properties;
+import java.util.stream.Collectors;
+
+/**
+ * Utility class to convert Hive table property keys and get file store specific configurations from
+ * {@link JobConf}.
+ */
+public class TableStoreJobConf {
+
+    private static final String TBLPROPERTIES_PREFIX = "table-store.";
+    private static final String TBLPROPERTIES_PRIMARY_KEYS = TBLPROPERTIES_PREFIX + "primary-keys";
+    private static final String INTERNAL_TBLPROPERTIES_PREFIX =
+            "table-store.internal.tblproperties.";
+
+    private static final String INTERNAL_DB_NAME = "table-store.internal.db-name";
+    private static final String INTERNAL_TABLE_NAME = "table-store.internal.table-name";
+    private static final String INTERNAL_LOCATION = "table-store.internal.location";
+
+    private static final String INTERNAL_COLUMN_NAMES = "table-store.internal.column-names";
+
+    private static final String INTERNAL_COLUMN_TYPES = "table-store.internal.column-types";
+    private static final String COLUMN_TYPES_SEPARATOR = "\0";
+
+    private static final String INTERNAL_PARTITION_COLUMN_NAMES =
+            "table-store.internal.partition-column-names";
+    private static final String INTERNAL_PRIMARY_KEYS = "table-store.internal.primary-keys";
+
+    private static final String INTERNAL_FILE_STORE_USER = "table-store.internal.file-store.user";
+
+    private final JobConf jobConf;
+
+    public TableStoreJobConf(JobConf jobConf) {
+        this.jobConf = jobConf;
+    }
+
+    public static void update(Properties properties, Map<String, String> map) {
+        String tableNameString = properties.getProperty(hive_metastoreConstants.META_TABLE_NAME);

Review Comment:
   In the future we'd like to support a Hive catalog in table store. For that use case, the database name and table name will be crucial for locating the storage directory of a table. I'd like to leave this as is so I won't forget which keys must be used to read this information out of Hive's properties.
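
   For reference, a trimmed-down sketch of the part of `update` that handles these keys (the `dbName.tableName` format of `META_TABLE_NAME` and the fallback to the `default` database are assumptions, not taken from this PR):

   ```java
   public static void update(Properties properties, Map<String, String> map) {
       // assumption: META_TABLE_NAME has the form "dbName.tableName"
       String tableNameString = properties.getProperty(hive_metastoreConstants.META_TABLE_NAME);
       String[] parts = tableNameString.split("\\.", 2);
       String dbName = parts.length == 2 ? parts[0] : "default";
       String tableName = parts.length == 2 ? parts[1] : parts[0];
       map.put(INTERNAL_DB_NAME, dbName);
       map.put(INTERNAL_TABLE_NAME, tableName);
   }
   ```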



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@flink.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [flink-table-store] JingsongLi commented on a diff in pull request #113: [FLINK-27347] Support reading external table store table in Hive

Posted by GitBox <gi...@apache.org>.
JingsongLi commented on code in PR #113:
URL: https://github.com/apache/flink-table-store/pull/113#discussion_r870135174


##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/hive/objectinspector/TableStoreTimestampObjectInspector.java:
##########
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.hive.objectinspector;
+
+import org.apache.flink.table.data.TimestampData;
+
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import java.sql.Timestamp;
+
+/** {@link AbstractPrimitiveJavaObjectInspector} for TIMESTAMP type. */
+public class TableStoreTimestampObjectInspector extends AbstractPrimitiveJavaObjectInspector
+        implements TimestampObjectInspector {
+
+    public TableStoreTimestampObjectInspector() {
+        super(TypeInfoFactory.timestampTypeInfo);
+    }
+
+    @Override
+    public Timestamp getPrimitiveJavaObject(Object o) {
+        return o == null ? null : ((TimestampData) o).toTimestamp();
+    }
+
+    @Override
+    public TimestampWritable getPrimitiveWritableObject(Object o) {
+        Timestamp ts = getPrimitiveJavaObject(o);
+        return ts == null ? null : new TimestampWritable(ts);
+    }
+
+    @Override
+    public Object copyObject(Object o) {
+        if (o instanceof TimestampData) {
+            TimestampData timestampData = (TimestampData) o;
+            return TimestampData.fromLocalDateTime(timestampData.toLocalDateTime());

Review Comment:
   Why do we need to copy an immutable class, like `Integer`, `String`, etc.?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@flink.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [flink-table-store] tsreaper commented on a diff in pull request #113: [FLINK-27347] Support reading external table store table in Hive

Posted by GitBox <gi...@apache.org>.
tsreaper commented on code in PR #113:
URL: https://github.com/apache/flink-table-store/pull/113#discussion_r869993438


##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/hive/objectinspector/TableStoreTimestampObjectInspector.java:
##########
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.hive.objectinspector;
+
+import org.apache.flink.table.data.TimestampData;
+
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import java.sql.Timestamp;
+
+/** {@link AbstractPrimitiveJavaObjectInspector} for TIMESTAMP type. */
+public class TableStoreTimestampObjectInspector extends AbstractPrimitiveJavaObjectInspector
+        implements TimestampObjectInspector {
+
+    public TableStoreTimestampObjectInspector() {
+        super(TypeInfoFactory.timestampTypeInfo);
+    }
+
+    @Override
+    public Timestamp getPrimitiveJavaObject(Object o) {
+        return o == null ? null : ((TimestampData) o).toTimestamp();
+    }
+
+    @Override
+    public TimestampWritable getPrimitiveWritableObject(Object o) {
+        Timestamp ts = getPrimitiveJavaObject(o);
+        return ts == null ? null : new TimestampWritable(ts);
+    }
+
+    @Override
+    public Object copyObject(Object o) {
+        if (o instanceof TimestampData) {
+            TimestampData timestampData = (TimestampData) o;
+            return TimestampData.fromLocalDateTime(timestampData.toLocalDateTime());

Review Comment:
   We can change this if it becomes a bottleneck. For now it is better to actually copy the object so the behavior matches the method name.
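
   If the copy ever shows up in profiling, a minimal alternative (safe only as long as `TimestampData` stays immutable) would be to share the instance:

   ```java
   @Override
   public Object copyObject(Object o) {
       // TimestampData is immutable, so returning the same instance would be safe
       return o;
   }
   ```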



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@flink.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [flink-table-store] tsreaper commented on a diff in pull request #113: [FLINK-27347] Support reading external table store table in Hive

Posted by GitBox <gi...@apache.org>.
tsreaper commented on code in PR #113:
URL: https://github.com/apache/flink-table-store/pull/113#discussion_r869971898


##########
flink-table-store-hive/src/main/java/org/apache/flink/table/store/mapred/TableStoreInputSplit.java:
##########
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.mapred;
+
+import org.apache.flink.core.memory.DataInputViewStreamWrapper;
+import org.apache.flink.core.memory.DataOutputViewStreamWrapper;
+import org.apache.flink.table.data.binary.BinaryRowData;
+import org.apache.flink.table.runtime.typeutils.RowDataSerializer;
+import org.apache.flink.table.store.file.data.DataFileMeta;
+import org.apache.flink.table.store.file.data.DataFileMetaSerializer;
+import org.apache.flink.table.types.logical.RowType;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.FileSplit;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * {@link FileSplit} for table store. It contains all files to read from a certain partition and
+ * bucket.
+ */
+public class TableStoreInputSplit extends FileSplit {

Review Comment:
   You can't. `FileSplit` is required by `HiveInputSplit`.
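
   For context, Hive wraps every split in `HiveInputFormat.HiveInputSplit`, and its path handling assumes the wrapped split is a `FileSplit`. A simplified illustration (paraphrased, not Hive's actual source):

   ```java
   // inside the HiveInputSplit wrapper (simplified sketch)
   public Path getPath() {
       // only a FileSplit carries a path; other split types cannot be handled here
       return ((FileSplit) inputSplit).getPath();
   }
   ```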



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@flink.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [flink-table-store] tsreaper commented on a diff in pull request #113: [FLINK-27347] Support reading external table store table in Hive

Posted by GitBox <gi...@apache.org>.
tsreaper commented on code in PR #113:
URL: https://github.com/apache/flink-table-store/pull/113#discussion_r870017642


##########
flink-table-store-hive/pom.xml:
##########
@@ -0,0 +1,626 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <artifactId>flink-table-store-parent</artifactId>
+        <groupId>org.apache.flink</groupId>
+        <version>0.2-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>flink-table-store-hive</artifactId>
+    <name>Flink Table Store : Hive</name>
+
+    <packaging>jar</packaging>
+
+    <properties>
+        <hiverunner.version>4.0.0</hiverunner.version>
+        <reflections.version>0.9.8</reflections.version>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-table-store-core</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-core</artifactId>
+            <version>${flink.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.apache.flink</groupId>
+                    <artifactId>flink-metrics-core</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.commons</groupId>
+                    <artifactId>commons-lang3</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.esotericsoftware.kryo</groupId>
+                    <artifactId>kryo</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>commons-collections</groupId>
+                    <artifactId>commons-collections</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.commons</groupId>
+                    <artifactId>commons-compress</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-table-common</artifactId>
+            <version>${flink.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>*</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-table-api-java</artifactId>
+            <version>${flink.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>*</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-table-runtime</artifactId>
+            <version>${flink.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>*</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-common</artifactId>
+            <version>${hadoop.version}</version>
+            <scope>provided</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>log4j</groupId>
+                    <artifactId>log4j</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>slf4j-log4j12</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-client</artifactId>
+            <version>${hadoop.version}</version>
+            <scope>provided</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>log4j</groupId>
+                    <artifactId>log4j</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>slf4j-log4j12</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.hive</groupId>
+            <artifactId>hive-exec</artifactId>
+            <version>${hive.version}</version>
+            <scope>provided</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>log4j</groupId>
+                    <artifactId>log4j</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>slf4j-log4j12</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-annotations</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-core</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-databind</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.orc</groupId>
+                    <artifactId>orc-core</artifactId>
+                </exclusion>
+                <!-- this dependency cannot be fetched from central maven repository anymore -->
+                <exclusion>
+                    <groupId>org.pentaho</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <!-- codegen runtime dependencies -->
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-table-store-codegen-loader</artifactId>
+            <version>${project.version}</version>
+            <scope>runtime</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-streaming-java</artifactId>
+            <version>${flink.version}</version>
+            <scope>runtime</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>*</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <!-- filesystem runtime dependencies -->
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-hadoop-fs</artifactId>
+            <version>${flink.version}</version>
+            <scope>runtime</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>*</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <!-- format runtime dependencies -->
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-table-store-format</artifactId>
+            <version>${project.version}</version>
+            <scope>runtime</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-connector-files</artifactId>
+            <version>${flink.version}</version>
+            <scope>runtime</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>*</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <!-- "runtime" runtime dependencies, mainly for quick sorting memory buffer -->
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-runtime</artifactId>
+            <version>${flink.version}</version>
+            <scope>runtime</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>*</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.xerial.snappy</groupId>
+            <artifactId>snappy-java</artifactId>
+            <version>${snappy.version}</version>
+            <scope>runtime</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.lz4</groupId>
+            <artifactId>lz4-java</artifactId>
+            <version>${lz4.version}</version>
+            <scope>runtime</scope>
+        </dependency>
+
+        <!-- other runtime dependencies -->
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-shaded-jackson</artifactId>
+            <version>${flink.shaded.jackson.version}-${flink.shaded.version}</version>
+            <scope>runtime</scope>
+        </dependency>
+
+        <!-- test dependencies -->
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-table-store-core</artifactId>
+            <version>${project.version}</version>
+            <scope>test</scope>
+            <type>test-jar</type>
+        </dependency>
+
+        <!--
+        IDEA reads classes from the same project from target/classes of that module,
+        so even though we've packaged and shaded avro classes into flink-table-store-format.jar
+        we still have to include this test dependency here.
+        -->
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-sql-avro</artifactId>
+            <version>${flink.version}</version>
+            <scope>test</scope>
+        </dependency>
+
+        <!-- dependencies for IT cases -->
+
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-connector-hive_${scala.binary.version}</artifactId>

Review Comment:
   It is needed for `TableStoreHiveStorageHandlerITCase`; we need `FlinkEmbeddedHiveRunner` from this dependency.
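
   For reference, a rough skeleton of what that IT case looks like with HiveRunner (the runner's package, the storage handler class name and the test body are assumptions, not taken from this PR):

   ```java
   import java.util.List;

   import org.junit.Test;
   import org.junit.runner.RunWith;

   import com.klarna.hiverunner.HiveShell;
   import com.klarna.hiverunner.annotations.HiveSQL;

   // package of FlinkEmbeddedHiveRunner (from the flink-connector-hive test jar) is assumed
   import org.apache.flink.connectors.hive.FlinkEmbeddedHiveRunner;

   @RunWith(FlinkEmbeddedHiveRunner.class)
   public class TableStoreHiveStorageHandlerITCase {

       @HiveSQL(files = {})
       private static HiveShell hiveShell;

       @Test
       public void testReadExternalTable() {
           hiveShell.execute(
                   "CREATE EXTERNAL TABLE t ( a INT, b STRING ) "
                           // storage handler class name below is an assumption
                           + "STORED BY 'org.apache.flink.table.store.hive.TableStoreHiveStorageHandler' "
                           + "LOCATION '/tmp/table-store-test'");
           List<String> rows = hiveShell.executeQuery("SELECT * FROM t");
           // assertions over `rows` would go here
       }
   }
   ```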



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@flink.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org