Posted to issues@hawq.apache.org by shivzone <gi...@git.apache.org> on 2018/01/02 19:39:59 UTC

[GitHub] incubator-hawq pull request #1326: HAWQ-1575. Implemented readable Parquet p...

Github user shivzone commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/1326#discussion_r159298191
  
    --- Diff: pxf/pxf-hdfs/src/main/java/org/apache/hawq/pxf/plugins/hdfs/ParquetDataFragmenter.java ---
    @@ -0,0 +1,103 @@
    +package org.apache.hawq.pxf.plugins.hdfs;
    +
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + * 
    + *   http://www.apache.org/licenses/LICENSE-2.0
    + * 
    + * Unless required by applicable law or agreed to in writing,
    + * software distributed under the License is distributed on an
    + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    + * KIND, either express or implied.  See the License for the
    + * specific language governing permissions and limitations
    + * under the License.
    + */
    +
    +
    +import org.apache.hadoop.conf.Configuration;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.hadoop.mapreduce.InputSplit;
    +import org.apache.hadoop.mapreduce.lib.input.FileSplit;
    +import org.apache.hadoop.mapred.JobConf;
    +import org.apache.hadoop.mapreduce.Job;
    +import org.apache.hawq.pxf.api.Fragment;
    +import org.apache.hawq.pxf.api.Fragmenter;
    +import org.apache.hawq.pxf.api.utilities.InputData;
    +import org.apache.hawq.pxf.plugins.hdfs.utilities.HdfsUtilities;
    +import org.apache.parquet.format.converter.ParquetMetadataConverter;
    +import org.apache.parquet.hadoop.ParquetFileReader;
    +import org.apache.parquet.hadoop.ParquetInputFormat;
    +import org.apache.parquet.example.data.Group;
    +import org.apache.parquet.hadoop.metadata.ParquetMetadata;
    +import org.apache.parquet.schema.MessageType;
    +
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.List;
    +
    +public class ParquetDataFragmenter extends Fragmenter {
    +    private Job job;
    +
    +    public ParquetDataFragmenter(InputData md) {
    +        super(md);
    +        JobConf jobConf = new JobConf(new Configuration(), ParquetDataFragmenter.class);
    +        try {
    +            job = Job.getInstance(jobConf);
    +        } catch (IOException e) {
    +            throw new RuntimeException("Unable to instantiate a job for reading fragments", e);
    +        }
    +    }
    +
    +
    +    @Override
    --- End diff --
    
    Comments would be useful here describing the components of the Fragment data.
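    
    For illustration, a minimal sketch of the kind of annotated Fragment
    construction being requested might look like the following. This is an
    assumption, not the PR's code: it mirrors the sibling HdfsDataFragmenter
    (one Fragment per file split), relies on the PXF Fragmenter/Plugin
    base-class fields "fragments" and "inputData", and uses a hypothetical
    getSplits helper; the actual implementation in this PR may differ.
    
        @Override
        public List<Fragment> getFragments() throws Exception {
            // Resolve the user-supplied data source to an absolute HDFS path.
            Path path = new Path(HdfsUtilities.absoluteDataPath(inputData.getDataSource()));
            // getSplits is a hypothetical helper returning List<InputSplit>.
            for (InputSplit split : getSplits(path)) {
                FileSplit fsp = (FileSplit) split;
                // sourceName: the file this fragment reads from.
                String filepath = fsp.getPath().toUri().getPath();
                // hosts: datanodes holding a replica of the split; HAWQ uses
                // these for locality-aware assignment of fragments to segments.
                String[] hosts = fsp.getLocations();
                // metadata: the serialized split (start offset, length, hosts)
                // that the accessor later uses to open the right byte range.
                byte[] fragmentMetadata = HdfsUtilities.prepareFragmentMetadata(fsp);
                fragments.add(new Fragment(filepath, hosts, fragmentMetadata));
            }
            return fragments;
        }
    
    Each Fragment pairs a source file with the hosts that store it, which
    lets the PXF server hand out reads close to the data.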


---