You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by bl...@apache.org on 2015/04/28 01:12:29 UTC
[32/51] [partial] parquet-mr git commit: PARQUET-23: Rename to
org.apache.parquet.
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/dictionary/PlainValuesDictionary.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/dictionary/PlainValuesDictionary.java b/parquet-column/src/main/java/parquet/column/values/dictionary/PlainValuesDictionary.java
deleted file mode 100644
index 34f549a..0000000
--- a/parquet-column/src/main/java/parquet/column/values/dictionary/PlainValuesDictionary.java
+++ /dev/null
@@ -1,310 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.dictionary;
-
-import static parquet.bytes.BytesUtils.readIntLittleEndian;
-import static parquet.column.Encoding.PLAIN_DICTIONARY;
-import static parquet.column.Encoding.PLAIN;
-
-import java.io.IOException;
-
-import parquet.Preconditions;
-import parquet.column.Dictionary;
-import parquet.column.page.DictionaryPage;
-import parquet.column.values.plain.PlainValuesReader.DoublePlainValuesReader;
-import parquet.column.values.plain.PlainValuesReader.FloatPlainValuesReader;
-import parquet.column.values.plain.PlainValuesReader.IntegerPlainValuesReader;
-import parquet.column.values.plain.PlainValuesReader.LongPlainValuesReader;
-import parquet.io.ParquetDecodingException;
-import parquet.io.api.Binary;
-
-/**
- * Base class for {@link Dictionary} implementations that are built from a plain encoded dictionary page.
- *
- */
-public abstract class PlainValuesDictionary extends Dictionary {
-
- /**
- * @param dictionaryPage the PLAIN (or PLAIN_DICTIONARY) encoded content of the dictionary
- * @throws IOException declared in the signature; not visibly thrown by this constructor's own statements
- */
- protected PlainValuesDictionary(DictionaryPage dictionaryPage) throws IOException {
- super(dictionaryPage.getEncoding()); // hand the page's encoding to the Dictionary superclass
- if (dictionaryPage.getEncoding() != PLAIN_DICTIONARY
- && dictionaryPage.getEncoding() != PLAIN) {
- throw new ParquetDecodingException("Dictionary data encoding type not supported: " + dictionaryPage.getEncoding()); // every other encoding is rejected
- }
- }
-
- /**
- * A dictionary of {@link Binary} values, all decoded at construction time from a plain encoded page.
- */
- public static class PlainBinaryDictionary extends PlainValuesDictionary {
-
- private Binary[] binaryDictionaryContent = null; // decoded values, indexed by dictionary id
-
- /**
- * Decodes {@link Binary} values from a {@link DictionaryPage}.
- *
- * Values are read as length-prefixed values with a 4-byte little-endian
- * length.
- * Equivalent to calling the two-argument constructor with a {@code null} length.
- * @param dictionaryPage a {@code DictionaryPage} of encoded binary values
- * @throws IOException if thrown while decoding the page
- */
- public PlainBinaryDictionary(DictionaryPage dictionaryPage) throws IOException {
- this(dictionaryPage, null);
- }
-
- /**
- * Decodes {@link Binary} values from a {@link DictionaryPage}.
- *
- * If the given {@code length} is null, the values will be read as length-
- * prefixed values with a 4-byte little-endian length. If length is not
- * null, it will be used as the length for all fixed-length {@code Binary}
- * values read from the page.
- *
- * @param dictionaryPage a {@code DictionaryPage} of encoded binary values
- * @param length a fixed length of binary arrays, or null if not fixed; must be greater than 0 when given
- * @throws IOException if thrown while decoding the page
- */
- public PlainBinaryDictionary(DictionaryPage dictionaryPage, Integer length) throws IOException {
- super(dictionaryPage);
- final byte[] dictionaryBytes = dictionaryPage.getBytes().toByteArray();
- binaryDictionaryContent = new Binary[dictionaryPage.getDictionarySize()]; // one slot per dictionary entry
- int offset = 0;
- if (length == null) {
- // dictionary values are stored in order: size (4 bytes LE) followed by {size} bytes
- for (int i = 0; i < binaryDictionaryContent.length; i++) {
- int len = readIntLittleEndian(dictionaryBytes, offset); // NOTE(review): len is not checked against the bytes remaining
- // step over the 4-byte length prefix that was just read
- offset += 4;
- // wrap the content in a binary
- binaryDictionaryContent[i] = Binary.fromByteArray(dictionaryBytes, offset, len);
- // increment to the next value
- offset += len;
- }
- } else {
- // dictionary values are stored as fixed-length arrays
- Preconditions.checkArgument(length > 0,
- "Invalid byte array length: " + length); // presumably throws IllegalArgumentException when the check fails - confirm
- for (int i = 0; i < binaryDictionaryContent.length; i++) {
- // wrap the content in a Binary
- binaryDictionaryContent[i] = Binary.fromByteArray(
- dictionaryBytes, offset, length);
- // increment to the next value
- offset += length;
- }
- }
- }
-
- @Override
- public Binary decodeToBinary(int id) {
- return binaryDictionaryContent[id]; // id is not range-checked here
- }
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder("PlainBinaryDictionary {\n");
- for (int i = 0; i < binaryDictionaryContent.length; i++) {
- sb.append(i).append(" => ").append(binaryDictionaryContent[i]).append("\n"); // one line per entry: id => value
- }
- return sb.append("}").toString();
- }
-
- @Override
- public int getMaxId() {
- return binaryDictionaryContent.length - 1; // -1 when the dictionary is empty
- }
-
- }
-
- /**
- * A dictionary of long values, all decoded at construction time from a plain encoded page.
- */
- public static class PlainLongDictionary extends PlainValuesDictionary {
-
- private long[] longDictionaryContent = null; // decoded values, indexed by dictionary id
-
- /**
- * @param dictionaryPage a {@code DictionaryPage} of plain encoded long values
- * @throws IOException if thrown while reading the page or initializing the value reader
- */
- public PlainLongDictionary(DictionaryPage dictionaryPage) throws IOException {
- super(dictionaryPage);
- final byte[] dictionaryBytes = dictionaryPage.getBytes().toByteArray();
- longDictionaryContent = new long[dictionaryPage.getDictionarySize()]; // one slot per dictionary entry
- LongPlainValuesReader longReader = new LongPlainValuesReader();
- longReader.initFromPage(dictionaryPage.getDictionarySize(), dictionaryBytes, 0); // decode from byte 0 of the page
- for (int i = 0; i < longDictionaryContent.length; i++) {
- longDictionaryContent[i] = longReader.readLong();
- }
- }
-
- @Override
- public long decodeToLong(int id) {
- return longDictionaryContent[id]; // id is not range-checked here
- }
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder("PlainLongDictionary {\n");
- for (int i = 0; i < longDictionaryContent.length; i++) {
- sb.append(i).append(" => ").append(longDictionaryContent[i]).append("\n"); // one line per entry: id => value
- }
- return sb.append("}").toString();
- }
-
- @Override
- public int getMaxId() {
- return longDictionaryContent.length - 1; // -1 when the dictionary is empty
- }
-
- }
-
- /**
- * A dictionary of double values, all decoded at construction time from a plain encoded page.
- */
- public static class PlainDoubleDictionary extends PlainValuesDictionary {
-
- private double[] doubleDictionaryContent = null; // decoded values, indexed by dictionary id
-
- /**
- * @param dictionaryPage a {@code DictionaryPage} of plain encoded double values
- * @throws IOException if thrown while reading the page or initializing the value reader
- */
- public PlainDoubleDictionary(DictionaryPage dictionaryPage) throws IOException {
- super(dictionaryPage);
- final byte[] dictionaryBytes = dictionaryPage.getBytes().toByteArray();
- doubleDictionaryContent = new double[dictionaryPage.getDictionarySize()]; // one slot per dictionary entry
- DoublePlainValuesReader doubleReader = new DoublePlainValuesReader();
- doubleReader.initFromPage(dictionaryPage.getDictionarySize(), dictionaryBytes, 0); // decode from byte 0 of the page
- for (int i = 0; i < doubleDictionaryContent.length; i++) {
- doubleDictionaryContent[i] = doubleReader.readDouble();
- }
- }
-
- @Override
- public double decodeToDouble(int id) {
- return doubleDictionaryContent[id]; // id is not range-checked here
- }
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder("PlainDoubleDictionary {\n");
- for (int i = 0; i < doubleDictionaryContent.length; i++) {
- sb.append(i).append(" => ").append(doubleDictionaryContent[i]).append("\n"); // one line per entry: id => value
- }
- return sb.append("}").toString();
- }
-
- @Override
- public int getMaxId() {
- return doubleDictionaryContent.length - 1; // -1 when the dictionary is empty
- }
-
- }
-
- /**
- * A dictionary of int values, all decoded at construction time from a plain encoded page.
- */
- public static class PlainIntegerDictionary extends PlainValuesDictionary {
-
- private int[] intDictionaryContent = null; // decoded values, indexed by dictionary id
-
- /**
- * @param dictionaryPage a {@code DictionaryPage} of plain encoded int values
- * @throws IOException if thrown while reading the page or initializing the value reader
- */
- public PlainIntegerDictionary(DictionaryPage dictionaryPage) throws IOException {
- super(dictionaryPage);
- final byte[] dictionaryBytes = dictionaryPage.getBytes().toByteArray();
- intDictionaryContent = new int[dictionaryPage.getDictionarySize()]; // one slot per dictionary entry
- IntegerPlainValuesReader intReader = new IntegerPlainValuesReader();
- intReader.initFromPage(dictionaryPage.getDictionarySize(), dictionaryBytes, 0); // decode from byte 0 of the page
- for (int i = 0; i < intDictionaryContent.length; i++) {
- intDictionaryContent[i] = intReader.readInteger();
- }
- }
-
- @Override
- public int decodeToInt(int id) {
- return intDictionaryContent[id]; // id is not range-checked here
- }
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder("PlainIntegerDictionary {\n");
- for (int i = 0; i < intDictionaryContent.length; i++) {
- sb.append(i).append(" => ").append(intDictionaryContent[i]).append("\n"); // one line per entry: id => value
- }
- return sb.append("}").toString();
- }
-
- @Override
- public int getMaxId() {
- return intDictionaryContent.length - 1; // -1 when the dictionary is empty
- }
-
- }
-
- /**
- * A dictionary of float values, all decoded at construction time from a plain encoded page.
- */
- public static class PlainFloatDictionary extends PlainValuesDictionary {
-
- private float[] floatDictionaryContent = null; // decoded values, indexed by dictionary id
-
- /**
- * @param dictionaryPage a {@code DictionaryPage} of plain encoded float values
- * @throws IOException if thrown while reading the page or initializing the value reader
- */
- public PlainFloatDictionary(DictionaryPage dictionaryPage) throws IOException {
- super(dictionaryPage);
- final byte[] dictionaryBytes = dictionaryPage.getBytes().toByteArray();
- floatDictionaryContent = new float[dictionaryPage.getDictionarySize()]; // one slot per dictionary entry
- FloatPlainValuesReader floatReader = new FloatPlainValuesReader();
- floatReader.initFromPage(dictionaryPage.getDictionarySize(), dictionaryBytes, 0); // decode from byte 0 of the page
- for (int i = 0; i < floatDictionaryContent.length; i++) {
- floatDictionaryContent[i] = floatReader.readFloat();
- }
- }
-
- @Override
- public float decodeToFloat(int id) {
- return floatDictionaryContent[id]; // id is not range-checked here
- }
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder("PlainFloatDictionary {\n");
- for (int i = 0; i < floatDictionaryContent.length; i++) {
- sb.append(i).append(" => ").append(floatDictionaryContent[i]).append("\n"); // one line per entry: id => value
- }
- return sb.append("}").toString();
- }
-
- @Override
- public int getMaxId() {
- return floatDictionaryContent.length - 1; // -1 when the dictionary is empty
- }
-
- }
-
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/fallback/FallbackValuesWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/fallback/FallbackValuesWriter.java b/parquet-column/src/main/java/parquet/column/values/fallback/FallbackValuesWriter.java
deleted file mode 100644
index 681ced8..0000000
--- a/parquet-column/src/main/java/parquet/column/values/fallback/FallbackValuesWriter.java
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.fallback;
-
-import parquet.bytes.BytesInput;
-import parquet.column.Encoding;
-import parquet.column.page.DictionaryPage;
-import parquet.column.values.RequiresFallback;
-import parquet.column.values.ValuesWriter;
-import parquet.io.api.Binary;
-
-public class FallbackValuesWriter<I extends ValuesWriter & RequiresFallback, F extends ValuesWriter> extends ValuesWriter {
- // Writes through initialWriter until a fallback is triggered, then through fallBackWriter.
- public static <I extends ValuesWriter & RequiresFallback, F extends ValuesWriter> FallbackValuesWriter<I, F> of(I initialWriter, F fallBackWriter) {
- return new FallbackValuesWriter<I, F>(initialWriter, fallBackWriter); // factory that lets callers omit the type arguments
- }
-
- /** writer to start with */
- public final I initialWriter;
- /** writer that takes over once a fallback is triggered */
- public final F fallBackWriter;
-
- private boolean fellBackAlready = false; // set by fallBack(), cleared by resetDictionary()
-
- /** writer currently written to */
- private ValuesWriter currentWriter;
-
- private boolean initialUsedAndHadDictionary = false; // set when getEncoding() sees a dictionary encoding before any fallback
-
- /* Size of the raw data. Using a dictionary has no effect on this value. It is compared with the encoded
- * data size to decide whether falling back to plain encoding is better.
- * It is also returned by getBufferedSize, so the page will be written based on the raw data size.
- */
- private long rawDataByteSize = 0;
-
- /** indicates if this is the first page being processed */
- private boolean firstPage = true;
-
- public FallbackValuesWriter(I initialWriter, F fallBackWriter) {
- super();
- this.initialWriter = initialWriter;
- this.fallBackWriter = fallBackWriter;
- this.currentWriter = initialWriter; // always start with the initial writer
- }
-
- @Override
- public long getBufferedSize() {
- // use raw data size to decide if we want to flush the page
- // so the actual size of the page written could be much smaller
- // due to dictionary encoding. This prevents the page from being too big when fallback happens.
- return rawDataByteSize;
- }
-
- @Override
- public BytesInput getBytes() {
- if (!fellBackAlready && firstPage) {
- // we use the first page to decide if we're going to use this encoding
- BytesInput bytes = initialWriter.getBytes();
- if (!initialWriter.isCompressionSatisfying(rawDataByteSize, bytes.size())) {
- fallBack(); // the bytes returned below then come from the fallback writer
- } else {
- return bytes;
- }
- }
- return currentWriter.getBytes();
- }
-
- @Override
- public Encoding getEncoding() {
- Encoding encoding = currentWriter.getEncoding();
- if (!fellBackAlready && !initialUsedAndHadDictionary) {
- initialUsedAndHadDictionary = encoding.usesDictionary(); // remembered so the dictionary calls below target initialWriter
- }
- return encoding;
- }
-
- @Override
- public void reset() {
- rawDataByteSize = 0;
- firstPage = false; // any reset ends the first-page check made in getBytes()
- currentWriter.reset();
- }
-
- public DictionaryPage createDictionaryPage() {
- if (initialUsedAndHadDictionary) {
- return initialWriter.createDictionaryPage(); // used even after a fallback switched currentWriter
- } else {
- return currentWriter.createDictionaryPage();
- }
- }
-
- public void resetDictionary() {
- if (initialUsedAndHadDictionary) {
- initialWriter.resetDictionary();
- } else {
- currentWriter.resetDictionary();
- }
- currentWriter = initialWriter; // return to the state set up by the constructor
- fellBackAlready = false;
- initialUsedAndHadDictionary = false;
- firstPage = true;
- }
-
- @Override
- public long getAllocatedSize() {
- return currentWriter.getAllocatedSize(); // only the writer currently in use is counted
- }
-
- @Override
- public String memUsageString(String prefix) {
- return String.format(
- "%s FallbackValuesWriter{\n"
- + "%s\n"
- + "%s\n"
- + "%s}\n",
- prefix,
- initialWriter.memUsageString(prefix + " initial:"),
- fallBackWriter.memUsageString(prefix + " fallback:"),
- prefix
- );
- }
-
- private void checkFallback() {
- if (!fellBackAlready && initialWriter.shouldFallBack()) { // at most one fallback until resetDictionary()
- fallBack();
- }
- }
-
- private void fallBack() {
- fellBackAlready = true;
- initialWriter.fallBackAllValuesTo(fallBackWriter); // presumably re-writes the values buffered so far into fallBackWriter - confirm
- currentWriter = fallBackWriter;
- }
-
- // pass-through writes: count the raw size, forward to the current writer, then check for fallback
-
- public void writeByte(int value) {
- rawDataByteSize += 1;
- currentWriter.writeByte(value);
- checkFallback();
- }
-
- public void writeBytes(Binary v) {
- // for raw data, the length (4-byte int) is stored, followed by the binary content itself
- rawDataByteSize += v.length() + 4;
- currentWriter.writeBytes(v);
- checkFallback();
- }
-
- public void writeInteger(int v) {
- rawDataByteSize += 4;
- currentWriter.writeInteger(v);
- checkFallback();
- }
-
- public void writeLong(long v) {
- rawDataByteSize += 8;
- currentWriter.writeLong(v);
- checkFallback();
- }
-
- public void writeFloat(float v) {
- rawDataByteSize += 4;
- currentWriter.writeFloat(v);
- checkFallback();
- }
-
- public void writeDouble(double v) {
- rawDataByteSize += 8;
- currentWriter.writeDouble(v);
- checkFallback();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/plain/BinaryPlainValuesReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/plain/BinaryPlainValuesReader.java b/parquet-column/src/main/java/parquet/column/values/plain/BinaryPlainValuesReader.java
deleted file mode 100644
index 80f9e2b..0000000
--- a/parquet-column/src/main/java/parquet/column/values/plain/BinaryPlainValuesReader.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.plain;
-
-import static parquet.Log.DEBUG;
-
-import java.io.IOException;
-
-import parquet.Log;
-import parquet.bytes.BytesUtils;
-import parquet.column.values.ValuesReader;
-import parquet.io.ParquetDecodingException;
-import parquet.io.api.Binary;
-
-public class BinaryPlainValuesReader extends ValuesReader { // reads values stored as a 4-byte little-endian length followed by that many bytes
- private static final Log LOG = Log.getLog(BinaryPlainValuesReader.class);
- private byte[] in; // page bytes, set by initFromPage
- private int offset; // index in the page bytes of the next value's length prefix
-
- @Override
- public Binary readBytes() {
- try {
- int length = BytesUtils.readIntLittleEndian(in, offset);
- int start = offset + 4; // the payload begins right after the length prefix
- offset = start + length; // NOTE(review): advanced before wrapping, so a failure below reports the advanced offset
- return Binary.fromByteArray(in, start, length);
- } catch (IOException e) {
- throw new ParquetDecodingException("could not read bytes at offset " + offset, e);
- } catch (RuntimeException e) {
- throw new ParquetDecodingException("could not read bytes at offset " + offset, e);
- }
- }
-
- @Override
- public void skip() {
- try {
- int length = BytesUtils.readIntLittleEndian(in, offset);
- offset += 4 + length; // step over the length prefix and the payload
- } catch (IOException e) {
- throw new ParquetDecodingException("could not skip bytes at offset " + offset, e);
- } catch (RuntimeException e) {
- throw new ParquetDecodingException("could not skip bytes at offset " + offset, e);
- }
- }
-
- @Override
- public void initFromPage(int valueCount, byte[] in, int offset)
- throws IOException { // valueCount is not used by this reader
- if (DEBUG) LOG.debug("init from page at offset "+ offset + " for length " + (in.length - offset));
- this.in = in;
- this.offset = offset;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/plain/BooleanPlainValuesReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/plain/BooleanPlainValuesReader.java b/parquet-column/src/main/java/parquet/column/values/plain/BooleanPlainValuesReader.java
deleted file mode 100644
index 6eff977..0000000
--- a/parquet-column/src/main/java/parquet/column/values/plain/BooleanPlainValuesReader.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.plain;
-
-import static parquet.Log.DEBUG;
-import static parquet.column.values.bitpacking.Packer.LITTLE_ENDIAN;
-
-import java.io.IOException;
-
-import parquet.Log;
-import parquet.column.values.ValuesReader;
-import parquet.column.values.bitpacking.ByteBitPackingValuesReader;
-
-/**
- * decodes booleans for the plain encoding: one bit at a time (0 = false)
- *
- * @author Julien Le Dem
- *
- */
-public class BooleanPlainValuesReader extends ValuesReader {
- private static final Log LOG = Log.getLog(BooleanPlainValuesReader.class);
-
- private ByteBitPackingValuesReader in = new ByteBitPackingValuesReader(1, LITTLE_ENDIAN); // all reads are delegated to this bit-packing reader
-
- /**
- * Reads the next packed value; 0 decodes as false and anything else as true.
- * {@inheritDoc}
- * @see parquet.column.values.ValuesReader#readBoolean()
- */
- @Override
- public boolean readBoolean() {
- return in.readInteger() == 0 ? false : true;
- }
-
- /**
- * {@inheritDoc}
- * @see parquet.column.values.ValuesReader#skip()
- */
- @Override
- public void skip() {
- in.readInteger(); // consume one packed value and discard it
- }
-
- /**
- * {@inheritDoc}
- * @see parquet.column.values.ValuesReader#initFromPage(int, byte[], int)
- */
- @Override
- public void initFromPage(int valueCount, byte[] in, int offset) throws IOException {
- if (DEBUG) LOG.debug("init from page at offset "+ offset + " for length " + (in.length - offset));
- this.in.initFromPage(valueCount, in, offset);
- }
-
- @Override
- public int getNextOffset() {
- return this.in.getNextOffset(); // delegated to the bit-packing reader
- }
-
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/plain/BooleanPlainValuesWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/plain/BooleanPlainValuesWriter.java b/parquet-column/src/main/java/parquet/column/values/plain/BooleanPlainValuesWriter.java
deleted file mode 100644
index 0250953..0000000
--- a/parquet-column/src/main/java/parquet/column/values/plain/BooleanPlainValuesWriter.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.plain;
-
-import static parquet.column.Encoding.PLAIN;
-import static parquet.column.values.bitpacking.Packer.LITTLE_ENDIAN;
-import parquet.bytes.BytesInput;
-import parquet.column.Encoding;
-import parquet.column.values.ValuesWriter;
-import parquet.column.values.bitpacking.ByteBitPackingValuesWriter;
-
-
-/**
- * An implementation of the PLAIN encoding for booleans: each value is handed
- * to a {@code ByteBitPackingValuesWriter} as the integer 1 (true) or 0 (false).
- * @author Julien Le Dem
- *
- */
-public class BooleanPlainValuesWriter extends ValuesWriter {
-
- private ByteBitPackingValuesWriter bitPackingWriter; // every method below delegates to this writer
-
- public BooleanPlainValuesWriter() {
- bitPackingWriter = new ByteBitPackingValuesWriter(1, LITTLE_ENDIAN); // presumably a bit width of 1 - confirm the first argument's meaning
- }
-
- @Override
- public final void writeBoolean(boolean v) {
- bitPackingWriter.writeInteger(v ? 1 : 0);
- }
-
- @Override
- public long getBufferedSize() {
- return bitPackingWriter.getBufferedSize();
- }
-
- @Override
- public BytesInput getBytes() {
- return bitPackingWriter.getBytes();
- }
-
- @Override
- public void reset() {
- bitPackingWriter.reset();
- }
-
- @Override
- public long getAllocatedSize() {
- return bitPackingWriter.getAllocatedSize();
- }
-
- @Override
- public Encoding getEncoding() {
- return PLAIN; // always PLAIN, whatever has been written
- }
-
- @Override
- public String memUsageString(String prefix) {
- return bitPackingWriter.memUsageString(prefix);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/plain/FixedLenByteArrayPlainValuesReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/plain/FixedLenByteArrayPlainValuesReader.java b/parquet-column/src/main/java/parquet/column/values/plain/FixedLenByteArrayPlainValuesReader.java
deleted file mode 100644
index 13ed186..0000000
--- a/parquet-column/src/main/java/parquet/column/values/plain/FixedLenByteArrayPlainValuesReader.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.plain;
-
-import java.io.IOException;
-import parquet.Log;
-import parquet.column.values.ValuesReader;
-import parquet.io.ParquetDecodingException;
-import parquet.io.api.Binary;
-
-import static parquet.Log.DEBUG;
-
-/**
- * ValuesReader for FIXED_LEN_BYTE_ARRAY.
- * Each value is {@code length} consecutive bytes of the page; no per-value length prefix is read.
- * @author David Z. Chen <dc...@linkedin.com>
- */
-public class FixedLenByteArrayPlainValuesReader extends ValuesReader {
- private static final Log LOG = Log.getLog(FixedLenByteArrayPlainValuesReader.class);
- private byte[] in; // page bytes, set by initFromPage
- private int offset; // index in the page bytes of the next value
- private int length; // byte length of every value, fixed at construction
-
- public FixedLenByteArrayPlainValuesReader(int length) {
- this.length = length; // not validated here
- }
-
- @Override
- public Binary readBytes() {
- try {
- int start = offset;
- offset = start + length; // NOTE(review): advanced before wrapping, so a failure below reports the advanced offset
- return Binary.fromByteArray(in, start, length);
- } catch (RuntimeException e) {
- throw new ParquetDecodingException("could not read bytes at offset " + offset, e);
- }
- }
-
- @Override
- public void skip() {
- offset += length; // no bounds check against the page size
- }
-
- @Override
- public void initFromPage(int valueCount, byte[] in, int offset)
- throws IOException { // valueCount is not used by this reader
- if (DEBUG) LOG.debug("init from page at offset "+ offset + " for length " + (in.length - offset));
- this.in = in;
- this.offset = offset;
- }
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/plain/FixedLenByteArrayPlainValuesWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/plain/FixedLenByteArrayPlainValuesWriter.java b/parquet-column/src/main/java/parquet/column/values/plain/FixedLenByteArrayPlainValuesWriter.java
deleted file mode 100644
index a648efe..0000000
--- a/parquet-column/src/main/java/parquet/column/values/plain/FixedLenByteArrayPlainValuesWriter.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.plain;
-
-import java.io.IOException;
-
-import parquet.Log;
-import parquet.bytes.BytesInput;
-import parquet.bytes.CapacityByteArrayOutputStream;
-import parquet.bytes.LittleEndianDataOutputStream;
-import parquet.column.values.ValuesWriter;
-import parquet.column.Encoding;
-import parquet.io.ParquetEncodingException;
-import parquet.io.api.Binary;
-
-/**
- * ValuesWriter for FIXED_LEN_BYTE_ARRAY.
- * Every value must be exactly {@code length} bytes long; any other size is rejected by writeBytes.
- * @author David Z. Chen <dc...@linkedin.com>
- */
-public class FixedLenByteArrayPlainValuesWriter extends ValuesWriter {
- private static final Log LOG = Log.getLog(PlainValuesWriter.class); // NOTE(review): logger is created for PlainValuesWriter, not this class - looks like a copy-paste slip
-
- private CapacityByteArrayOutputStream arrayOut; // buffer that holds the written bytes
- private LittleEndianDataOutputStream out; // stream wrapper around arrayOut that values are written to
- private int length; // required byte length of every value
-
- public FixedLenByteArrayPlainValuesWriter(int length, int initialSize, int pageSize) {
- this.length = length;
- this.arrayOut = new CapacityByteArrayOutputStream(initialSize, pageSize);
- this.out = new LittleEndianDataOutputStream(arrayOut);
- }
-
- @Override
- public final void writeBytes(Binary v) {
- if (v.length() != length) { // enforce the fixed length before writing anything
- throw new IllegalArgumentException("Fixed Binary size " + v.length() +
- " does not match field type length " + length);
- }
- try {
- v.writeTo(out);
- } catch (IOException e) {
- throw new ParquetEncodingException("could not write fixed bytes", e);
- }
- }
-
- @Override
- public long getBufferedSize() {
- return arrayOut.size();
- }
-
- @Override
- public BytesInput getBytes() {
- try {
- out.flush(); // flush the stream wrapper before the buffer is handed out
- } catch (IOException e) {
- throw new ParquetEncodingException("could not write page", e);
- }
- if (Log.DEBUG) LOG.debug("writing a buffer of size " + arrayOut.size());
- return BytesInput.from(arrayOut);
- }
-
- @Override
- public void reset() {
- arrayOut.reset();
- }
-
- @Override
- public long getAllocatedSize() {
- return arrayOut.getCapacity();
- }
-
- @Override
- public Encoding getEncoding() {
- return Encoding.PLAIN; // always PLAIN, whatever has been written
- }
-
- @Override
- public String memUsageString(String prefix) {
- return arrayOut.memUsageString(prefix + " PLAIN");
- }
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/plain/PlainValuesReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/plain/PlainValuesReader.java b/parquet-column/src/main/java/parquet/column/values/plain/PlainValuesReader.java
deleted file mode 100644
index 710a672..0000000
--- a/parquet-column/src/main/java/parquet/column/values/plain/PlainValuesReader.java
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.plain;
-
-import static parquet.Log.DEBUG;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-
-import parquet.Log;
-import parquet.bytes.LittleEndianDataInputStream;
-import parquet.column.values.ValuesReader;
-import parquet.io.ParquetDecodingException;
-
-/**
- * Plain encoding for float, double, int, long
- *
- * @author Julien Le Dem
- *
- */
-abstract public class PlainValuesReader extends ValuesReader {
- private static final Log LOG = Log.getLog(PlainValuesReader.class);
-
- protected LittleEndianDataInputStream in;
-
- /**
- * {@inheritDoc}
- * @see parquet.column.values.ValuesReader#initFromPage(byte[], int)
- */
- @Override
- public void initFromPage(int valueCount, byte[] in, int offset) throws IOException {
- if (DEBUG) LOG.debug("init from page at offset "+ offset + " for length " + (in.length - offset));
- this.in = new LittleEndianDataInputStream(new ByteArrayInputStream(in, offset, in.length - offset));
- }
-
- public static class DoublePlainValuesReader extends PlainValuesReader {
-
- @Override
- public void skip() {
- try {
- in.skipBytes(8);
- } catch (IOException e) {
- throw new ParquetDecodingException("could not skip double", e);
- }
- }
-
- @Override
- public double readDouble() {
- try {
- return in.readDouble();
- } catch (IOException e) {
- throw new ParquetDecodingException("could not read double", e);
- }
- }
- }
-
- public static class FloatPlainValuesReader extends PlainValuesReader {
-
- @Override
- public void skip() {
- try {
- in.skipBytes(4);
- } catch (IOException e) {
- throw new ParquetDecodingException("could not skip float", e);
- }
- }
-
- @Override
- public float readFloat() {
- try {
- return in.readFloat();
- } catch (IOException e) {
- throw new ParquetDecodingException("could not read float", e);
- }
- }
- }
-
- public static class IntegerPlainValuesReader extends PlainValuesReader {
-
- @Override
- public void skip() {
- try {
- in.skipBytes(4);
- } catch (IOException e) {
- throw new ParquetDecodingException("could not skip int", e);
- }
- }
-
- @Override
- public int readInteger() {
- try {
- return in.readInt();
- } catch (IOException e) {
- throw new ParquetDecodingException("could not read int", e);
- }
- }
- }
-
- public static class LongPlainValuesReader extends PlainValuesReader {
-
- @Override
- public void skip() {
- try {
- in.skipBytes(8);
- } catch (IOException e) {
- throw new ParquetDecodingException("could not skip long", e);
- }
- }
-
- @Override
- public long readLong() {
- try {
- return in.readLong();
- } catch (IOException e) {
- throw new ParquetDecodingException("could not read long", e);
- }
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/plain/PlainValuesWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/plain/PlainValuesWriter.java b/parquet-column/src/main/java/parquet/column/values/plain/PlainValuesWriter.java
deleted file mode 100644
index dd624a4..0000000
--- a/parquet-column/src/main/java/parquet/column/values/plain/PlainValuesWriter.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.plain;
-
-import java.io.IOException;
-import java.nio.charset.Charset;
-
-import parquet.Log;
-import parquet.bytes.BytesInput;
-import parquet.bytes.CapacityByteArrayOutputStream;
-import parquet.bytes.LittleEndianDataOutputStream;
-import parquet.column.Encoding;
-import parquet.column.values.ValuesWriter;
-import parquet.io.ParquetEncodingException;
-import parquet.io.api.Binary;
-
-/**
- * Plain encoding except for booleans
- *
- * @author Julien Le Dem
- *
- */
-public class PlainValuesWriter extends ValuesWriter {
- private static final Log LOG = Log.getLog(PlainValuesWriter.class);
-
- public static final Charset CHARSET = Charset.forName("UTF-8");
-
- private CapacityByteArrayOutputStream arrayOut;
- private LittleEndianDataOutputStream out;
-
- public PlainValuesWriter(int initialSize, int pageSize) {
- arrayOut = new CapacityByteArrayOutputStream(initialSize, pageSize);
- out = new LittleEndianDataOutputStream(arrayOut);
- }
-
- @Override
- public final void writeBytes(Binary v) {
- try {
- out.writeInt(v.length());
- v.writeTo(out);
- } catch (IOException e) {
- throw new ParquetEncodingException("could not write bytes", e);
- }
- }
-
- @Override
- public final void writeInteger(int v) {
- try {
- out.writeInt(v);
- } catch (IOException e) {
- throw new ParquetEncodingException("could not write int", e);
- }
- }
-
- @Override
- public final void writeLong(long v) {
- try {
- out.writeLong(v);
- } catch (IOException e) {
- throw new ParquetEncodingException("could not write long", e);
- }
- }
-
- @Override
- public final void writeFloat(float v) {
- try {
- out.writeFloat(v);
- } catch (IOException e) {
- throw new ParquetEncodingException("could not write float", e);
- }
- }
-
- @Override
- public final void writeDouble(double v) {
- try {
- out.writeDouble(v);
- } catch (IOException e) {
- throw new ParquetEncodingException("could not write double", e);
- }
- }
-
- @Override
- public void writeByte(int value) {
- try {
- out.write(value);
- } catch (IOException e) {
- throw new ParquetEncodingException("could not write byte", e);
- }
- }
-
- @Override
- public long getBufferedSize() {
- return arrayOut.size();
- }
-
- @Override
- public BytesInput getBytes() {
- try {
- out.flush();
- } catch (IOException e) {
- throw new ParquetEncodingException("could not write page", e);
- }
- if (Log.DEBUG) LOG.debug("writing a buffer of size " + arrayOut.size());
- return BytesInput.from(arrayOut);
- }
-
- @Override
- public void reset() {
- arrayOut.reset();
- }
-
- @Override
- public long getAllocatedSize() {
- return arrayOut.getCapacity();
- }
-
- @Override
- public Encoding getEncoding() {
- return Encoding.PLAIN;
- }
-
- @Override
- public String memUsageString(String prefix) {
- return arrayOut.memUsageString(prefix + " PLAIN");
- }
-
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridDecoder.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridDecoder.java b/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridDecoder.java
deleted file mode 100644
index 05234be..0000000
--- a/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridDecoder.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.rle;
-
-import static parquet.Log.DEBUG;
-
-import java.io.ByteArrayInputStream;
-import java.io.DataInputStream;
-import java.io.IOException;
-
-import parquet.Log;
-import parquet.Preconditions;
-import parquet.bytes.BytesUtils;
-import parquet.column.values.bitpacking.BytePacker;
-import parquet.column.values.bitpacking.Packer;
-import parquet.io.ParquetDecodingException;
-
-/**
- * Decodes values written in the grammar described in {@link RunLengthBitPackingHybridEncoder}
- *
- * @author Julien Le Dem
- */
-public class RunLengthBitPackingHybridDecoder {
- private static final Log LOG = Log.getLog(RunLengthBitPackingHybridDecoder.class);
-
- private static enum MODE { RLE, PACKED }
-
- private final int bitWidth;
- private final BytePacker packer;
- private final ByteArrayInputStream in;
-
- private MODE mode;
- private int currentCount;
- private int currentValue;
- private int[] currentBuffer;
-
- public RunLengthBitPackingHybridDecoder(int bitWidth, ByteArrayInputStream in) {
- if (DEBUG) LOG.debug("decoding bitWidth " + bitWidth);
-
- Preconditions.checkArgument(bitWidth >= 0 && bitWidth <= 32, "bitWidth must be >= 0 and <= 32");
- this.bitWidth = bitWidth;
- this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
- this.in = in;
- }
-
- public int readInt() throws IOException {
- if (currentCount == 0) {
- readNext();
- }
- -- currentCount;
- int result;
- switch (mode) {
- case RLE:
- result = currentValue;
- break;
- case PACKED:
- result = currentBuffer[currentBuffer.length - 1 - currentCount];
- break;
- default:
- throw new ParquetDecodingException("not a valid mode " + mode);
- }
- return result;
- }
-
- private void readNext() throws IOException {
- Preconditions.checkArgument(in.available() > 0, "Reading past RLE/BitPacking stream.");
- final int header = BytesUtils.readUnsignedVarInt(in);
- mode = (header & 1) == 0 ? MODE.RLE : MODE.PACKED;
- switch (mode) {
- case RLE:
- currentCount = header >>> 1;
- if (DEBUG) LOG.debug("reading " + currentCount + " values RLE");
- currentValue = BytesUtils.readIntLittleEndianPaddedOnBitWidth(in, bitWidth);
- break;
- case PACKED:
- int numGroups = header >>> 1;
- currentCount = numGroups * 8;
- if (DEBUG) LOG.debug("reading " + currentCount + " values BIT PACKED");
- currentBuffer = new int[currentCount]; // TODO: reuse a buffer
- byte[] bytes = new byte[numGroups * bitWidth];
- // At the end of the file RLE data though, there might not be that many bytes left.
- int bytesToRead = (int)Math.ceil(currentCount * bitWidth / 8.0);
- bytesToRead = Math.min(bytesToRead, in.available());
- new DataInputStream(in).readFully(bytes, 0, bytesToRead);
- for (int valueIndex = 0, byteIndex = 0; valueIndex < currentCount; valueIndex += 8, byteIndex += bitWidth) {
- packer.unpack8Values(bytes, byteIndex, currentBuffer, valueIndex);
- }
- break;
- default:
- throw new ParquetDecodingException("not a valid mode " + mode);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridEncoder.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridEncoder.java b/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridEncoder.java
deleted file mode 100644
index 1456217..0000000
--- a/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridEncoder.java
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.rle;
-
-import java.io.IOException;
-
-import parquet.Log;
-import parquet.Preconditions;
-import parquet.bytes.BytesInput;
-import parquet.bytes.BytesUtils;
-import parquet.bytes.CapacityByteArrayOutputStream;
-import parquet.column.values.bitpacking.BytePacker;
-import parquet.column.values.bitpacking.Packer;
-
-import static parquet.Log.DEBUG;
-
-/**
- * Encodes values using a combination of run length encoding and bit packing,
- * according to the following grammar:
- *
- * <pre>
- * {@code
- * rle-bit-packed-hybrid: <length> <encoded-data>
- * length := length of the <encoded-data> in bytes stored as 4 bytes little endian
- * encoded-data := <run>*
- * run := <bit-packed-run> | <rle-run>
- * bit-packed-run := <bit-packed-header> <bit-packed-values>
- * bit-packed-header := varint-encode(<bit-pack-count> << 1 | 1)
- * // we always bit-pack a multiple of 8 values at a time, so we only store the number of values / 8
- * bit-pack-count := (number of values in this run) / 8
- * bit-packed-values := bit packed back to back, from LSB to MSB
- * rle-run := <rle-header> <repeated-value>
- * rle-header := varint-encode( (number of times repeated) << 1)
- * repeated-value := value that is repeated, using a fixed-width of round-up-to-next-byte(bit-width)
- * }
- * </pre>
- * NOTE: this class is only responsible for creating and returning the {@code <encoded-data>}
- * portion of the above grammar. The {@code <length>} portion is done by
- * {@link RunLengthBitPackingHybridValuesWriter}
- * <p>
- * Only supports values >= 0 // TODO: is that ok? Should we make a signed version?
- *
- * @author Alex Levenson
- */
-public class RunLengthBitPackingHybridEncoder {
- private static final Log LOG = Log.getLog(RunLengthBitPackingHybridEncoder.class);
-
- private final BytePacker packer;
-
- private final CapacityByteArrayOutputStream baos;
-
- /**
- * The bit width used for bit-packing and for writing
- * the repeated-value
- */
- private final int bitWidth;
-
- /**
- * Values that are bit packed 8 at at a time are packed into this
- * buffer, which is then written to baos
- */
- private final byte[] packBuffer;
-
- /**
- * Previous value written, used to detect repeated values
- */
- private int previousValue;
-
- /**
- * We buffer 8 values at a time, and either bit pack them
- * or discard them after writing a rle-run
- */
- private final int[] bufferedValues;
- private int numBufferedValues;
-
- /**
- * How many times a value has been repeated
- */
- private int repeatCount;
-
- /**
- * How many groups of 8 values have been written
- * to the current bit-packed-run
- */
- private int bitPackedGroupCount;
-
- /**
- * A "pointer" to a single byte in baos,
- * which we use as our bit-packed-header. It's really
- * the logical index of the byte in baos.
- *
- * We are only using one byte for this header,
- * which limits us to writing 504 values per bit-packed-run.
- *
- * MSB must be 0 for varint encoding, LSB must be 1 to signify
- * that this is a bit-packed-header leaves 6 bits to write the
- * number of 8-groups -> (2^6 - 1) * 8 = 504
- */
- private long bitPackedRunHeaderPointer;
-
- private boolean toBytesCalled;
-
- public RunLengthBitPackingHybridEncoder(int bitWidth, int initialCapacity, int pageSize) {
- if (DEBUG) {
- LOG.debug(String.format("Encoding: RunLengthBitPackingHybridEncoder with "
- + "bithWidth: %d initialCapacity %d", bitWidth, initialCapacity));
- }
-
- Preconditions.checkArgument(bitWidth >= 0 && bitWidth <= 32, "bitWidth must be >= 0 and <= 32");
-
- this.bitWidth = bitWidth;
- this.baos = new CapacityByteArrayOutputStream(initialCapacity, pageSize);
- this.packBuffer = new byte[bitWidth];
- this.bufferedValues = new int[8];
- this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
- reset(false);
- }
-
- private void reset(boolean resetBaos) {
- if (resetBaos) {
- this.baos.reset();
- }
- this.previousValue = 0;
- this.numBufferedValues = 0;
- this.repeatCount = 0;
- this.bitPackedGroupCount = 0;
- this.bitPackedRunHeaderPointer = -1;
- this.toBytesCalled = false;
- }
-
- public void writeInt(int value) throws IOException {
- if (value == previousValue) {
- // keep track of how many times we've seen this value
- // consecutively
- ++repeatCount;
-
- if (repeatCount >= 8) {
- // we've seen this at least 8 times, we're
- // certainly going to write an rle-run,
- // so just keep on counting repeats for now
- return;
- }
- } else {
- // This is a new value, check if it signals the end of
- // an rle-run
- if (repeatCount >= 8) {
- // it does! write an rle-run
- writeRleRun();
- }
-
- // this is a new value so we've only seen it once
- repeatCount = 1;
- // start tracking this value for repeats
- previousValue = value;
- }
-
- // We have not seen enough repeats to justify an rle-run yet,
- // so buffer this value in case we decide to write a bit-packed-run
- bufferedValues[numBufferedValues] = value;
- ++numBufferedValues;
-
- if (numBufferedValues == 8) {
- // we've encountered less than 8 repeated values, so
- // either start a new bit-packed-run or append to the
- // current bit-packed-run
- writeOrAppendBitPackedRun();
- }
- }
-
- private void writeOrAppendBitPackedRun() throws IOException {
- if (bitPackedGroupCount >= 63) {
- // we've packed as many values as we can for this run,
- // end it and start a new one
- endPreviousBitPackedRun();
- }
-
- if (bitPackedRunHeaderPointer == -1) {
- // this is a new bit-packed-run, allocate a byte for the header
- // and keep a "pointer" to it so that it can be mutated later
- baos.write(0); // write a sentinel value
- bitPackedRunHeaderPointer = baos.getCurrentIndex();
- }
-
- packer.pack8Values(bufferedValues, 0, packBuffer, 0);
- baos.write(packBuffer);
-
- // empty the buffer, they've all been written
- numBufferedValues = 0;
-
- // clear the repeat count, as some repeated values
- // may have just been bit packed into this run
- repeatCount = 0;
-
- ++bitPackedGroupCount;
- }
-
- /**
- * If we are currently writing a bit-packed-run, update the
- * bit-packed-header and consider this run to be over
- *
- * does nothing if we're not currently writing a bit-packed run
- */
- private void endPreviousBitPackedRun() {
- if (bitPackedRunHeaderPointer == -1) {
- // we're not currently in a bit-packed-run
- return;
- }
-
- // create bit-packed-header, which needs to fit in 1 byte
- byte bitPackHeader = (byte) ((bitPackedGroupCount << 1) | 1);
-
- // update this byte
- baos.setByte(bitPackedRunHeaderPointer, bitPackHeader);
-
- // mark that this run is over
- bitPackedRunHeaderPointer = -1;
-
- // reset the number of groups
- bitPackedGroupCount = 0;
- }
-
- private void writeRleRun() throws IOException {
- // we may have been working on a bit-packed-run
- // so close that run if it exists before writing this
- // rle-run
- endPreviousBitPackedRun();
-
- // write the rle-header (lsb of 0 signifies a rle run)
- BytesUtils.writeUnsignedVarInt(repeatCount << 1, baos);
- // write the repeated-value
- BytesUtils.writeIntLittleEndianPaddedOnBitWidth(baos, previousValue, bitWidth);
-
- // reset the repeat count
- repeatCount = 0;
-
- // throw away all the buffered values, they were just repeats and they've been written
- numBufferedValues = 0;
- }
-
- public BytesInput toBytes() throws IOException {
- Preconditions.checkArgument(!toBytesCalled,
- "You cannot call toBytes() more than once without calling reset()");
-
- // write anything that is buffered / queued up for an rle-run
- if (repeatCount >= 8) {
- writeRleRun();
- } else if(numBufferedValues > 0) {
- for (int i = numBufferedValues; i < 8; i++) {
- bufferedValues[i] = 0;
- }
- writeOrAppendBitPackedRun();
- endPreviousBitPackedRun();
- } else {
- endPreviousBitPackedRun();
- }
-
- toBytesCalled = true;
- return BytesInput.from(baos);
- }
-
- /**
- * Reset this encoder for re-use
- */
- public void reset() {
- reset(true);
- }
-
- public long getBufferedSize() {
- return baos.size();
- }
-
- public long getAllocatedSize() {
- return baos.getCapacity();
- }
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridValuesReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridValuesReader.java b/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridValuesReader.java
deleted file mode 100644
index d0ce978..0000000
--- a/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridValuesReader.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.rle;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-
-import parquet.bytes.BytesUtils;
-import parquet.column.values.ValuesReader;
-import parquet.io.ParquetDecodingException;
-
-/**
- * This ValuesReader does all the reading in {@link #initFromPage}
- * and stores the values in an in memory buffer, which is less than ideal.
- *
- * @author Alex Levenson
- */
-public class RunLengthBitPackingHybridValuesReader extends ValuesReader {
- private final int bitWidth;
- private RunLengthBitPackingHybridDecoder decoder;
- private int nextOffset;
-
- public RunLengthBitPackingHybridValuesReader(int bitWidth) {
- this.bitWidth = bitWidth;
- }
-
- @Override
- public void initFromPage(int valueCountL, byte[] page, int offset) throws IOException {
- ByteArrayInputStream in = new ByteArrayInputStream(page, offset, page.length - offset);
- int length = BytesUtils.readIntLittleEndian(in);
-
- decoder = new RunLengthBitPackingHybridDecoder(bitWidth, in);
-
- // 4 is for the length which is stored as 4 bytes little endian
- this.nextOffset = offset + length + 4;
- }
-
- @Override
- public int getNextOffset() {
- return this.nextOffset;
- }
-
- @Override
- public int readInteger() {
- try {
- return decoder.readInt();
- } catch (IOException e) {
- throw new ParquetDecodingException(e);
- }
- }
-
- @Override
- public boolean readBoolean() {
- return readInteger() == 0 ? false : true;
- }
-
- @Override
- public void skip() {
- readInteger();
- }
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridValuesWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridValuesWriter.java b/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridValuesWriter.java
deleted file mode 100644
index 641f5fe..0000000
--- a/parquet-column/src/main/java/parquet/column/values/rle/RunLengthBitPackingHybridValuesWriter.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.rle;
-
-import java.io.IOException;
-
-import parquet.Ints;
-import parquet.bytes.BytesInput;
-import parquet.column.Encoding;
-import parquet.column.values.ValuesWriter;
-import parquet.io.ParquetEncodingException;
-
-/**
- * @author Alex Levenson
- */
-public class RunLengthBitPackingHybridValuesWriter extends ValuesWriter {
- private final RunLengthBitPackingHybridEncoder encoder;
-
- public RunLengthBitPackingHybridValuesWriter(int bitWidth, int initialCapacity, int pageSize) {
- this.encoder = new RunLengthBitPackingHybridEncoder(bitWidth, initialCapacity, pageSize);
- }
-
- @Override
- public void writeInteger(int v) {
- try {
- encoder.writeInt(v);
- } catch (IOException e) {
- throw new ParquetEncodingException(e);
- }
- }
-
- @Override
- public void writeBoolean(boolean v) {
- writeInteger(v ? 1 : 0);
- }
-
- @Override
- public long getBufferedSize() {
- return encoder.getBufferedSize();
- }
-
- @Override
- public long getAllocatedSize() {
- return encoder.getAllocatedSize();
- }
-
- @Override
- public BytesInput getBytes() {
- try {
- // prepend the length of the column
- BytesInput rle = encoder.toBytes();
- return BytesInput.concat(BytesInput.fromInt(Ints.checkedCast(rle.size())), rle);
- } catch (IOException e) {
- throw new ParquetEncodingException(e);
- }
- }
-
- @Override
- public Encoding getEncoding() {
- return Encoding.RLE;
- }
-
- @Override
- public void reset() {
- encoder.reset();
- }
-
- @Override
- public String memUsageString(String prefix) {
- return String.format("%s RunLengthBitPackingHybrid %d bytes", prefix, getAllocatedSize());
- }
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/example/DummyRecordConverter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/example/DummyRecordConverter.java b/parquet-column/src/main/java/parquet/example/DummyRecordConverter.java
deleted file mode 100644
index 5c4f5bc..0000000
--- a/parquet-column/src/main/java/parquet/example/DummyRecordConverter.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.example;
-
-import java.util.List;
-
-import parquet.io.api.Binary;
-import parquet.io.api.Converter;
-import parquet.io.api.GroupConverter;
-import parquet.io.api.PrimitiveConverter;
-import parquet.io.api.RecordMaterializer;
-import parquet.schema.GroupType;
-import parquet.schema.MessageType;
-import parquet.schema.PrimitiveType;
-import parquet.schema.TypeConverter;
-
-/**
- * Dummy implementation for perf tests
- *
- * @author Julien Le Dem
- *
- */
-public final class DummyRecordConverter extends RecordMaterializer<Object> {
-
- private Object a;
- private GroupConverter root;
-
- public DummyRecordConverter(MessageType schema) {
- this.root = (GroupConverter)schema.convertWith(new TypeConverter<Converter>() {
-
- @Override
- public Converter convertPrimitiveType(List<GroupType> path, PrimitiveType primitiveType) {
- return new PrimitiveConverter() {
-
- @Override
- public void addBinary(Binary value) {
- a = value;
- }
- @Override
- public void addBoolean(boolean value) {
- a = value;
- }
- @Override
- public void addDouble(double value) {
- a = value;
- }
- @Override
- public void addFloat(float value) {
- a = value;
- }
- @Override
- public void addInt(int value) {
- a = value;
- }
- @Override
- public void addLong(long value) {
- a = value;
- }
- };
- }
-
- @Override
- public Converter convertGroupType(List<GroupType> path, GroupType groupType, final List<Converter> converters) {
- return new GroupConverter() {
-
- public Converter getConverter(int fieldIndex) {
- return converters.get(fieldIndex);
- }
-
- public void start() {
- a = "start()";
- }
-
- public void end() {
- a = "end()";
- }
-
- };
- }
-
- @Override
- public Converter convertMessageType(MessageType messageType, List<Converter> children) {
- return convertGroupType(null, messageType, children);
- }
- });
- }
-
- @Override
- public Object getCurrentRecord() {
- return a;
- }
-
- @Override
- public GroupConverter getRootConverter() {
- return root;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/example/Paper.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/example/Paper.java b/parquet-column/src/main/java/parquet/example/Paper.java
deleted file mode 100644
index 2e638a4..0000000
--- a/parquet-column/src/main/java/parquet/example/Paper.java
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.example;
-
-import static parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
-import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
-import static parquet.schema.Type.Repetition.OPTIONAL;
-import static parquet.schema.Type.Repetition.REPEATED;
-import static parquet.schema.Type.Repetition.REQUIRED;
-import parquet.example.data.Group;
-import parquet.example.data.simple.SimpleGroup;
-import parquet.schema.GroupType;
-import parquet.schema.MessageType;
-import parquet.schema.PrimitiveType;
-
-/**
- * Examples from the Dremel Paper: three schemas and four sample records built in static initializers.
- * r1/r2 are built against {@code schema}; pr1/pr2 against {@code schema2}; {@code schema3} has no record here.
- * @author Julien Le Dem
- *
- */
-public class Paper {
- public static final MessageType schema =
- new MessageType("Document",
- new PrimitiveType(REQUIRED, INT64, "DocId"),
- new GroupType(OPTIONAL, "Links",
- new PrimitiveType(REPEATED, INT64, "Backward"),
- new PrimitiveType(REPEATED, INT64, "Forward")
- ),
- new GroupType(REPEATED, "Name",
- new GroupType(REPEATED, "Language",
- new PrimitiveType(REQUIRED, BINARY, "Code"),
- new PrimitiveType(OPTIONAL, BINARY, "Country")),
- new PrimitiveType(OPTIONAL, BINARY, "Url")));
- // Reduced variant of 'schema': only DocId and Name.Language.Country.
- public static final MessageType schema2 =
- new MessageType("Document",
- new PrimitiveType(REQUIRED, INT64, "DocId"),
- new GroupType(REPEATED, "Name",
- new GroupType(REPEATED, "Language",
- new PrimitiveType(OPTIONAL, BINARY, "Country"))));
- // Reduced variant of 'schema': only DocId and the Links group.
- public static final MessageType schema3 =
- new MessageType("Document",
- new PrimitiveType(REQUIRED, INT64, "DocId"),
- new GroupType(OPTIONAL, "Links",
- new PrimitiveType(REPEATED, INT64, "Backward"),
- new PrimitiveType(REPEATED, INT64, "Forward")
- ));
- // r1 and r2 use the full schema; the comment before each static block spells out the record it builds.
- public static final SimpleGroup r1 = new SimpleGroup(schema);
- public static final SimpleGroup r2 = new SimpleGroup(schema);
- ////r1
- //DocId: 10
- //Links
- // Forward: 20
- // Forward: 40
- // Forward: 60
- //Name
- // Language
- // Code: 'en-us'
- // Country: 'us'
- // Language
- // Code: 'en'
- // Url: 'http://A'
- //Name
- // Url: 'http://B'
- //Name
- // Language
- // Code: 'en-gb'
- // Country: 'gb'
- static {
- r1.add("DocId", 10l);
- r1.addGroup("Links")
- .append("Forward", 20l)
- .append("Forward", 40l)
- .append("Forward", 60l);
- Group name = r1.addGroup("Name");
- {
- name.addGroup("Language")
- .append("Code", "en-us")
- .append("Country", "us");
- name.addGroup("Language")
- .append("Code", "en");
- name.append("Url", "http://A");
- }
- name = r1.addGroup("Name");
- {
- name.append("Url", "http://B");
- }
- name = r1.addGroup("Name");
- {
- name.addGroup("Language")
- .append("Code", "en-gb")
- .append("Country", "gb");
- }
- }
- ////r2
- //DocId: 20
- //Links
- // Backward: 10
- // Backward: 30
- // Forward: 80
- //Name
- // Url: 'http://C'
- static {
- r2.add("DocId", 20l);
- r2.addGroup("Links")
- .append("Backward", 10l)
- .append("Backward", 30l)
- .append("Forward", 80l);
- r2.addGroup("Name")
- .append("Url", "http://C");
- }
- // pr1 and pr2 use schema2 and repeat only the DocId / Name / Language / Country parts of r1 and r2.
- public static final SimpleGroup pr1 = new SimpleGroup(schema2);
- public static final SimpleGroup pr2 = new SimpleGroup(schema2);
- ////pr1 (same DocId and Country values as r1, built against schema2)
- //DocId: 10
- //Name
- // Language
- // Country: 'us'
- // Language
- //Name
- //Name
- // Language
- // Country: 'gb'
- static {
- pr1.add("DocId", 10l);
- Group name = pr1.addGroup("Name");
- {
- name.addGroup("Language")
- .append("Country", "us");
- name.addGroup("Language");
- }
- name = pr1.addGroup("Name");
- name = pr1.addGroup("Name");
- {
- name.addGroup("Language")
- .append("Country", "gb");
- }
- }
-
- ////pr2 (same DocId as r2 with an empty Name group, built against schema2)
- //DocId: 20
- //Name
- static {
- pr2.add("DocId", 20l);
- pr2.addGroup("Name");
- }
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/example/data/Group.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/example/data/Group.java b/parquet-column/src/main/java/parquet/example/data/Group.java
deleted file mode 100644
index ea21b5c..0000000
--- a/parquet-column/src/main/java/parquet/example/data/Group.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.example.data;
-
-import parquet.Log;
-import parquet.example.data.simple.NanoTime;
-import parquet.io.api.Binary;
-import parquet.io.api.RecordConsumer;
-
-abstract public class Group extends GroupValueSource { // adds add/append/addGroup operations on top of the read accessors
- private static final Log logger = Log.getLog(Group.class);
- private static final boolean DEBUG = Log.DEBUG;
- // Name-based add(...) overloads: look up the field index on getType() and delegate to add(int, ...).
- public void add(String field, int value) {
- add(getType().getFieldIndex(field), value);
- }
-
- public void add(String field, long value) {
- add(getType().getFieldIndex(field), value);
- }
-
- public void add(String field, float value) {
- add(getType().getFieldIndex(field), value);
- }
-
- public void add(String field, double value) {
- add(getType().getFieldIndex(field), value);
- }
-
- public void add(String field, String value) {
- add(getType().getFieldIndex(field), value);
- }
-
- public void add(String field, NanoTime value) {
- add(getType().getFieldIndex(field), value);
- }
-
- public void add(String field, boolean value) {
- add(getType().getFieldIndex(field), value);
- }
-
- public void add(String field, Binary value) {
- add(getType().getFieldIndex(field), value);
- }
-
- public void add(String field, Group value) {
- add(getType().getFieldIndex(field), value);
- }
- // Logs at debug level when DEBUG is set, then delegates to addGroup(int) and returns its result.
- public Group addGroup(String field) {
- if (DEBUG) logger.debug("add group "+field+" to "+getType().getName());
- return addGroup(getType().getFieldIndex(field));
- }
- // Narrows the return type of GroupValueSource.getGroup(String, int) from GroupValueSource to Group.
- public Group getGroup(String field, int index) {
- return getGroup(getType().getFieldIndex(field), index);
- }
- // Index-based operations that concrete subclasses must implement.
- abstract public void add(int fieldIndex, int value);
-
- abstract public void add(int fieldIndex, long value);
-
- abstract public void add(int fieldIndex, String value);
-
- abstract public void add(int fieldIndex, boolean value);
-
- abstract public void add(int fieldIndex, NanoTime value);
-
- abstract public void add(int fieldIndex, Binary value);
-
- abstract public void add(int fieldIndex, float value);
-
- abstract public void add(int fieldIndex, double value);
-
- abstract public void add(int fieldIndex, Group value);
-
- abstract public Group addGroup(int fieldIndex);
-
- abstract public Group getGroup(int fieldIndex, int index);
- // Returns this instance unchanged.
- public Group asGroup() {
- return this;
- }
- // append(...) overloads call add(...) for the named field and return this, so calls can be chained.
- public Group append(String fieldName, int value) {
- add(fieldName, value);
- return this;
- }
-
- public Group append(String fieldName, float value) {
- add(fieldName, value);
- return this;
- }
-
- public Group append(String fieldName, double value) {
- add(fieldName, value);
- return this;
- }
-
- public Group append(String fieldName, long value) {
- add(fieldName, value);
- return this;
- }
-
- public Group append(String fieldName, NanoTime value) {
- add(fieldName, value);
- return this;
- }
- // Unlike add(String, String), this wraps the string with Binary.fromString and goes through add(String, Binary).
- public Group append(String fieldName, String value) {
- add(fieldName, Binary.fromString(value));
- return this;
- }
-
- public Group append(String fieldName, boolean value) {
- add(fieldName, value);
- return this;
- }
-
- public Group append(String fieldName, Binary value) {
- add(fieldName, value);
- return this;
- }
- // Implemented by subclasses; presumably emits the value at (field, index) to recordConsumer (confirm in implementations).
- abstract public void writeValue(int field, int index, RecordConsumer recordConsumer);
-
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/example/data/GroupFactory.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/example/data/GroupFactory.java b/parquet-column/src/main/java/parquet/example/data/GroupFactory.java
deleted file mode 100644
index babbfef..0000000
--- a/parquet-column/src/main/java/parquet/example/data/GroupFactory.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.example.data;
-
-abstract public class GroupFactory {
- // Single factory hook: subclasses decide which Group to return.
- abstract public Group newGroup();
-
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/example/data/GroupValueSource.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/example/data/GroupValueSource.java b/parquet-column/src/main/java/parquet/example/data/GroupValueSource.java
deleted file mode 100644
index 7ba9e05..0000000
--- a/parquet-column/src/main/java/parquet/example/data/GroupValueSource.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.example.data;
-
-import parquet.io.api.Binary;
-import parquet.schema.GroupType;
-
-abstract public class GroupValueSource {
- // Name-based getters: resolve the field index through getType() and delegate to the index-based abstract getter.
- public int getFieldRepetitionCount(String field) {
- return getFieldRepetitionCount(getType().getFieldIndex(field));
- }
-
- public GroupValueSource getGroup(String field, int index) {
- return getGroup(getType().getFieldIndex(field), index);
- }
-
- public String getString(String field, int index) {
- return getString(getType().getFieldIndex(field), index);
- }
-
- public int getInteger(String field, int index) {
- return getInteger(getType().getFieldIndex(field), index);
- }
-
- public long getLong(String field, int index) {
- return getLong(getType().getFieldIndex(field), index);
- }
-
- public double getDouble(String field, int index) {
- return getDouble(getType().getFieldIndex(field), index);
- }
-
- public float getFloat(String field, int index) {
- return getFloat(getType().getFieldIndex(field), index);
- }
-
- public boolean getBoolean(String field, int index) {
- return getBoolean(getType().getFieldIndex(field), index);
- }
-
- public Binary getBinary(String field, int index) {
- return getBinary(getType().getFieldIndex(field), index);
- }
-
- public Binary getInt96(String field, int index) {
- return getInt96(getType().getFieldIndex(field), index);
- }
- // Index-based accessors left to subclasses; 'index' presumably selects one occurrence of a repeated field (confirm).
- abstract public int getFieldRepetitionCount(int fieldIndex);
-
- abstract public GroupValueSource getGroup(int fieldIndex, int index);
-
- abstract public String getString(int fieldIndex, int index);
-
- abstract public int getInteger(int fieldIndex, int index);
-
- abstract public long getLong(int fieldIndex, int index);
-
- abstract public double getDouble(int fieldIndex, int index);
-
- abstract public float getFloat(int fieldIndex, int index);
-
- abstract public boolean getBoolean(int fieldIndex, int index);
-
- abstract public Binary getBinary(int fieldIndex, int index);
-
- abstract public Binary getInt96(int fieldIndex, int index);
-
- abstract public String getValueToString(int fieldIndex, int index);
- // Group type used by the name-based getters above for their name-to-index lookups.
- abstract public GroupType getType();
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/example/data/GroupWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/example/data/GroupWriter.java b/parquet-column/src/main/java/parquet/example/data/GroupWriter.java
deleted file mode 100644
index 42c6d26..0000000
--- a/parquet-column/src/main/java/parquet/example/data/GroupWriter.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.example.data;
-
-import parquet.io.api.RecordConsumer;
-import parquet.schema.GroupType;
-import parquet.schema.Type;
-
-public class GroupWriter {
- // Replays a Group into a RecordConsumer, driven by the GroupType passed to the constructor.
- private final RecordConsumer recordConsumer;
- private final GroupType schema;
-
- public GroupWriter(RecordConsumer recordConsumer, GroupType schema) {
- this.recordConsumer = recordConsumer;
- this.schema = schema;
- }
- // Writes one group as a message: startMessage, the group's fields, endMessage.
- public void write(Group group) {
- recordConsumer.startMessage();
- writeGroup(group, schema);
- recordConsumer.endMessage();
- }
- // Emits every field of 'type' that has at least one value in 'group'; recurses into nested groups.
- private void writeGroup(Group group, GroupType type) {
- int fieldCount = type.getFieldCount();
- for (int field = 0; field < fieldCount; ++field) {
- int valueCount = group.getFieldRepetitionCount(field);
- if (valueCount > 0) { // fields with no values produce no startField/endField calls
- Type fieldType = type.getType(field);
- String fieldName = fieldType.getName();
- recordConsumer.startField(fieldName, field);
- for (int index = 0; index < valueCount; ++index) {
- if (fieldType.isPrimitive()) {
- group.writeValue(field, index, recordConsumer); // the group itself emits the primitive value
- } else {
- recordConsumer.startGroup();
- writeGroup(group.getGroup(field, index), fieldType.asGroupType());
- recordConsumer.endGroup();
- }
- }
- recordConsumer.endField(fieldName, field);
- }
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/example/data/simple/BinaryValue.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/example/data/simple/BinaryValue.java b/parquet-column/src/main/java/parquet/example/data/simple/BinaryValue.java
deleted file mode 100644
index fc49099..0000000
--- a/parquet-column/src/main/java/parquet/example/data/simple/BinaryValue.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.example.data.simple;
-
-import parquet.io.api.Binary;
-import parquet.io.api.RecordConsumer;
-
-
-public class BinaryValue extends Primitive {
- // Holder for a single Binary value.
- private final Binary binary;
-
- public BinaryValue(Binary binary) {
- this.binary = binary;
- }
- // Returns the wrapped Binary instance itself (no copy is made here).
- @Override
- public Binary getBinary() {
- return binary;
- }
- // String form comes from Binary.toStringUsingUTF8().
- @Override
- public String getString() {
- return binary.toStringUsingUTF8();
- }
- // Passes the wrapped value to recordConsumer.addBinary.
- @Override
- public void writeValue(RecordConsumer recordConsumer) {
- recordConsumer.addBinary(binary);
- }
- // Same text as getString().
- @Override
- public String toString() {
- return getString();
- }
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/example/data/simple/BooleanValue.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/example/data/simple/BooleanValue.java b/parquet-column/src/main/java/parquet/example/data/simple/BooleanValue.java
deleted file mode 100644
index 87172da..0000000
--- a/parquet-column/src/main/java/parquet/example/data/simple/BooleanValue.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.example.data.simple;
-
-import parquet.io.api.RecordConsumer;
-
-public class BooleanValue extends Primitive {
- // Holder for a single boolean value.
- private final boolean bool;
- public BooleanValue(boolean bool) {
- this.bool = bool;
- }
- // Text form is String.valueOf(bool), i.e. "true" or "false".
- @Override
- public String toString() {
- return String.valueOf(bool);
- }
- // Returns the stored value.
- @Override
- public boolean getBoolean() {
- return bool;
- }
- // Passes the stored value to recordConsumer.addBoolean.
- @Override
- public void writeValue(RecordConsumer recordConsumer) {
- recordConsumer.addBoolean(bool);
- }
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/example/data/simple/DoubleValue.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/example/data/simple/DoubleValue.java b/parquet-column/src/main/java/parquet/example/data/simple/DoubleValue.java
deleted file mode 100644
index 5a47b46..0000000
--- a/parquet-column/src/main/java/parquet/example/data/simple/DoubleValue.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.example.data.simple;
-
-import parquet.io.api.RecordConsumer;
-
-public class DoubleValue extends Primitive {
- // Holder for a single double value.
- private final double value;
-
- public DoubleValue(double value) {
- this.value = value;
- }
- // Returns the stored value.
- @Override
- public double getDouble() {
- return value;
- }
- // Passes the stored value to recordConsumer.addDouble.
- @Override
- public void writeValue(RecordConsumer recordConsumer) {
- recordConsumer.addDouble(value);
- }
- // Text form is String.valueOf(value).
- @Override
- public String toString() {
- return String.valueOf(value);
- }
-}