You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ya...@apache.org on 2010/03/10 18:38:15 UTC
svn commit: r921468 - in /hadoop/pig/trunk/contrib/zebra: CHANGES.txt
src/java/org/apache/hadoop/zebra/io/BasicTable.java
src/java/org/apache/hadoop/zebra/io/ColumnGroup.java
Author: yanz
Date: Wed Mar 10 17:38:15 2010
New Revision: 921468
URL: http://svn.apache.org/viewvc?rev=921468&view=rev
Log:
PIG-1207 Data sanity check should be performed at the end of writing instead of later at query time (yanz)
Modified:
hadoop/pig/trunk/contrib/zebra/CHANGES.txt
hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java
hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java
Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=921468&r1=921467&r2=921468&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original)
+++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Wed Mar 10 17:38:15 2010
@@ -14,6 +14,8 @@ Trunk (unreleased changes)
IMPROVEMENTS
+ PIG-1207 Data sanity check should be performed at the end of writing instead of later at query time (yanz)
+
PIG-1206 Storing descendingly sorted PIG table as unsorted table (yanz)
PIG-1240 zebra manifest file enhancement (gauravj via yanz)
Modified: hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java?rev=921468&r1=921467&r2=921468&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java Wed Mar 10 17:38:15 2010
@@ -1459,9 +1459,33 @@ public class BasicTable {
if (!finished)
finish();
try {
+ ColumnGroup.CGIndex firstCGIndex = null, cgIndex;
+ int first = -1;
for (int nx = 0; nx < colGroups.length; nx++) {
if (colGroups[nx] != null) {
colGroups[nx].close();
+ if (first == -1)
+ {
+ first = nx;
+ firstCGIndex = colGroups[nx].index;
+ } else {
+ cgIndex = colGroups[nx].index;
+ if (cgIndex.size() != firstCGIndex.size())
+ throw new IOException("Column Group "+colGroups[nx].path.getName()+
+ " has a different number of files than column group " + colGroups[first].path.getName());
+ int size = firstCGIndex.size();
+ for (int i = 0; i < size; i++)
+ {
+ if (!cgIndex.get(i).name.equals(firstCGIndex.get(i).name))
+ throw new IOException("File["+i+"] in Column Group "+colGroups[nx].path.getName()+
+ " has a different name: "+cgIndex.get(i).name+" than " +
+ firstCGIndex.get(i).name + " in column group " + colGroups[first].path.getName());
+ if (cgIndex.get(i).rows != firstCGIndex.get(i).rows)
+ throw new IOException("File "+cgIndex.get(i).name+" in Column Group "+colGroups[nx].path.getName()+
+ " has a different number of rows, " + cgIndex.get(i).rows + ", than " +
+ firstCGIndex.get(i).rows + " in column group " + colGroups[first].path.getName());
+ }
+ }
}
}
metaWriter.close();
Modified: hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java?rev=921468&r1=921467&r2=921468&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java Wed Mar 10 17:38:15 2010
@@ -1519,6 +1519,7 @@ class ColumnGroup {
FileSystem fs;
CGSchema cgschema;
private boolean finished, closed;
+ CGIndex index;
/**
* Create a ColumnGroup writer. The semantics are as follows:
@@ -1700,8 +1701,8 @@ class ColumnGroup {
private void createIndex() throws IOException {
MetaFile.Writer metaFile =
- MetaFile.createWriter(makeMetaFilePath(finalOutputPath), conf);
- CGIndex index = buildIndex(fs, finalOutputPath, false, conf);
+ MetaFile.createWriter(makeMetaFilePath(finalOutputPath), conf);
+ index = buildIndex(fs, finalOutputPath, false, conf);
DataOutputStream dos = metaFile.createMetaBlock(BLOCK_NAME_INDEX);
try {
index.write(dos);