Posted to commits@hive.apache.org by zs...@apache.org on 2008/12/22 21:09:19 UTC
svn commit: r728759 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/metadata/ ql/src/test/org/apache/hadoop/hive/ql/metadata/
Author: zshao
Date: Mon Dec 22 12:09:18 2008
New Revision: 728759
URL: http://svn.apache.org/viewvc?rev=728759&view=rev
Log:
Reverted HIVE-126.
Removed:
hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestPartition.java
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=728759&r1=728758&r2=728759&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Mon Dec 22 12:09:18 2008
@@ -26,9 +26,6 @@
IMPROVEMENTS
- HIVE-126. Don't fetch information on Partitions from HDFS instead of
- MetaStore. (Johan Oskarsson via zshao)
-
HIVE-181. Restore UDFTestLength unit test for UDFs.
(David Phillips via zshao)
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=728759&r1=728758&r2=728759&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Mon Dec 22 12:09:18 2008
@@ -510,6 +510,16 @@
for (org.apache.hadoop.hive.metastore.api.Partition tpart : tParts) {
parts.add(new Partition(tbl, tpart));
}
+ // get the partitions on the HDFS location
+ List<Partition> hdfsPartitions = tbl.getPartitionsFromHDFS();
+ if(hdfsPartitions.size() != parts.size()) {
+ // HACK: either we are connecting to old metastore or metadata is out of sync with data
+ // TODO: for the former case, move this logic into OLD metastore and compare
+ // the two lists here for any conflict between metadata and data
+ LOG.error("Metadata for partitions doesn't match the data in HDFS. Table name: " + tbl.getName());
+ // let's trust hdfs partitions for now
+ return hdfsPartitions;
+ }
return parts;
} else {
// create an empty partition.
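The restored block above reconciles the metastore's view of a table's partitions with what is actually present on HDFS: if the two lists differ in size, the HDFS listing wins. Below is a minimal, self-contained sketch of that reconciliation rule; the class and method names (PartitionReconcileSketch, pickPartitions) are hypothetical stand-ins for the real Hive types.

import java.util.Arrays;
import java.util.List;

// Hypothetical sketch of the rule restored in Hive.getPartitions():
// when the metastore and HDFS disagree on the number of partitions,
// the HDFS listing is trusted.
public class PartitionReconcileSketch {

    static <T> List<T> pickPartitions(List<T> fromMetastore, List<T> fromHdfs) {
        if (fromHdfs.size() != fromMetastore.size()) {
            // Metadata is out of sync with the data (or we are talking to an
            // old metastore); trust the directories actually present on HDFS.
            System.err.println("Metadata for partitions doesn't match the data in HDFS.");
            return fromHdfs;
        }
        return fromMetastore;
    }

    public static void main(String[] args) {
        List<String> meta = Arrays.asList("ds=2008-12-21");
        List<String> hdfs = Arrays.asList("ds=2008-12-21", "ds=2008-12-22");
        // Sizes differ, so the HDFS list is returned.
        System.out.println(pickPartitions(meta, hdfs));
    }
}

Note that the check compares only list sizes, not contents; the TODO in the diff itself acknowledges that a real conflict check would compare the two lists element by element.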
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java?rev=728759&r1=728758&r2=728759&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java Mon Dec 22 12:09:18 2008
@@ -22,6 +22,7 @@
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.regex.Matcher;
@@ -35,6 +36,7 @@
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
/**
* A Hive Table Partition: is a fundamental storage unit within a Table
@@ -82,7 +84,7 @@
// is same as the table partition.
this.partPath = table.getPath();
}
- spec = createSpec(tbl, tp);
+ spec = makeSpecFromPath();
URI tmpURI = table.getDataLocation();
try {
@@ -93,24 +95,83 @@
}
}
- /**
- * Creates a partition name -> value spec map object
- * @param tbl Use the information from this table.
- * @param tp Use the information from this partition.
- * @return Partition name to value mapping.
- */
- private LinkedHashMap<String, String> createSpec(Table tbl,
- org.apache.hadoop.hive.metastore.api.Partition tp) {
-
- List<FieldSchema> fsl = tbl.getPartCols();
- List<String> tpl = tp.getValues();
- LinkedHashMap<String, String> spec = new LinkedHashMap<String, String>();
- for (int i = 0; i < tbl.getPartCols().size(); i++) {
- FieldSchema fs = fsl.get(i);
- String value = tpl.get(i);
- spec.put(fs.getName(), value);
+ // This is used when a Partition object is created solely from the hdfs partition directories
+ Partition(Table tbl, Path path) throws HiveException {
+ this.table = tbl;
+ // initialize the tPartition(thrift object) with the data from path and table
+ this.tPartition = new org.apache.hadoop.hive.metastore.api.Partition();
+ this.tPartition.setDbName(tbl.getDbName());
+ this.tPartition.setTableName(tbl.getName());
+ StorageDescriptor sd = tbl.getTTable().getSd();
+ StorageDescriptor psd = new StorageDescriptor(
+ sd.getCols(), sd.getLocation(), sd.getInputFormat(), sd.getOutputFormat(),
+ sd.isCompressed(), sd.getNumBuckets(), sd.getSerdeInfo(), sd.getBucketCols(),
+ sd.getSortCols(), new HashMap<String, String>());
+ this.tPartition.setSd(psd);
+ // change the partition location
+ if(table.isPartitioned()) {
+ this.partPath = path;
+ } else {
+ // We are in the HACK territory. SemanticAnalyzer expects a single partition whose schema
+ // is same as the table partition.
+ this.partPath = table.getPath();
+ }
+ spec = makeSpecFromPath();
+ psd.setLocation(this.partPath.toString());
+ List<String> partVals = new ArrayList<String> ();
+ tPartition.setValues(partVals);
+ for (FieldSchema field : tbl.getPartCols()) {
+ partVals.add(spec.get(field.getName()));
+ }
+ try {
+ this.partName = Warehouse.makePartName(tbl.getPartCols(), partVals);
+ } catch (MetaException e) {
+ throw new HiveException("Invalid partition key values", e);
+ }
+ }
+
+ static final Pattern pat = Pattern.compile("([^/]+)=([^/]+)");
+ private LinkedHashMap<String, String> makeSpecFromPath() throws HiveException {
+ // Keep going up the path till it equals the parent
+ Path currPath = this.partPath;
+ LinkedHashMap<String, String> partSpec = new LinkedHashMap<String, String>();
+ List<FieldSchema> pcols = this.table.getPartCols();
+ for(int i = 0; i < pcols.size(); i++) {
+ FieldSchema col = pcols.get(pcols.size() - i - 1);
+ if (currPath == null) {
+ throw new HiveException("Out of path components while expecting key: " + col.getName());
+ }
+ String component = currPath.getName();
+ // Check if the component is either of the form k=v
+ // or is the first component
+ // if neither is true then this is an invalid path
+ Matcher m = pat.matcher(component);
+ if (m.matches()) {
+ String k = m.group(1);
+ String v = m.group(2);
+
+ if (!k.equals(col.getName())) {
+ throw new HiveException("Key mismatch expected: " + col.getName() + " and got: " + k);
+ }
+ if (partSpec.containsKey(k)) {
+ throw new HiveException("Key " + k + " defined at two levels");
+ }
+
+ partSpec.put(k, v);
+ }
+ else {
+ throw new HiveException("Path " + currPath.toString() + " not a valid path");
+ }
+ currPath = currPath.getParent();
+ }
+ // reverse the list since we checked the part from leaf dir to table's base dir
+ LinkedHashMap<String, String> newSpec = new LinkedHashMap<String, String>();
+ for(int i = 0; i < table.getPartCols().size(); i++) {
+ FieldSchema field = table.getPartCols().get(i);
+ String val = partSpec.get(field.getName());
+ newSpec.put(field.getName(), val);
}
- return spec;
+ return newSpec;
}
public URI makePartURI(LinkedHashMap<String, String> spec) throws HiveException {
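The makeSpecFromPath() method restored above recovers a partition spec purely from directory names: it walks the path from the leaf upward, matches each component against the "key=value" pattern, verifies the key matches the expected partition column, and finally reverses the collected pairs into declared column order. The following standalone sketch reproduces that logic with plain Strings in place of Hive's Path and FieldSchema types; the class and method names are hypothetical.

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Standalone sketch of the makeSpecFromPath() idea: parse "key=value"
// path components leaf-first, then emit the spec in table column order.
public class PartSpecSketch {
    static final Pattern PAT = Pattern.compile("([^/]+)=([^/]+)");

    static LinkedHashMap<String, String> specFromPath(String path, List<String> partCols) {
        String[] comps = path.split("/");
        LinkedHashMap<String, String> spec = new LinkedHashMap<>();
        int c = comps.length - 1;                       // start at the leaf directory
        for (int i = partCols.size() - 1; i >= 0; i--, c--) {
            if (c < 0) {
                throw new IllegalArgumentException(
                    "Out of path components while expecting key: " + partCols.get(i));
            }
            Matcher m = PAT.matcher(comps[c]);
            if (!m.matches() || !m.group(1).equals(partCols.get(i))) {
                throw new IllegalArgumentException("Invalid path component: " + comps[c]);
            }
            spec.put(m.group(1), m.group(2));
        }
        // Pairs were collected leaf-first; rebuild in declared column order.
        LinkedHashMap<String, String> ordered = new LinkedHashMap<>();
        for (String col : partCols) {
            ordered.put(col, spec.get(col));
        }
        return ordered;
    }

    public static void main(String[] args) {
        System.out.println(specFromPath(
            "/warehouse/t/ds=2008-12-22/hr=12", Arrays.asList("ds", "hr")));
        // prints {ds=2008-12-22, hr=12}
    }
}

As in the diff, a component that is not of the form k=v, or whose key appears at the wrong level, is rejected rather than silently skipped.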
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java?rev=728759&r1=728758&r2=728759&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java Mon Dec 22 12:09:18 2008
@@ -28,9 +28,11 @@
import java.util.Map;
import java.util.Properties;
import java.util.Vector;
+import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
@@ -50,6 +52,7 @@
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.hadoop.util.StringUtils;
/**
@@ -563,5 +566,55 @@
public List<Order> getSortCols() {
return getTTable().getSd().getSortCols();
}
+
+ private static void getPartPaths(FileSystem fs, Path p, Vector<String> partPaths) throws IOException {
+ // Base case for recursion
+ if (fs.isFile(p)) {
+ if (!partPaths.contains(p.getParent().toString())) {
+ partPaths.add(p.getParent().toString());
+ }
+ }
+ else {
+ FileStatus [] dirs = fs.listStatus(p);
+
+ if (dirs.length != 0 ) {
+ for(int i=0; i < dirs.length; ++i) {
+ getPartPaths(fs, dirs[i].getPath(), partPaths);
+ }
+ }
+ else {
+ // This is an empty partition
+ if (!partPaths.contains(p.toString())) {
+ partPaths.add(p.toString());
+ }
+ }
+ }
+
+ return;
+ }
+
+ static final Pattern pat = Pattern.compile("([^/]+)=([^/]+)");
+ public List<Partition> getPartitionsFromHDFS() throws HiveException {
+ ArrayList<Partition> ret = new ArrayList<Partition> ();
+ FileSystem fs = null;
+ Vector<String> partPaths = new Vector<String>();
+
+ try {
+ fs = FileSystem.get(getDataLocation(), Hive.get().getConf());
+ getPartPaths(fs, new Path(getDataLocation().getPath()), partPaths);
+ for(String partPath: partPaths) {
+ Path tmpPath = new Path(partPath);
+ if(!fs.getFileStatus(tmpPath).isDir()) {
+ throw new HiveException("Data in hdfs is messed up. Table " + getName() + " has a partition " + partPath + " that is not a directory");
+ }
+ ret.add(new Partition(this, tmpPath));
+ }
+ } catch (IOException e) {
+ LOG.error(StringUtils.stringifyException(e));
+ throw new HiveException("DB Error: Table " + getDataLocation() + " message: " + e.getMessage());
+ }
+
+ return ret;
+ }
};
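The two methods restored in Table.java above work together: getPartPaths() recurses through the table's directory tree, treating a file's parent directory as a partition path and an empty directory as an empty partition, and getPartitionsFromHDFS() turns each collected path into a Partition object. Below is a minimal sketch of the traversal ported to java.io.File so it runs without a Hadoop cluster; the real code uses org.apache.hadoop.fs.FileSystem, and the class name here is hypothetical.

import java.io.File;
import java.util.ArrayList;
import java.util.List;

// Sketch of the getPartPaths() traversal on the local filesystem:
// recurse into directories, record each file's parent as a partition
// path, and count an empty directory as an empty partition.
public class PartPathsSketch {

    static void collect(File p, List<String> partPaths) {
        if (p.isFile()) {
            String parent = p.getParentFile().getPath();
            if (!partPaths.contains(parent)) {
                partPaths.add(parent);
            }
            return;
        }
        File[] children = p.listFiles();
        if (children == null || children.length == 0) {
            // An empty directory is an empty partition.
            if (!partPaths.contains(p.getPath())) {
                partPaths.add(p.getPath());
            }
            return;
        }
        for (File child : children) {
            collect(child, partPaths);
        }
    }

    public static void main(String[] args) {
        List<String> paths = new ArrayList<>();
        collect(new File(args.length > 0 ? args[0] : "."), paths);
        paths.forEach(System.out::println);
    }
}

The contains() checks mirror the Vector-based de-duplication in the diff; with many partitions a Set would avoid the linear scans, but the sketch keeps the original structure.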