Posted to commits@hive.apache.org by zs...@apache.org on 2008/12/22 21:09:19 UTC

svn commit: r728759 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/metadata/ ql/src/test/org/apache/hadoop/hive/ql/metadata/

Author: zshao
Date: Mon Dec 22 12:09:18 2008
New Revision: 728759

URL: http://svn.apache.org/viewvc?rev=728759&view=rev
Log:
Reverted HIVE-126.

Removed:
    hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestPartition.java
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=728759&r1=728758&r2=728759&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Mon Dec 22 12:09:18 2008
@@ -26,9 +26,6 @@
 
   IMPROVEMENTS
 
-    HIVE-126. Don't fetch information on Partitions from HDFS instead of 
-    MetaStore. (Johan Oskarsson via zshao)
-
     HIVE-181. Restore UDFTestLength unit test for UDFs.
     (David Phillips via zshao)
 

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=728759&r1=728758&r2=728759&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Mon Dec 22 12:09:18 2008
@@ -510,6 +510,16 @@
       for (org.apache.hadoop.hive.metastore.api.Partition tpart : tParts) {
         parts.add(new Partition(tbl, tpart));
       }
+      // get the partitions on the HDFS location
+      List<Partition> hdfsPartitions = tbl.getPartitionsFromHDFS();
+      if(hdfsPartitions.size() != parts.size()) {
+        // HACK: either we are connecting to old metastore or metadata is out of sync with data
+        // TODO: for the former case, move this logic into OLD metastore and compare 
+        // the two lists here for any conflict between metadata and data
+        LOG.error("Metadata for partitions doesn't match the data in HDFS. Table name: " + tbl.getName());
+        // let's trust hdfs partitions for now
+        return hdfsPartitions;
+      }
       return parts;
     } else {
       // create an empty partition. 
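
The restored block above cross-checks the MetaStore listing against the
partition directories actually present in HDFS and, on a size mismatch,
falls back to the HDFS view. A minimal standalone sketch of that
reconciliation policy follows; the class and method names are
illustrative placeholders, not Hive's real API:

    import java.util.List;

    final class PartitionReconciler {
        // Trust the MetaStore listing only when it agrees in size with
        // the set of partition directories discovered in HDFS; otherwise
        // prefer what is physically on disk, as the restored hunk does.
        static <P> List<P> reconcile(List<P> metastoreParts,
                                     List<P> hdfsParts,
                                     String tableName) {
            if (hdfsParts.size() != metastoreParts.size()) {
                System.err.println("Metadata for partitions doesn't match "
                    + "the data in HDFS. Table name: " + tableName);
                return hdfsParts;
            }
            return metastoreParts;
        }
    }

Note that, as the HACK/TODO comments in the hunk acknowledge, only the
list sizes are compared: equal-sized but divergent lists go undetected.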

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java?rev=728759&r1=728758&r2=728759&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java Mon Dec 22 12:09:18 2008
@@ -22,6 +22,7 @@
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.regex.Matcher;
@@ -35,6 +36,7 @@
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 
 /**
  * A Hive Table Partition: is a fundamental storage unit within a Table
@@ -82,7 +84,7 @@
         // is same as the table partition. 
         this.partPath = table.getPath();
       }
-      spec = createSpec(tbl, tp);
+      spec = makeSpecFromPath();
       
       URI tmpURI = table.getDataLocation();
       try {
@@ -93,24 +95,83 @@
       }
     }
     
-    /**
-     * Creates a partition name -> value spec map object
-     * @param tbl Use the information from this table.
-     * @param tp Use the information from this partition.
-     * @return Partition name to value mapping.
-     */
-    private LinkedHashMap<String, String> createSpec(Table tbl, 
-        org.apache.hadoop.hive.metastore.api.Partition tp) {
-      
-      List<FieldSchema> fsl = tbl.getPartCols();
-      List<String> tpl = tp.getValues();
-      LinkedHashMap<String, String> spec = new LinkedHashMap<String, String>();
-      for (int i = 0; i < tbl.getPartCols().size(); i++) {
-        FieldSchema fs = fsl.get(i);
-        String value = tpl.get(i);
-        spec.put(fs.getName(), value);
+    // This is used when a Partition object is created solely from the hdfs partition directories
+    Partition(Table tbl, Path path) throws HiveException {
+      this.table = tbl;
+      // initialize the tPartition(thrift object) with the data from path and  table
+      this.tPartition = new org.apache.hadoop.hive.metastore.api.Partition();
+      this.tPartition.setDbName(tbl.getDbName());
+      this.tPartition.setTableName(tbl.getName());
+      StorageDescriptor sd = tbl.getTTable().getSd();
+      StorageDescriptor psd = new StorageDescriptor(
+          sd.getCols(), sd.getLocation(), sd.getInputFormat(), sd.getOutputFormat(),
+          sd.isCompressed(), sd.getNumBuckets(), sd.getSerdeInfo(), sd.getBucketCols(),
+          sd.getSortCols(), new HashMap<String, String>());
+      this.tPartition.setSd(psd);
+      // change the partition location
+      if(table.isPartitioned()) {
+        this.partPath = path;
+      } else {
+        // We are in the HACK territory. SemanticAnalyzer expects a single partition whose schema
+        // is same as the table partition. 
+        this.partPath = table.getPath();
+      }
+      spec = makeSpecFromPath();
+      psd.setLocation(this.partPath.toString());
+      List<String> partVals = new ArrayList<String> ();
+      tPartition.setValues(partVals);
+      for (FieldSchema field : tbl.getPartCols()) {
+        partVals.add(spec.get(field.getName()));
+      }
+      try {
+        this.partName = Warehouse.makePartName(tbl.getPartCols(), partVals);
+      } catch (MetaException e) {
+        throw new HiveException("Invalid partition key values", e);
+      }
+    }
+    
+    static final Pattern pat = Pattern.compile("([^/]+)=([^/]+)");
+    private LinkedHashMap<String, String> makeSpecFromPath() throws HiveException {
+      // Keep going up the path till it equals the parent
+      Path currPath = this.partPath;
+      LinkedHashMap<String, String> partSpec = new LinkedHashMap<String, String>();
+      List<FieldSchema> pcols = this.table.getPartCols();
+      for(int i = 0; i < pcols.size(); i++) {
+        FieldSchema col =  pcols.get(pcols.size() - i - 1);
+        if (currPath == null) {
+          throw new HiveException("Out of path components while expecting key: " + col.getName());
+        }
+        String component = currPath.getName();
+        // Check if the component is either of the form k=v
+        // or is the first component
+        // if neither is true then this is an invalid path
+        Matcher m = pat.matcher(component);
+        if (m.matches()) {
+          String k = m.group(1);
+          String v = m.group(2);
+
+          if (!k.equals(col.getName())) {
+            throw new HiveException("Key mismatch expected: " + col.getName() + " and got: " + k);
+          }
+          if (partSpec.containsKey(k)) {
+            throw new HiveException("Key " + k + " defined at two levels");
+          }
+
+          partSpec.put(k, v);
+        }
+        else {
+          throw new HiveException("Path " + currPath.toString() + " not a valid path");
+        }
+        currPath = currPath.getParent();
+      }
+      // reverse the list since we checked the part from leaf dir to table's base dir
+      LinkedHashMap<String, String> newSpec = new LinkedHashMap<String, String>();
+      for(int i = 0; i < table.getPartCols().size(); i++) {
+        FieldSchema  field = table.getPartCols().get(i);
+        String val = partSpec.get(field.getName());
+        newSpec.put(field.getName(), val);
       }
-      return spec;
+      return newSpec;
     }
 
     public URI makePartURI(LinkedHashMap<String, String> spec) throws HiveException {
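
For reference, the restored Partition(Table, Path) constructor builds a
thrift Partition directly from a directory, and makeSpecFromPath()
derives the partition spec purely from the path: it walks components
from the leaf upward, matches each against the key=value pattern,
verifies keys against the table's partition columns in reverse order,
then rebuilds the map in column order. A self-contained sketch of that
parsing, with plain strings standing in for Hadoop's Path (all names
here are illustrative):

    import java.util.Arrays;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    final class SpecFromPath {
        static final Pattern PAT = Pattern.compile("([^/]+)=([^/]+)");

        // Walk components leaf-first, consuming one per partition
        // column, much as makeSpecFromPath() climbs currPath.getParent().
        static LinkedHashMap<String, String> parse(String path,
                                                   List<String> partCols) {
            String[] components = path.split("/");
            LinkedHashMap<String, String> spec =
                new LinkedHashMap<String, String>();
            int c = components.length - 1;
            for (int i = partCols.size() - 1; i >= 0; i--, c--) {
                if (c < 0) {
                    throw new IllegalArgumentException(
                        "Out of path components while expecting key: "
                        + partCols.get(i));
                }
                Matcher m = PAT.matcher(components[c]);
                if (!m.matches() || !m.group(1).equals(partCols.get(i))) {
                    throw new IllegalArgumentException("Path " + path
                        + " not a valid partition path at " + components[c]);
                }
                spec.put(m.group(1), m.group(2));
            }
            // Rebuild in table column order, since entries were
            // collected leaf-first.
            LinkedHashMap<String, String> ordered =
                new LinkedHashMap<String, String>();
            for (String col : partCols) {
                ordered.put(col, spec.get(col));
            }
            return ordered;
        }

        public static void main(String[] args) {
            System.out.println(parse("/user/hive/warehouse/t/ds=2008-12-22/hr=12",
                                     Arrays.asList("ds", "hr")));
            // prints {ds=2008-12-22, hr=12}
        }
    }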

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java?rev=728759&r1=728758&r2=728759&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java Mon Dec 22 12:09:18 2008
@@ -28,9 +28,11 @@
 import java.util.Map;
 import java.util.Properties;
 import java.util.Vector;
+import java.util.regex.Pattern;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
@@ -50,6 +52,7 @@
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.hadoop.util.StringUtils;
 
 
 /**
@@ -563,5 +566,55 @@
   public List<Order> getSortCols() {
     return getTTable().getSd().getSortCols();
   }
+
+  private static void getPartPaths(FileSystem fs, Path p, Vector<String> partPaths) throws IOException {
+    // Base case for recursion
+    if (fs.isFile(p)) {
+      if (!partPaths.contains(p.getParent().toString())) {
+        partPaths.add(p.getParent().toString());
+      }
+    }
+    else {
+      FileStatus [] dirs = fs.listStatus(p);
+
+      if (dirs.length != 0 ) {
+        for(int i=0; i < dirs.length; ++i) {
+          getPartPaths(fs, dirs[i].getPath(), partPaths);
+        }
+      }
+      else {
+        // This is an empty partition
+        if (!partPaths.contains(p.toString())) {
+          partPaths.add(p.toString());
+        }
+      }
+    }
+
+    return;
+  }
+
+  static final Pattern pat = Pattern.compile("([^/]+)=([^/]+)");
+  public List<Partition> getPartitionsFromHDFS() throws HiveException {
+    ArrayList<Partition> ret = new ArrayList<Partition> ();
+    FileSystem fs = null;
+    Vector<String> partPaths = new Vector<String>();
+
+    try {
+      fs = FileSystem.get(getDataLocation(), Hive.get().getConf());
+      getPartPaths(fs, new Path(getDataLocation().getPath()), partPaths);
+      for(String partPath: partPaths) {
+        Path tmpPath = new Path(partPath);
+        if(!fs.getFileStatus(tmpPath).isDir()) {
+          throw new HiveException("Data in hdfs is messed up. Table " + getName() + " has a partition " + partPath + " that is not a directory");
+        }
+        ret.add(new Partition(this, tmpPath));
+      }
+    } catch (IOException e) {
+      LOG.error(StringUtils.stringifyException(e));
+      throw new HiveException("DB Error: Table " + getDataLocation() + " message: " + e.getMessage());
+    }
+
+    return ret;
+  }
   
 };
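
The recursion in getPartPaths() treats any directory that directly
contains a file as a partition path and records empty leaf directories
as empty partitions; getPartitionsFromHDFS() then checks that each
discovered path is still a directory before wrapping it in a Partition
via the new Partition(Table, Path) constructor. A standalone sketch of
the same walk, using java.io.File in place of Hadoop's FileSystem so it
runs without a cluster (names are illustrative):

    import java.io.File;
    import java.util.ArrayList;
    import java.util.List;

    final class PartPathWalker {
        // Mirror of getPartPaths(): descend recursively; a file marks
        // its parent directory as a partition path, and an empty
        // directory is itself recorded as an (empty) partition.
        static void collect(File p, List<String> partPaths) {
            if (p.isFile()) {
                String parent = p.getParentFile().getPath();
                if (!partPaths.contains(parent)) {
                    partPaths.add(parent);
                }
                return;
            }
            File[] children = p.listFiles();
            if (children == null || children.length == 0) {
                // Empty (or unreadable) directory: record as an empty
                // partition.
                if (!partPaths.contains(p.getPath())) {
                    partPaths.add(p.getPath());
                }
            } else {
                for (File child : children) {
                    collect(child, partPaths);
                }
            }
        }

        public static void main(String[] args) {
            List<String> partPaths = new ArrayList<String>();
            collect(new File(args.length > 0 ? args[0] : "."), partPaths);
            for (String p : partPaths) {
                System.out.println(p);
            }
        }
    }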