Posted to commits@hive.apache.org by zs...@apache.org on 2008/12/22 20:51:06 UTC

svn commit: r728755 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/metadata/ ql/src/test/org/apache/hadoop/hive/ql/metadata/

Author: zshao
Date: Mon Dec 22 11:51:06 2008
New Revision: 728755

URL: http://svn.apache.org/viewvc?rev=728755&view=rev
Log:
HIVE-126. Fetch Partition information from the MetaStore instead of HDFS. (Johan Oskarsson via zshao)

Added:
    hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestPartition.java
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
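
In brief: Hive.getPartitions() previously cross-checked the MetaStore's
partition list against directories discovered by scanning HDFS, and silently
preferred the HDFS view whenever the two disagreed. With this change the
MetaStore becomes the single source of truth: the HDFS scan is deleted, and a
Partition's key/value spec is now built from the stored partition values
rather than parsed out of directory names.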

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=728755&r1=728754&r2=728755&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Mon Dec 22 11:51:06 2008
@@ -26,6 +26,9 @@
 
   IMPROVEMENTS
 
+    HIVE-126. Fetch Partition information from the MetaStore instead of
+    HDFS. (Johan Oskarsson via zshao)
+
     HIVE-181. Restore UDFTestLength unit test for UDFs.
     (David Phillips via zshao)
 

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=728755&r1=728754&r2=728755&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Mon Dec 22 11:51:06 2008
@@ -510,16 +510,6 @@
       for (org.apache.hadoop.hive.metastore.api.Partition tpart : tParts) {
         parts.add(new Partition(tbl, tpart));
       }
-      // get the partitions on the HDFS location
-      List<Partition> hdfsPartitions = tbl.getPartitionsFromHDFS();
-      if(hdfsPartitions.size() != parts.size()) {
-        // HACK: either we are connecting to old metastore or metadata is out of sync with data
-        // TODO: for the former case, move this logic into OLD metastore and compare 
-        // the two lists here for any conflict between metadata and data
-        LOG.error("Metadata for partitions doesn't match the data in HDFS. Table name: " + tbl.getName());
-        // let's trust hdfs partitions for now
-        return hdfsPartitions;
-      }
       return parts;
     } else {
       // create an empty partition. 
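
A minimal caller-side sketch of the simplified flow. This hunk sits inside
getPartitions(Table), and getSpec() appears in the new test below; the
getTable signature and the table name here are assumptions for illustration:

    // Fragment, assuming a method that throws HiveException.
    // Every Partition now comes straight from the MetaStore,
    // with no HDFS directory scan behind it.
    Hive db = Hive.get();
    Table tbl = db.getTable("default", "srcpart");  // illustrative table
    for (Partition p : db.getPartitions(tbl)) {
      System.out.println(p.getSpec());              // e.g. {ds=2008-12-22, hr=12}
    }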

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java?rev=728755&r1=728754&r2=728755&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java Mon Dec 22 11:51:06 2008
@@ -22,7 +22,6 @@
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.regex.Matcher;
@@ -36,7 +35,6 @@
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 
 /**
  * A Hive Table Partition: is a fundamental storage unit within a Table
@@ -84,7 +82,7 @@
         // is same as the table partition. 
         this.partPath = table.getPath();
       }
-      spec = makeSpecFromPath();
+      spec = createSpec(tbl, tp);
       
       URI tmpURI = table.getDataLocation();
       try {
@@ -95,83 +93,24 @@
       }
     }
     
-    // This is used when a Partition object is created solely from the hdfs partition directories
-    Partition(Table tbl, Path path) throws HiveException {
-      this.table = tbl;
-      // initialize the tPartition(thrift object) with the data from path and  table
-      this.tPartition = new org.apache.hadoop.hive.metastore.api.Partition();
-      this.tPartition.setDbName(tbl.getDbName());
-      this.tPartition.setTableName(tbl.getName());
-      StorageDescriptor sd = tbl.getTTable().getSd();
-      StorageDescriptor psd = new StorageDescriptor(
-          sd.getCols(), sd.getLocation(), sd.getInputFormat(), sd.getOutputFormat(),
-          sd.isCompressed(), sd.getNumBuckets(), sd.getSerdeInfo(), sd.getBucketCols(),
-          sd.getSortCols(), new HashMap<String, String>());
-      this.tPartition.setSd(psd);
-      // change the partition location
-      if(table.isPartitioned()) {
-        this.partPath = path;
-      } else {
-        // We are in the HACK territory. SemanticAnalyzer expects a single partition whose schema
-        // is same as the table partition. 
-        this.partPath = table.getPath();
-      }
-      spec = makeSpecFromPath();
-      psd.setLocation(this.partPath.toString());
-      List<String> partVals = new ArrayList<String> ();
-      tPartition.setValues(partVals);
-      for (FieldSchema field : tbl.getPartCols()) {
-        partVals.add(spec.get(field.getName()));
-      }
-      try {
-        this.partName = Warehouse.makePartName(tbl.getPartCols(), partVals);
-      } catch (MetaException e) {
-        throw new HiveException("Invalid partition key values", e);
-      }
-    }
-    
-    static final Pattern pat = Pattern.compile("([^/]+)=([^/]+)");
-    private LinkedHashMap<String, String> makeSpecFromPath() throws HiveException {
-      // Keep going up the path till it equals the parent
-      Path currPath = this.partPath;
-      LinkedHashMap<String, String> partSpec = new LinkedHashMap<String, String>();
-      List<FieldSchema> pcols = this.table.getPartCols();
-      for(int i = 0; i < pcols.size(); i++) {
-        FieldSchema col =  pcols.get(pcols.size() - i - 1);
-        if (currPath == null) {
-          throw new HiveException("Out of path components while expecting key: " + col.getName());
-        }
-        String component = currPath.getName();
-        // Check if the component is either of the form k=v
-        // or is the first component
-        // if neither is true then this is an invalid path
-        Matcher m = pat.matcher(component);
-        if (m.matches()) {
-          String k = m.group(1);
-          String v = m.group(2);
-
-          if (!k.equals(col.getName())) {
-            throw new HiveException("Key mismatch expected: " + col.getName() + " and got: " + k);
-          }
-          if (partSpec.containsKey(k)) {
-            throw new HiveException("Key " + k + " defined at two levels");
-          }
-
-          partSpec.put(k, v);
-        }
-        else {
-          throw new HiveException("Path " + currPath.toString() + " not a valid path");
-        }
-        currPath = currPath.getParent();
-      }
-      // reverse the list since we checked the part from leaf dir to table's base dir
-      LinkedHashMap<String, String> newSpec = new LinkedHashMap<String, String>();
-      for(int i = 0; i < table.getPartCols().size(); i++) {
-        FieldSchema  field = table.getPartCols().get(i);
-        String val = partSpec.get(field.getName());
-        newSpec.put(field.getName(), val);
+    /**
+     * Creates a partition name -> value spec map object
+     * @param tbl Use the information from this table.
+     * @param tp Use the information from this partition.
+     * @return Partition name to value mapping.
+     */
+    private LinkedHashMap<String, String> createSpec(Table tbl, 
+        org.apache.hadoop.hive.metastore.api.Partition tp) {
+      
+      List<FieldSchema> fsl = tbl.getPartCols();
+      List<String> tpl = tp.getValues();
+      LinkedHashMap<String, String> spec = new LinkedHashMap<String, String>();
+      for (int i = 0; i < tbl.getPartCols().size(); i++) {
+        FieldSchema fs = fsl.get(i);
+        String value = tpl.get(i);
+        spec.put(fs.getName(), value);
       }
-      return newSpec;
+      return spec;
     }
 
     public URI makePartURI(LinkedHashMap<String, String> spec) throws HiveException {
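
The new createSpec() simply pairs the table's partition columns with the
thrift partition's values, in order. A self-contained sketch of the same
pairing logic, with illustrative values (assumes java.util.* and the
metastore API class org.apache.hadoop.hive.metastore.api.FieldSchema are
imported):

    List<FieldSchema> partCols = Arrays.asList(
        new FieldSchema("ds", "string", ""),
        new FieldSchema("hr", "string", ""));
    List<String> values = Arrays.asList("2008-12-22", "12");

    // Pair column i with value i, preserving partition-key order.
    LinkedHashMap<String, String> spec = new LinkedHashMap<String, String>();
    for (int i = 0; i < partCols.size(); i++) {
      spec.put(partCols.get(i).getName(), values.get(i));
    }
    // spec is now {ds=2008-12-22, hr=12}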

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java?rev=728755&r1=728754&r2=728755&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java Mon Dec 22 11:51:06 2008
@@ -28,11 +28,9 @@
 import java.util.Map;
 import java.util.Properties;
 import java.util.Vector;
-import java.util.regex.Pattern;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
@@ -52,7 +50,6 @@
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.OutputFormat;
-import org.apache.hadoop.util.StringUtils;
 
 
 /**
@@ -566,55 +563,5 @@
   public List<Order> getSortCols() {
     return getTTable().getSd().getSortCols();
   }
-
-  private static void getPartPaths(FileSystem fs, Path p, Vector<String> partPaths) throws IOException {
-    // Base case for recursion
-    if (fs.isFile(p)) {
-      if (!partPaths.contains(p.getParent().toString())) {
-        partPaths.add(p.getParent().toString());
-      }
-    }
-    else {
-      FileStatus [] dirs = fs.listStatus(p);
-
-      if (dirs.length != 0 ) {
-        for(int i=0; i < dirs.length; ++i) {
-          getPartPaths(fs, dirs[i].getPath(), partPaths);
-        }
-      }
-      else {
-        // This is an empty partition
-        if (!partPaths.contains(p.toString())) {
-          partPaths.add(p.toString());
-        }
-      }
-    }
-
-    return;
-  }
-
-  static final Pattern pat = Pattern.compile("([^/]+)=([^/]+)");
-  public List<Partition> getPartitionsFromHDFS() throws HiveException {
-    ArrayList<Partition> ret = new ArrayList<Partition> ();
-    FileSystem fs = null;
-    Vector<String> partPaths = new Vector<String>();
-
-    try {
-      fs = FileSystem.get(getDataLocation(), Hive.get().getConf());
-      getPartPaths(fs, new Path(getDataLocation().getPath()), partPaths);
-      for(String partPath: partPaths) {
-        Path tmpPath = new Path(partPath);
-        if(!fs.getFileStatus(tmpPath).isDir()) {
-          throw new HiveException("Data in hdfs is messed up. Table " + getName() + " has a partition " + partPath + " that is not a directory");
-        }
-        ret.add(new Partition(this, tmpPath));
-      }
-    } catch (IOException e) {
-      LOG.error(StringUtils.stringifyException(e));
-      throw new HiveException("DB Error: Table " + getDataLocation() + " message: " + e.getMessage());
-    }
-
-    return ret;
-  }
   
 };
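
For reference, the deleted getPartitionsFromHDFS() walked the table's
directory tree and reconstructed each partition spec by matching path
components against the pattern ([^/]+)=([^/]+). A minimal illustration of
that now-removed parsing, using the same pattern (assumes java.util.regex.*
is imported):

    Pattern pat = Pattern.compile("([^/]+)=([^/]+)");
    Matcher m = pat.matcher("ds=2008-12-22");  // one directory name component
    if (m.matches()) {
      String key = m.group(1);    // "ds"
      String value = m.group(2);  // "2008-12-22"
    }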

Added: hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestPartition.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestPartition.java?rev=728755&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestPartition.java (added)
+++ hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestPartition.java Mon Dec 22 11:51:06 2008
@@ -0,0 +1,46 @@
+package org.apache.hadoop.hive.ql.metadata;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Partition;
+
+/**
+ * Test the partition class.
+ */
+public class TestPartition extends TestCase {
+
+  private static final String PARTITION_COL = "partcol";
+  private static final String PARTITION_VALUE = "value";
+  private static final String TABLENAME = "tablename";
+
+  /**
+   * Test that the Partition spec is created properly.
+   */
+  public void testPartition() throws HiveException, URISyntaxException {
+    Partition tp = new Partition();
+    tp.setTableName(TABLENAME);
+    
+    List<String> values = new ArrayList<String>();
+    values.add(PARTITION_VALUE);
+    tp.setValues(values);
+    
+    List<FieldSchema> partCols = new ArrayList<FieldSchema>();
+    partCols.add(new FieldSchema(PARTITION_COL, "string", ""));
+    
+    Table tbl = new Table(TABLENAME);
+    tbl.setDataLocation(new URI("tmplocation"));
+    tbl.setPartCols(partCols);
+    
+    Map<String, String> spec = new org.apache.hadoop.hive.ql.metadata.Partition(tbl, tp).getSpec();
+    assertFalse(spec.isEmpty());
+    assertEquals(spec.get(PARTITION_COL), PARTITION_VALUE);
+  }
+  
+}
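
The test builds a thrift Partition and a Table directly, then checks that
getSpec() maps the partition column to its value. Assuming the ant build of
this era, it should run with something like the following (the exact
invocation is an assumption; check the project's build.xml):

    ant test -Dtestcase=TestPartition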