Posted to common-commits@hadoop.apache.org by dh...@apache.org on 2007/08/30 19:18:25 UTC

svn commit: r571247 - in /lucene/hadoop/trunk: ./ src/java/org/apache/hadoop/dfs/ src/test/org/apache/hadoop/dfs/

Author: dhruba
Date: Thu Aug 30 10:18:24 2007
New Revision: 571247

URL: http://svn.apache.org/viewvc?rev=571247&view=rev
Log:
HADOOP-1656.  The blockSize of a file is stored persistently 
in the file inode. (Dhruba Borthakur via dhruba)
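
The change replaces a derived value with a persisted one: the block size
given at file-creation time is now recorded in the inode, written into the
fsimage, and logged in the edit log, instead of being re-derived from the
length of the file's first block. A condensed sketch of where the value
flows (method names are taken from the diffs below; the chain itself is
abbreviated for illustration):

    // FSNamesystem.startFile passes the caller-supplied block size down:
    dir.addFile(src, new Block[0], replication, blockSize);
    //   -> FSDirectory.unprotectedAddFile(..., preferredBlockSize)
    //   -> new INodeFile(blocks, replication, mtime, preferredBlockSize)
    //   -> persisted via FSEditLog (toLogLong) and FSImage.saveImage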


Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/INode.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Thu Aug 30 10:18:24 2007
@@ -16,6 +16,9 @@
     HADOOP-1621.  FileStatus is now a concrete class and FileSystem.listPaths
     is deprecated and replaced with listStatus. (Chris Douglas via omalley)
 
+    HADOOP-1656.  The blockSize of a file is stored persistently in the file
+    inode. (Dhruba Borthakur via dhruba)
+
   NEW FEATURES
 
     HADOOP-1636.  Allow configuration of the number of jobs kept in

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java Thu Aug 30 10:18:24 2007
@@ -32,8 +32,9 @@
   /**
    * Compared to the previous version the following changes have been introduced:
    * 16 : removed deprecated obtainLock() and releaseLock(). 
+   * 17 : getBlockSize replaced by getPreferredBlockSize
    */
-  public static final long versionID = 16L;
+  public static final long versionID = 17L;
   
   ///////////////////////////////////////
   // File contents
@@ -248,7 +249,7 @@
    * @return The number of bytes in each block
    * @throws IOException
    */
-  public long getBlockSize(String filename) throws IOException;
+  public long getPreferredBlockSize(String filename) throws IOException;
 
   /**
    * Enter, leave or get safe mode.
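
Renaming an RPC method is an incompatible protocol change, which is why
versionID moves from 16 to 17. A minimal sketch of the handshake that makes
the bump matter (getProtocolVersion comes from VersionedProtocol, which
ClientProtocol extends; the error wording is illustrative, not from this
commit):

    long serverVersion = namenode.getProtocolVersion(
        ClientProtocol.class.getName(), ClientProtocol.versionID);
    if (serverVersion != ClientProtocol.versionID) {
      // an old client compiled against version 16 is rejected here rather
      // than failing later on a missing getPreferredBlockSize method
      throw new IOException("Protocol version mismatch: client="
                            + ClientProtocol.versionID
                            + ", namenode=" + serverVersion);
    }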

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java Thu Aug 30 10:18:24 2007
@@ -134,7 +134,7 @@
     methodNameToPolicyMap.put("renewLease", methodPolicy);
     methodNameToPolicyMap.put("getStats", methodPolicy);
     methodNameToPolicyMap.put("getDatanodeReport", methodPolicy);
-    methodNameToPolicyMap.put("getBlockSize", methodPolicy);
+    methodNameToPolicyMap.put("getPreferredBlockSize", methodPolicy);
     methodNameToPolicyMap.put("getEditLogSize", methodPolicy);
     methodNameToPolicyMap.put("complete", methodPolicy);
     methodNameToPolicyMap.put("getEditLogSize", methodPolicy);
@@ -212,7 +212,7 @@
     
   public long getBlockSize(String f) throws IOException {
     try {
-      return namenode.getBlockSize(f);
+      return namenode.getPreferredBlockSize(f);
     } catch (IOException ie) {
       LOG.warn("Problem getting block size: " + 
           StringUtils.stringifyException(ie));
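
The methodNameToPolicyMap keys are matched against Method.getName() at call
time, so a renamed RPC method silently loses its retry policy unless the key
is renamed with it, which is why the first hunk above is needed. A sketch of
how such a retrying proxy is assembled (this mirrors DFSClient's existing
setup; rpcNamenode stands for the raw RPC stub and the policy parameters are
illustrative):

    RetryPolicy methodPolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep(
        5, 200, TimeUnit.MILLISECONDS);
    Map<String, RetryPolicy> methodNameToPolicyMap =
        new HashMap<String, RetryPolicy>();
    methodNameToPolicyMap.put("getPreferredBlockSize", methodPolicy);
    // calls through this proxy retry per-method according to the map
    ClientProtocol namenode = (ClientProtocol) RetryProxy.create(
        ClientProtocol.class, rpcNamenode, methodNameToPolicyMap);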

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java Thu Aug 30 10:18:24 2007
@@ -51,7 +51,7 @@
     super(node.computeContentsLength(),
           node.isDirectory(), 
           node.isDirectory() ? 0 : ((INodeFile)node).getReplication(), 
-          node.isDirectory() ? 0 : ((INodeFile)node).getBlockSize(),
+          node.isDirectory() ? 0 : ((INodeFile)node).getPreferredBlockSize(),
           node.getModificationTime(), new Path(path));
   }
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java Thu Aug 30 10:18:24 2007
@@ -156,7 +156,7 @@
   // Version is reflected in the data storage file.
   // Versions are negative.
   // Decrement LAYOUT_VERSION to define a new version.
-  public static final int LAYOUT_VERSION = -7;
+  public static final int LAYOUT_VERSION = -8;
   // Current version: 
-  // Block Level CRCs added.
+  // added blocksize to inode
 }
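
Because layout versions are negative and decrease over time, readers test
"more negative or equal" to decide whether a field exists on disk. A sketch
of the gating pattern the loaders below rely on (illustrative; 'in' is an
assumed input stream over the image file):

    int imgVersion = in.readInt();
    if (imgVersion < FSConstants.LAYOUT_VERSION) {
      // written by a newer release than this one understands
      throw new IOException("Unexpected layout version " + imgVersion);
    }
    // layout -8 and newer images carry the per-file block size
    boolean hasStoredBlockSize = (imgVersion <= -8);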

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java Thu Aug 30 10:18:24 2007
@@ -114,7 +114,8 @@
   /**
    * Add the given filename to the fs.
    */
-  public boolean addFile(String path, Block[] blocks, short replication) {
+  public boolean addFile(String path, Block[] blocks, short replication,
+                         long preferredBlockSize) {
     waitForReady();
 
     // Always do an implicit mkdirs for parent directory tree.
@@ -122,7 +123,9 @@
     if (!mkdirs(new Path(path).getParent().toString(), modTime)) {
       return false;
     }
-    INodeFile newNode = (INodeFile)unprotectedAddFile(path, blocks, replication, modTime);
+    INodeFile newNode = (INodeFile)unprotectedAddFile(path, blocks, replication,
+                                                      modTime, 
+                                                      preferredBlockSize);
     if (newNode == null) {
       NameNode.stateChangeLog.info("DIR* FSDirectory.addFile: "
                                    +"failed to add "+path+" with "
@@ -141,12 +144,14 @@
   INode unprotectedAddFile( String path, 
                             Block[] blocks, 
                             short replication,
-                            long modificationTime) {
+                            long modificationTime,
+                            long preferredBlockSize) {
     INode newNode;
     if (blocks == null)
       newNode = new INodeDirectory(modificationTime);
     else
-      newNode = new INodeFile(blocks, replication, modificationTime);
+      newNode = new INodeFile(blocks, replication, modificationTime,
+                              preferredBlockSize);
     synchronized (rootDir) {
       try {
         newNode = rootDir.addNode(path, newNode);
@@ -304,10 +309,10 @@
   /**
    * Get the blocksize of a file
    * @param filename the filename
-   * @return the number of bytes in the first block
+   * @return the preferred block size in bytes
    * @throws IOException if it is a directory or does not exist.
    */
-  public long getBlockSize(String filename) throws IOException {
+  public long getPreferredBlockSize(String filename) throws IOException {
     synchronized (rootDir) {
       INode fileNode = rootDir.getNode(filename);
       if (fileNode == null) {
@@ -317,7 +322,7 @@
         throw new IOException("Getting block size of a directory: " + 
                               filename);
       }
-      return ((INodeFile)fileNode).getBlockSize();
+      return ((INodeFile)fileNode).getPreferredBlockSize();
     }
   }
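
Note that the method signals both failure modes through IOException rather
than a sentinel value. A caller-side sketch of the contract (the path is
hypothetical):

    try {
      long bytes = fsDir.getPreferredBlockSize("/user/dhruba/data.txt");
    } catch (IOException e) {
      // thrown both when the path names a directory and when it does not
      // exist; callers cannot distinguish the two cases from here
    }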
     

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java Thu Aug 30 10:18:24 2007
@@ -233,6 +233,7 @@
         while (true) {
           long timestamp = 0;
           long mtime = 0;
+          long blockSize = 0;
           byte opcode = -1;
           try {
             opcode = in.readByte();
@@ -253,10 +254,13 @@
               aw = new ArrayWritable(UTF8.class);
               aw.readFields(in);
               writables = aw.get(); 
-              if (logVersion >= -4 && writables.length != 2 ||
-                  logVersion < -4 && writables.length != 3) {
-                  throw new IOException("Incorrect data fortmat. " 
-                                        + "Name & replication pair expected");
+              if (-4 <= logVersion && writables.length != 2 ||
+                  -7 <= logVersion && logVersion < -4 && writables.length != 3 ||
+                  logVersion < -7 && writables.length != 4) {
+                  throw new IOException("Incorrect data format." +
+                                        " logVersion is " + logVersion +
+                                        " but writables.length is " +
+                                        writables.length + ". ");
               }
               name = (UTF8) writables[0];
               replication = Short.parseShort(
@@ -265,6 +269,9 @@
               if (logVersion < -4) {
                 mtime = Long.parseLong(((UTF8)writables[2]).toString());
               }
+              if (logVersion < -7) {
+                blockSize = Long.parseLong(((UTF8)writables[3]).toString());
+              }
             }
             // get blocks
             aw = new ArrayWritable(Block.class);
@@ -272,8 +279,21 @@
             writables = aw.get();
             Block blocks[] = new Block[writables.length];
             System.arraycopy(writables, 0, blocks, 0, blocks.length);
+
+            // Older versions of HDFS do not store the block size in the
+            // inode. If the file has more than one block, use the size of
+            // the first block as the blocksize. Otherwise leave blockSize
+            // as 0 to indicate that we do not really know the "true"
+            // blocksize of this file.
+            if (-7 <= logVersion) {
+              assert blockSize == 0;
+              if (blocks.length > 1) {
+                blockSize = blocks[0].getNumBytes();
+              }
+            }
             // add to the file tree
-            fsDir.unprotectedAddFile(name.toString(), blocks, replication, mtime);
+            fsDir.unprotectedAddFile(name.toString(), blocks, replication, 
+                                     mtime, blockSize);
             break;
           }
           case OP_SET_REPLICATION: {
@@ -302,7 +322,7 @@
               aw.readFields(in);
               writables = aw.get(); 
               if (writables.length != 3) {
-                throw new IOException("Incorrect data fortmat. " 
+                throw new IOException("Incorrect data format. " 
                                       + "Mkdir operation.");
               }
               src = (UTF8) writables[0];
@@ -324,7 +344,7 @@
               aw.readFields(in);
               writables = aw.get(); 
               if (writables.length != 2) {
-                throw new IOException("Incorrect data fortmat. " 
+                throw new IOException("Incorrect data format. " 
                                       + "delete operation.");
               }
               src = (UTF8) writables[0];
@@ -345,7 +365,7 @@
               aw.readFields(in);
               writables = aw.get(); 
               if (writables.length != 2) {
-                throw new IOException("Incorrect data fortmat. " 
+                throw new IOException("Incorrect data format. " 
                                       + "Mkdir operation.");
               }
               src = (UTF8) writables[0];
@@ -480,7 +500,8 @@
     UTF8 nameReplicationPair[] = new UTF8[] { 
       new UTF8(path), 
       FSEditLog.toLogReplication(newNode.getReplication()),
-      FSEditLog.toLogTimeStamp(newNode.getModificationTime())};
+      FSEditLog.toLogLong(newNode.getModificationTime()),
+      FSEditLog.toLogLong(newNode.getPreferredBlockSize())};
     logEdit(OP_ADD,
             new ArrayWritable(UTF8.class, nameReplicationPair), 
             new ArrayWritable(Block.class, newNode.getBlocks()));
@@ -492,7 +513,7 @@
   void logMkDir(String path, INode newNode) {
     UTF8 info[] = new UTF8[] {
       new UTF8(path),
-      FSEditLog.toLogTimeStamp(newNode.getModificationTime())
+      FSEditLog.toLogLong(newNode.getModificationTime())
     };
     logEdit(OP_MKDIR, new ArrayWritable(UTF8.class, info), null);
   }
@@ -505,7 +526,7 @@
     UTF8 info[] = new UTF8[] { 
       new UTF8(src),
       new UTF8(dst),
-      FSEditLog.toLogTimeStamp(timestamp)};
+      FSEditLog.toLogLong(timestamp)};
     logEdit(OP_RENAME, new ArrayWritable(UTF8.class, info), null);
   }
   
@@ -524,7 +545,7 @@
   void logDelete(String src, long timestamp) {
     UTF8 info[] = new UTF8[] { 
       new UTF8(src),
-      FSEditLog.toLogTimeStamp(timestamp)};
+      FSEditLog.toLogLong(timestamp)};
     logEdit(OP_DELETE, new ArrayWritable(UTF8.class, info), null);
   }
   
@@ -552,7 +573,7 @@
     return Short.parseShort(replication.toString());
   }
 
-  static UTF8 toLogTimeStamp(long timestamp) {
+  static UTF8 toLogLong(long timestamp) {
     return new UTF8(Long.toString(timestamp));
   }
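
The OP_ADD changes above condense to a three-way rule: read the size when
the log records it, infer it when the inference is safe, and otherwise
admit ignorance. A sketch of that rule using the hunk's own variable names:

    long blockSize = 0;
    if (logVersion < -7) {
      // layout -8 and newer logs carry the size as a fourth writable
      blockSize = Long.parseLong(((UTF8) writables[3]).toString());
    } else if (blocks.length > 1) {
      // with more than one block, the first block must be full, so its
      // length equals the file's block size
      blockSize = blocks[0].getNumBytes();
    }
    // a single (possibly partial) block proves nothing: blockSize stays 0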
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java Thu Aug 30 10:18:24 2007
@@ -669,6 +669,7 @@
       for (int i = 0; i < numFiles; i++) {
         UTF8 name = new UTF8();
         long modificationTime = 0;
+        long blockSize = 0;
         name.readFields(in);
         // version 0 does not support per file replication
         if (!(imgVersion >= 0)) {
@@ -678,6 +679,9 @@
         if (imgVersion <= -5) {
           modificationTime = in.readLong();
         }
+        if (imgVersion <= -8) {
+          blockSize = in.readLong();
+        }
         int numBlocks = in.readInt();
         Block blocks[] = null;
         if (numBlocks > 0) {
@@ -687,8 +691,19 @@
             blocks[j].readFields(in);
           }
         }
+        // Older versions of HDFS do not store the block size in the inode.
+        // If the file has more than one block, use the size of the
+        // first block as the blocksize. Otherwise leave blockSize as 0
+        // to indicate that we do not really know the "true" blocksize of
+        // this file.
+        if (-7 <= imgVersion) {
+          assert blockSize == 0;
+          if (numBlocks > 1) {
+            blockSize = blocks[0].getNumBytes();
+          }
+        }
         fsDir.unprotectedAddFile(name.toString(), blocks, replication,
-                                 modificationTime);
+                                 modificationTime, blockSize);
       }
       
       // load datanode info
@@ -814,6 +829,7 @@
         INodeFile fileINode = (INodeFile)inode;
         out.writeShort(fileINode.getReplication());
         out.writeLong(inode.getModificationTime());
+        out.writeLong(fileINode.getPreferredBlockSize());
         Block[] blocks = fileINode.getBlocks();
         out.writeInt(blocks.length);
         for (Block blk : blocks)
@@ -823,6 +839,7 @@
       // write directory inode
       out.writeShort(0);  // replication
       out.writeLong(inode.getModificationTime());
+      out.writeLong(0);   // preferred block size
       out.writeInt(0);    // # of blocks
     }
     for(INode child : ((INodeDirectory)inode).getChildren()) {
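
Collected from the two write sites above, a file inode now serializes as
path, replication, mtime, preferred block size, block count, and blocks;
directories write zeros for the file-only fields. A writer-side sketch (the
path variable is assumed; the field order comes from the hunk):

    new UTF8(fullPathName).write(out);                 // path
    out.writeShort(fileINode.getReplication());        // replication
    out.writeLong(fileINode.getModificationTime());    // mtime
    out.writeLong(fileINode.getPreferredBlockSize());  // new in layout -8
    Block[] blocks = fileINode.getBlocks();
    out.writeInt(blocks.length);                       // # of blocks
    for (Block blk : blocks)
      blk.write(out);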

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Thu Aug 30 10:18:24 2007
@@ -673,8 +673,8 @@
     return true;
   }
     
-  public long getBlockSize(String filename) throws IOException {
-    return dir.getBlockSize(filename);
+  public long getPreferredBlockSize(String filename) throws IOException {
+    return dir.getPreferredBlockSize(filename);
   }
     
   /**
@@ -824,7 +824,7 @@
     // Now we can add the name to the filesystem. This file has no
     // blocks associated with it.
     //
-    if (!dir.addFile(src, new Block[0], replication)) {
+    if (!dir.addFile(src, new Block[0], replication, blockSize)) {
       throw new IOException("DIR* NameSystem.startFile: " +
                             "Unable to add file to namespace.");
     }

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/INode.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/INode.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/INode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/INode.java Thu Aug 30 10:18:24 2007
@@ -404,13 +404,16 @@
 class INodeFile extends INode {
   private Block blocks[] = null;
   protected short blockReplication;
+  protected long preferredBlockSize;
 
   /**
    */
-  INodeFile(Block blocks[], short replication, long modificationTime) {
+  INodeFile(Block blocks[], short replication, long modificationTime,
+            long preferredBlockSize) {
     super(modificationTime);
     this.blocks = blocks;
     this.blockReplication = replication;
+    this.preferredBlockSize = preferredBlockSize;
   }
 
   boolean isDirectory() {
@@ -464,14 +467,10 @@
   }
 
   /**
-   * Get the block size of the first block
+   * Get the preferred block size of the file.
    * @return the number of bytes
    */
-  long getBlockSize() {
-    if (blocks == null || blocks.length == 0) {
-      return 0;
-    } else {
-      return blocks[0].getNumBytes();
-    }
+  long getPreferredBlockSize() {
+    return preferredBlockSize;
   }
 }
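
The deleted accessor shows exactly what was ambiguous: for a file with at
most one block, the stored byte count says nothing about the configured
block size. A sketch with hypothetical numbers:

    // A 1 MB file created with a 128 MB block size holds one 1 MB block.
    INodeFile inode = new INodeFile(blocks, (short) 3,
                                    System.currentTimeMillis(),
                                    128L * 1024 * 1024);
    inode.getPreferredBlockSize();  // 128 MB, as configured
    // the old getBlockSize() would have answered blocks[0].getNumBytes(),
    // i.e. 1 MB: the data length, not the block size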

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java Thu Aug 30 10:18:24 2007
@@ -373,8 +373,8 @@
     }
   }
 
-  public long getBlockSize(String filename) throws IOException {
-    return namesystem.getBlockSize(filename);
+  public long getPreferredBlockSize(String filename) throws IOException {
+    return namesystem.getPreferredBlockSize(filename);
   }
     
   /**

Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java Thu Aug 30 10:18:24 2007
@@ -40,7 +40,8 @@
     new Path(System.getProperty("test.build.data","/tmp"))
     .toString().replace(' ', '+');
   
-  private void writeFile(FileSystem fileSys, Path name, int repl)
+  private void writeFile(FileSystem fileSys, Path name, int repl,
+                         int fileSize, int blockSize)
     throws IOException {
     // create and write a file that contains three blocks of data
     FSDataOutputStream stm = fileSys.create(name, true,
@@ -94,12 +95,14 @@
       // create a file in home directory
       //
       Path file1 = new Path("filestatus.dat");
-      writeFile(fs, file1, 1);
+      writeFile(fs, file1, 1, fileSize, blockSize);
       System.out.println("Created file filestatus.dat with one "
                          + " replicas.");
       checkFile(fs, file1, 1);
       assertTrue(file1 + " should be a file", 
                   fs.getFileStatus(file1).isDir() == false);
+      assertTrue(fs.getFileStatus(file1).getBlockSize() == blockSize);
+      assertTrue(fs.getFileStatus(file1).getReplication() == 1);
       System.out.println("Path : \"" + file1 + "\"");
 
       // create a directory
@@ -110,7 +113,20 @@
       assertTrue(dir + " should be a directory", 
                  fs.getFileStatus(path).isDir() == true);
       System.out.println("Dir : \"" + dir + "\"");
-    
+
+      // create another file that is smaller than a block.
+      //
+      Path file2 = new Path("filestatus2.dat");
+      writeFile(fs, file2, 1, blockSize/4, blockSize);
+      System.out.println("Created file filestatus2.dat with one "
+                         + " replicas.");
+      checkFile(fs, file2, 1);
+      System.out.println("Path : \"" + file2 + "\"");
+
+      // verify file attributes
+      assertTrue(fs.getFileStatus(file2).getBlockSize() == blockSize);
+      assertTrue(fs.getFileStatus(file2).getReplication() == 1);
+
     } finally {
       fs.close();
       cluster.shutdown();
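
The hunk extends writeFile's signature, but the body update falls outside
the shown context (the "three blocks of data" comment it retains is now
stale). A sketch of how such a helper plausibly uses the new parameters;
the buffer-size key and the fixed seed are assumptions, not code from this
commit:

    private void writeFile(FileSystem fileSys, Path name, int repl,
                           int fileSize, int blockSize) throws IOException {
      // create the file with explicit replication and block size, then
      // fill it with fileSize bytes of pseudo-random data
      FSDataOutputStream stm = fileSys.create(name, true,
          fileSys.getConf().getInt("io.file.buffer.size", 4096),
          (short) repl, (long) blockSize);
      byte[] buffer = new byte[fileSize];
      new Random(0xDEADBEEFL).nextBytes(buffer);
      stm.write(buffer);
      stm.close();
    }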