Posted to common-commits@hadoop.apache.org by dh...@apache.org on 2007/08/30 19:18:25 UTC
svn commit: r571247 - in /lucene/hadoop/trunk: ./
src/java/org/apache/hadoop/dfs/ src/test/org/apache/hadoop/dfs/
Author: dhruba
Date: Thu Aug 30 10:18:24 2007
New Revision: 571247
URL: http://svn.apache.org/viewvc?rev=571247&view=rev
Log:
HADOOP-1656. The blockSize of a file is stored persistently
in the file inode. (Dhruba Borthakur via dhruba)
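In effect, the block size passed at file-creation time now survives in the
namespace and is reported back to clients verbatim, instead of being guessed
from the first block. A minimal client-side sketch of the round trip
(hypothetical usage code, not part of this commit; assumes a reachable
cluster and the FileSystem.create() overload that takes replication and
block size):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FSDataOutputStream;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;

  public class BlockSizeRoundTrip {
    public static void main(String[] args) throws Exception {
      FileSystem fs = FileSystem.get(new Configuration());
      Path p = new Path("/tmp/blocksize-demo.dat");
      long blockSize = 32L * 1024 * 1024; // 32 MB, deliberately non-default

      // create(path, overwrite, bufferSize, replication, blockSize)
      FSDataOutputStream out = fs.create(p, true, 4096, (short) 1, blockSize);
      out.write(new byte[1024]); // write far less than one block
      out.close();

      // With this change the stored preferred block size comes back (32 MB),
      // even though the file's only block is just 1 KB long.
      System.out.println(fs.getFileStatus(p).getBlockSize());
    }
  }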
Modified:
lucene/hadoop/trunk/CHANGES.txt
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/INode.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java
lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java
Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Thu Aug 30 10:18:24 2007
@@ -16,6 +16,9 @@
HADOOP-1621. FileStatus is now a concrete class and FileSystem.listPaths
is deprecated and replaced with listStatus. (Chris Douglas via omalley)
+ HADOOP-1656. The blockSize of a file is stored persistently in the file
+ inode. (Dhruba Borthakur via dhruba)
+
NEW FEATURES
HADOOP-1636. Allow configuration of the number of jobs kept in
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java Thu Aug 30 10:18:24 2007
@@ -32,8 +32,9 @@
/**
* Compared to the previous version the following changes have been introduced:
* 16 : removed deprecated obtainLock() and releaseLock().
+ * 17 : getBlockSize replaced by getPreferredBlockSize
*/
- public static final long versionID = 16L;
+ public static final long versionID = 17L;
///////////////////////////////////////
// File contents
@@ -248,7 +249,7 @@
* @return The number of bytes in each block
* @throws IOException
*/
- public long getBlockSize(String filename) throws IOException;
+ public long getPreferredBlockSize(String filename) throws IOException;
/**
* Enter, leave or get safe mode.
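Renaming an RPC method is an incompatible wire change, hence the bump from
16 to 17: Hadoop's RPC layer compares the client's expected versionID with
the server's and refuses mismatched connections rather than dispatching to a
method that no longer exists. A toy restatement of that convention
(hypothetical code, not the actual RPC layer):

  public class ProtocolVersionCheck {
    static final long CLIENT_VERSION_ID = 16L; // a pre-upgrade client

    public static void main(String[] args) {
      long serverVersionID = 17L; // a NameNode running this commit
      if (serverVersionID != CLIENT_VERSION_ID) {
        throw new RuntimeException("Protocol version mismatch: client expects "
            + CLIENT_VERSION_ID + " but server speaks " + serverVersionID);
      }
      // Versions match: safe to call getPreferredBlockSize() and friends.
    }
  }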
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java Thu Aug 30 10:18:24 2007
@@ -134,7 +134,7 @@
methodNameToPolicyMap.put("renewLease", methodPolicy);
methodNameToPolicyMap.put("getStats", methodPolicy);
methodNameToPolicyMap.put("getDatanodeReport", methodPolicy);
- methodNameToPolicyMap.put("getBlockSize", methodPolicy);
+ methodNameToPolicyMap.put("getPreferredBlockSize", methodPolicy);
methodNameToPolicyMap.put("getEditLogSize", methodPolicy);
methodNameToPolicyMap.put("complete", methodPolicy);
methodNameToPolicyMap.put("getEditLogSize", methodPolicy);
@@ -212,7 +212,7 @@
public long getBlockSize(String f) throws IOException {
try {
- return namenode.getBlockSize(f);
+ return namenode.getPreferredBlockSize(f);
} catch (IOException ie) {
LOG.warn("Problem getting block size: " +
StringUtils.stringifyException(ie));
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java Thu Aug 30 10:18:24 2007
@@ -51,7 +51,7 @@
super(node.computeContentsLength(),
node.isDirectory(),
node.isDirectory() ? 0 : ((INodeFile)node).getReplication(),
- node.isDirectory() ? 0 : ((INodeFile)node).getBlockSize(),
+ node.isDirectory() ? 0 : ((INodeFile)node).getPreferredBlockSize(),
node.getModificationTime(), new Path(path));
}
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java Thu Aug 30 10:18:24 2007
@@ -156,7 +156,7 @@
// Version is reflected in the data storage file.
// Versions are negative.
// Decrement LAYOUT_VERSION to define a new version.
- public static final int LAYOUT_VERSION = -7;
+ public static final int LAYOUT_VERSION = -8;
// Current version:
- // Block Level CRCs added.
+ // Block size added to the file inode.
}
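LAYOUT_VERSION grows downward, so "at least as new as version N" is spelled
version <= N. A small illustration (hypothetical code) of the gate the
FSImage loader applies below:

  public class LayoutVersionGate {
    public static void main(String[] args) {
      int imgVersion = -7; // as read from the image file header
      // Versions -8 and below (newer) persist a per-inode block size.
      boolean hasBlockSizeField = imgVersion <= -8;
      System.out.println("image version " + imgVersion
          + " stores block size: " + hasBlockSizeField);
    }
  }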
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java Thu Aug 30 10:18:24 2007
@@ -114,7 +114,8 @@
/**
* Add the given filename to the fs.
*/
- public boolean addFile(String path, Block[] blocks, short replication) {
+ public boolean addFile(String path, Block[] blocks, short replication,
+ long preferredBlockSize) {
waitForReady();
// Always do an implicit mkdirs for parent directory tree.
@@ -122,7 +123,9 @@
if (!mkdirs(new Path(path).getParent().toString(), modTime)) {
return false;
}
- INodeFile newNode = (INodeFile)unprotectedAddFile(path, blocks, replication, modTime);
+ INodeFile newNode = (INodeFile)unprotectedAddFile(path, blocks, replication,
+ modTime,
+ preferredBlockSize);
if (newNode == null) {
NameNode.stateChangeLog.info("DIR* FSDirectory.addFile: "
+"failed to add "+path+" with "
@@ -141,12 +144,14 @@
INode unprotectedAddFile( String path,
Block[] blocks,
short replication,
- long modificationTime) {
+ long modificationTime,
+ long preferredBlockSize) {
INode newNode;
if (blocks == null)
newNode = new INodeDirectory(modificationTime);
else
- newNode = new INodeFile(blocks, replication, modificationTime);
+ newNode = new INodeFile(blocks, replication, modificationTime,
+ preferredBlockSize);
synchronized (rootDir) {
try {
newNode = rootDir.addNode(path, newNode);
@@ -304,10 +309,10 @@
/**
* Get the blocksize of a file
* @param filename the filename
- * @return the number of bytes in the first block
+ * @return the preferred block size in bytes
* @throws IOException if it is a directory or does not exist.
*/
- public long getBlockSize(String filename) throws IOException {
+ public long getPreferredBlockSize(String filename) throws IOException {
synchronized (rootDir) {
INode fileNode = rootDir.getNode(filename);
if (fileNode == null) {
@@ -317,7 +322,7 @@
throw new IOException("Getting block size of a directory: " +
filename);
}
- return ((INodeFile)fileNode).getBlockSize();
+ return ((INodeFile)fileNode).getPreferredBlockSize();
}
}
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java Thu Aug 30 10:18:24 2007
@@ -233,6 +233,7 @@
while (true) {
long timestamp = 0;
long mtime = 0;
+ long blockSize = 0;
byte opcode = -1;
try {
opcode = in.readByte();
@@ -253,10 +254,13 @@
aw = new ArrayWritable(UTF8.class);
aw.readFields(in);
writables = aw.get();
- if (logVersion >= -4 && writables.length != 2 ||
- logVersion < -4 && writables.length != 3) {
- throw new IOException("Incorrect data fortmat. "
- + "Name & replication pair expected");
+ if (-4 <= logVersion && writables.length != 2 ||
+ -7 <= logVersion && logVersion < -4 && writables.length != 3 ||
+ logVersion < -7 && writables.length != 4) {
+ throw new IOException("Incorrect data format." +
+ " logVersion is " + logVersion +
+ " but writables.length is " +
+ writables.length + ". ");
}
name = (UTF8) writables[0];
replication = Short.parseShort(
@@ -265,6 +269,9 @@
if (logVersion < -4) {
mtime = Long.parseLong(((UTF8)writables[2]).toString());
}
+ if (logVersion < -7) {
+ blockSize = Long.parseLong(((UTF8)writables[3]).toString());
+ }
}
// get blocks
aw = new ArrayWritable(Block.class);
@@ -272,8 +279,21 @@
writables = aw.get();
Block blocks[] = new Block[writables.length];
System.arraycopy(writables, 0, blocks, 0, blocks.length);
+
+ // Older versions of HDFS do not store the block size in the inode.
+ // If the file has more than one block, use the size of the
+ // first block as the blocksize. Otherwise leave the blockSize as 0
+ // to indicate that we do not really know the "true" blocksize of
+ // this file.
+ if (-7 <= logVersion) {
+ assert blockSize == 0;
+ if (blocks.length > 1) {
+ blockSize = blocks[0].getNumBytes();
+ }
+ }
// add to the file tree
- fsDir.unprotectedAddFile(name.toString(), blocks, replication, mtime);
+ fsDir.unprotectedAddFile(name.toString(), blocks, replication,
+ mtime, blockSize);
break;
}
case OP_SET_REPLICATION: {
@@ -302,7 +322,7 @@
aw.readFields(in);
writables = aw.get();
if (writables.length != 3) {
- throw new IOException("Incorrect data fortmat. "
+ throw new IOException("Incorrect data format. "
+ "Mkdir operation.");
}
src = (UTF8) writables[0];
@@ -324,7 +344,7 @@
aw.readFields(in);
writables = aw.get();
if (writables.length != 2) {
- throw new IOException("Incorrect data fortmat. "
+ throw new IOException("Incorrect data format. "
+ "delete operation.");
}
src = (UTF8) writables[0];
@@ -345,7 +365,7 @@
aw.readFields(in);
writables = aw.get();
if (writables.length != 2) {
- throw new IOException("Incorrect data fortmat. "
+ throw new IOException("Incorrect data format. "
+ "Mkdir operation.");
}
src = (UTF8) writables[0];
@@ -480,7 +500,8 @@
UTF8 nameReplicationPair[] = new UTF8[] {
new UTF8(path),
FSEditLog.toLogReplication(newNode.getReplication()),
- FSEditLog.toLogTimeStamp(newNode.getModificationTime())};
+ FSEditLog.toLogLong(newNode.getModificationTime()),
+ FSEditLog.toLogLong(newNode.getPreferredBlockSize())};
logEdit(OP_ADD,
new ArrayWritable(UTF8.class, nameReplicationPair),
new ArrayWritable(Block.class, newNode.getBlocks()));
@@ -492,7 +513,7 @@
void logMkDir(String path, INode newNode) {
UTF8 info[] = new UTF8[] {
new UTF8(path),
- FSEditLog.toLogTimeStamp(newNode.getModificationTime())
+ FSEditLog.toLogLong(newNode.getModificationTime())
};
logEdit(OP_MKDIR, new ArrayWritable(UTF8.class, info), null);
}
@@ -505,7 +526,7 @@
UTF8 info[] = new UTF8[] {
new UTF8(src),
new UTF8(dst),
- FSEditLog.toLogTimeStamp(timestamp)};
+ FSEditLog.toLogLong(timestamp)};
logEdit(OP_RENAME, new ArrayWritable(UTF8.class, info), null);
}
@@ -524,7 +545,7 @@
void logDelete(String src, long timestamp) {
UTF8 info[] = new UTF8[] {
new UTF8(src),
- FSEditLog.toLogTimeStamp(timestamp)};
+ FSEditLog.toLogLong(timestamp)};
logEdit(OP_DELETE, new ArrayWritable(UTF8.class, info), null);
}
@@ -552,7 +573,7 @@
return Short.parseShort(replication.toString());
}
- static UTF8 toLogTimeStamp(long timestamp) {
+ static UTF8 toLogLong(long timestamp) {
return new UTF8(Long.toString(timestamp));
}
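For edit logs older than layout version -8, the block-size inference can be
restated as a standalone helper (hypothetical code mirroring the heuristic
in the OP_ADD reader above):

  public class BlockSizeInference {
    // With two or more blocks, every block but the last is full, so the
    // first block's length equals the file's block size. With zero or one
    // block the true block size is unknowable, and 0 records that fact.
    static long inferBlockSize(long[] blockLengths) {
      return blockLengths.length > 1 ? blockLengths[0] : 0L;
    }

    public static void main(String[] args) {
      // Three blocks at 64 MB preferred size: prints 67108864.
      System.out.println(inferBlockSize(new long[] {67108864L, 67108864L, 11L}));
      // A single partial block: prints 0 (unknown).
      System.out.println(inferBlockSize(new long[] {1024L}));
    }
  }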
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java Thu Aug 30 10:18:24 2007
@@ -669,6 +669,7 @@
for (int i = 0; i < numFiles; i++) {
UTF8 name = new UTF8();
long modificationTime = 0;
+ long blockSize = 0;
name.readFields(in);
// version 0 does not support per file replication
if (!(imgVersion >= 0)) {
@@ -678,6 +679,9 @@
if (imgVersion <= -5) {
modificationTime = in.readLong();
}
+ if (imgVersion <= -8) {
+ blockSize = in.readLong();
+ }
int numBlocks = in.readInt();
Block blocks[] = null;
if (numBlocks > 0) {
@@ -687,8 +691,19 @@
blocks[j].readFields(in);
}
}
+ // Older versions of HDFS do not store the block size in the inode.
+ // If the file has more than one block, use the size of the
+ // first block as the blocksize. Otherwise leave the blockSize as 0
+ // to indicate that we do not really know the "true" blocksize of this
+ // file.
+ if (-7 <= imgVersion) {
+ assert blockSize == 0;
+ if (numBlocks > 1) {
+ blockSize = blocks[0].getNumBytes();
+ }
+ }
fsDir.unprotectedAddFile(name.toString(), blocks, replication,
- modificationTime);
+ modificationTime, blockSize);
}
// load datanode info
@@ -814,6 +829,7 @@
INodeFile fileINode = (INodeFile)inode;
out.writeShort(fileINode.getReplication());
out.writeLong(inode.getModificationTime());
+ out.writeLong(fileINode.getPreferredBlockSize());
Block[] blocks = fileINode.getBlocks();
out.writeInt(blocks.length);
for (Block blk : blocks)
@@ -823,6 +839,7 @@
// write directory inode
out.writeShort(0); // replication
out.writeLong(inode.getModificationTime());
+ out.writeLong(0); // preferred block size
out.writeInt(0); // # of blocks
}
for(INode child : ((INodeDirectory)inode).getChildren()) {
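After this change each file inode in the image carries one extra long. The
resulting record layout, as a self-contained sketch (hypothetical writer
over plain DataOutput; the real code writes UTF8 and Block writables):

  import java.io.DataOutput;
  import java.io.IOException;

  public class ImageRecordSketch {
    static void writeFileRecord(DataOutput out, String path, short replication,
        long mtime, long preferredBlockSize, long[] blockLengths)
        throws IOException {
      out.writeUTF(path);                 // the real code writes a UTF8 writable
      out.writeShort(replication);        // 0 for directories
      out.writeLong(mtime);
      out.writeLong(preferredBlockSize);  // new in -8; 0 for directories
      out.writeInt(blockLengths.length);  // 0 for directories
      for (long len : blockLengths) {
        out.writeLong(len);               // stand-in for Block.write(out)
      }
    }
  }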
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Thu Aug 30 10:18:24 2007
@@ -673,8 +673,8 @@
return true;
}
- public long getBlockSize(String filename) throws IOException {
- return dir.getBlockSize(filename);
+ public long getPreferredBlockSize(String filename) throws IOException {
+ return dir.getPreferredBlockSize(filename);
}
/**
@@ -824,7 +824,7 @@
// Now we can add the name to the filesystem. This file has no
// blocks associated with it.
//
- if (!dir.addFile(src, new Block[0], replication)) {
+ if (!dir.addFile(src, new Block[0], replication, blockSize)) {
throw new IOException("DIR* NameSystem.startFile: " +
"Unable to add file to namespace.");
}
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/INode.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/INode.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/INode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/INode.java Thu Aug 30 10:18:24 2007
@@ -404,13 +404,16 @@
class INodeFile extends INode {
private Block blocks[] = null;
protected short blockReplication;
+ protected long preferredBlockSize;
/**
*/
- INodeFile(Block blocks[], short replication, long modificationTime) {
+ INodeFile(Block blocks[], short replication, long modificationTime,
+ long preferredBlockSize) {
super(modificationTime);
this.blocks = blocks;
this.blockReplication = replication;
+ this.preferredBlockSize = preferredBlockSize;
}
boolean isDirectory() {
@@ -464,14 +467,10 @@
}
/**
- * Get the block size of the first block
+ * Get the preferred block size of the file.
* @return the number of bytes
*/
- long getBlockSize() {
- if (blocks == null || blocks.length == 0) {
- return 0;
- } else {
- return blocks[0].getNumBytes();
- }
+ long getPreferredBlockSize() {
+ return preferredBlockSize;
}
}
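The rename is not cosmetic; the old accessor answered a different question.
A worked example with hypothetical values:

  public class PreferredVsFirstBlock {
    public static void main(String[] args) {
      long preferredBlockSize = 128L * 1024 * 1024; // requested at create()
      long firstBlockLength = 1024L;                // only 1 KB ever written

      // Old getBlockSize(): blocks[0].getNumBytes(), i.e. the length of a
      // partial block, not the block size the file was created with.
      long oldAnswer = firstBlockLength;            // 1024

      // New getPreferredBlockSize(): the persisted creation-time value.
      long newAnswer = preferredBlockSize;          // 134217728

      System.out.println("old: " + oldAnswer + "  new: " + newAnswer);
    }
  }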
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java Thu Aug 30 10:18:24 2007
@@ -373,8 +373,8 @@
}
}
- public long getBlockSize(String filename) throws IOException {
- return namesystem.getBlockSize(filename);
+ public long getPreferredBlockSize(String filename) throws IOException {
+ return namesystem.getPreferredBlockSize(filename);
}
/**
Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java?rev=571247&r1=571246&r2=571247&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java Thu Aug 30 10:18:24 2007
@@ -40,7 +40,8 @@
new Path(System.getProperty("test.build.data","/tmp"))
.toString().replace(' ', '+');
- private void writeFile(FileSystem fileSys, Path name, int repl)
+ private void writeFile(FileSystem fileSys, Path name, int repl,
+ int fileSize, int blockSize)
throws IOException {
// create and write a file that contains three blocks of data
FSDataOutputStream stm = fileSys.create(name, true,
@@ -94,12 +95,14 @@
// create a file in home directory
//
Path file1 = new Path("filestatus.dat");
- writeFile(fs, file1, 1);
+ writeFile(fs, file1, 1, fileSize, blockSize);
System.out.println("Created file filestatus.dat with one "
+ " replicas.");
checkFile(fs, file1, 1);
assertTrue(file1 + " should be a file",
fs.getFileStatus(file1).isDir() == false);
+ assertTrue(fs.getFileStatus(file1).getBlockSize() == blockSize);
+ assertTrue(fs.getFileStatus(file1).getReplication() == 1);
System.out.println("Path : \"" + file1 + "\"");
// create a directory
@@ -110,7 +113,20 @@
assertTrue(dir + " should be a directory",
fs.getFileStatus(path).isDir() == true);
System.out.println("Dir : \"" + dir + "\"");
-
+
+ // create another file that is smaller than a block.
+ //
+ Path file2 = new Path("filestatus2.dat");
+ writeFile(fs, file2, 1, blockSize/4, blockSize);
+ System.out.println("Created file filestatus2.dat with one "
+ + " replicas.");
+ checkFile(fs, file2, 1);
+ System.out.println("Path : \"" + file2 + "\"");
+
+ // verify file attributes
+ assertTrue(fs.getFileStatus(file2).getBlockSize() == blockSize);
+ assertTrue(fs.getFileStatus(file2).getReplication() == 1);
+
} finally {
fs.close();
cluster.shutdown();