You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by mc...@apache.org on 2006/02/13 08:25:37 UTC
svn commit: r377317 - in /lucene/hadoop/trunk/src/java/org/apache/hadoop:
dfs/ fs/
Author: mc
Date: Sun Feb 12 23:25:35 2006
New Revision: 377317
URL: http://svn.apache.org/viewcvs?rev=377317&view=rev
Log:
Add a bunch of updated comments and JavaDocs to
the Distributed File System package.
Modified:
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlockCommand.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeProtocol.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/package.html
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlockCommand.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlockCommand.java?rev=377317&r1=377316&r2=377317&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlockCommand.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlockCommand.java Sun Feb 12 23:25:35 2006
@@ -20,10 +20,13 @@
import java.io.*;
/****************************************************
- * A BlockCommand is an instruction to a datanode regarding
- * some blocks under its control
+ * A BlockCommand is an instruction to a datanode
+ * regarding some blocks under its control. It tells
+ * the DataNode to either invalidate a set of indicated
+ * blocks, or to copy a set of indicated blocks to
+ * another DataNode.
*
- * @author Michael Cafarella
+ * @author Mike Cafarella
****************************************************/
class BlockCommand implements Writable {
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java?rev=377317&r1=377316&r2=377317&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java Sun Feb 12 23:25:35 2006
@@ -18,65 +18,98 @@
import java.io.*;
/**********************************************************************
- * Protocol that an DFS client uses to communicate with the NameNode.
- * It's used to manipulate the namespace, and obtain datanode info.
+ * ClientProtocol is used by a piece of DFS user code to communicate
+ * with the NameNode. User code can manipulate the directory namespace,
+ * as well as open/close file streams, etc.
*
* @author Mike Cafarella
**********************************************************************/
interface ClientProtocol {
- /**
- * Open an existing file. Get back block and datanode info
+ ///////////////////////////////////////
+ // File contents
+ ///////////////////////////////////////
+ /**
+ * Open an existing file, at the given name. Returns block
+ * and DataNode info. The client will then have to contact
+ * each indicated DataNode to obtain the actual data. There
+ * is no need to call close() or any other function after
+ * calling open().
*/
public LocatedBlock[] open(String src) throws IOException;
/**
- * Create a new file. Get back block and datanode info
+ * Create a new file. Get back block and datanode info,
+ * which describes where the first block should be written.
+ *
+ * Successfully calling this method prevents any other
+ * client from creating a file under the given name, but
+ * the caller must invoke complete() for the file to be
+ * added to the filesystem.
+ *
+ * Blocks have a maximum size. Clients that intend to
+ * create multi-block files must also use reportWrittenBlock()
+ * and addBlock().
*/
public LocatedBlock create(String src, String clientName, boolean overwrite) throws IOException;
/**
- * The client wants to write an additional block to the indicated
- * filename (which must currently be open for writing). Return
- * block and datanode info. A null response means the caller
- * should attempt the call again.
+ * A client that has written a block of data can report completion
+ * back to the NameNode with reportWrittenBlock(). Clients cannot
+ * obtain an additional block until the previous one has either been
+ * reported as written or abandoned.
*/
- public LocatedBlock addBlock(String src) throws IOException;
+ public void reportWrittenBlock(LocatedBlock b) throws IOException;
/**
- * The client wants to report a block it has just successfully
- * written to one or more datanodes. Client-written blocks are
- * always reported by the client, not by the datanode.
+ * If the client has not yet called reportWrittenBlock(), it can
+ * give up on the block by calling abandonBlock(). The client can then
+ * either obtain a new block, or complete or abandon the file.
+ *
+ * Any partial writes to the block will be garbage-collected.
*/
- public void reportWrittenBlock(LocatedBlock b) throws IOException;
+ public void abandonBlock(Block b, String src) throws IOException;
/**
- * The client wants to abandon writing to the indicated block,
- * part of the indicated (currently-open) filename.
+ * A client that wants to write an additional block to the
+ * indicated filename (which must currently be open for writing)
+ * should call addBlock().
+ *
+ * addBlock() returns block and datanode info, just like the initial
+ * call to create().
+ *
+ * A null response means the NameNode could not allocate a block,
+ * and that the caller should try again.
*/
- public void abandonBlock(Block b, String src) throws IOException;
+ public LocatedBlock addBlock(String src) throws IOException;
/**
- * The client wants to abandon writing to the current file, and
- * let anyone else grab it.
+ * A client that wants to abandon writing to the current file
+ * should call abandonFileInProgress(). After this call, any
+ * client can call create() to obtain the filename.
+ *
+ * Any blocks that have been written for the file will be
+ * garbage-collected.
*/
public void abandonFileInProgress(String src) throws IOException;
/**
* The client is done writing data to the given filename, and would
- * like to complete it. Returns whether the file has been closed
- * correctly (true) or whether caller should try again (false).
- * (Because the namenode is waiting for a block to complete).
+ * like to complete it.
+ *
+ * The function returns whether the file has been closed successfully.
+ * If the function returns false, the caller should try again.
+ *
+ * A call to complete() will not return true until all the file's
+ * blocks have been replicated the minimum number of times. Thus,
+ * DataNode failures may cause a client to call complete() several
+ * times before succeeding.
*/
public boolean complete(String src, String clientName) throws IOException;
-
- /**
- * The client wants to read the indicated filename at a certain offset.
- * Return a list of hostnames where the data can be found. (Return
- * a set of hostnames for every block.)
- */
- public String[][] getHints(String src, long start, long len) throws IOException;
+ ///////////////////////////////////////
+ // Namespace management
+ ///////////////////////////////////////
/**
* Rename an item in the fs namespace
*/
@@ -104,36 +137,71 @@
public boolean mkdirs(String src) throws IOException;
/**
- * The client is trying to obtain a lock. Return whether the lock has
- * been seized correctly (true), or whether the client should try again
- * (false).
+ * Get a listing of the indicated directory
+ */
+ public DFSFileInfo[] getListing(String src) throws IOException;
+
+ ///////////////////////////////////////
+ // System issues and management
+ ///////////////////////////////////////
+ /**
+ * getHints() returns a list of hostnames that store data for
+ * a specific file region. It returns a set of hostnames for
+ * every block within the indicated region.
+ *
+ * This function is very useful when writing code that considers
+ * data-placement when performing operations. For example, the
+ * MapReduce system tries to schedule tasks on the same machines
+ * as the data-block the task processes.
+ */
+ public String[][] getHints(String src, long start, long len) throws IOException;
+ /**
+ * obtainLock() is used for lock management. It returns true if
+ * the lock has been seized correctly. It returns false if the
+ * lock could not be obtained, and the client should try again.
+ *
+ * Locking is a part of most filesystems and is useful for a
+ * number of inter-process synchronization tasks.
*/
public boolean obtainLock(String src, String clientName, boolean exclusive) throws IOException;
/**
- * The client wants to release a held lock. Return whether the lock was
- * correctly released (true), or whether the client should wait and try the
- * call again (false).
+ * releaseLock() is called if the client would like to release
+ * a held lock. It returns true if the lock is correctly released.
+ * It returns false if the client should wait and try again.
*/
public boolean releaseLock(String src, String clientName) throws IOException;
/**
- * The client machine wants to obtain a lease
+ * Client programs can cause stateful changes in the NameNode
+ * that affect other clients. A client may obtain a file and
+ * neither abandon nor complete it. A client might hold a series
+ * of locks that prevent other clients from proceeding.
+ * Clearly, it would be bad if a client held a bunch of locks
+ * that it never gave up. This can happen easily if the client
+ * dies unexpectedly.
+ *
+ * So, the NameNode will revoke the locks and live file-creates
+ * for clients that it thinks have died. A client tells the
+ * NameNode that it is still alive by periodically calling
+ * renewLease(). If a certain amount of time passes since
+ * the last call to renewLease(), the NameNode assumes the
+ * client has died.
*/
public void renewLease(String clientName) throws IOException;
/**
- * Get a listing of the indicated directory
- */
- public DFSFileInfo[] getListing(String src) throws IOException;
-
- /**
* Get a set of statistics about the filesystem.
+ * Right now, only two values are returned.
+ * [0] contains the total storage capacity of the system,
+ * in bytes.
+ * [1] contains the available storage of the system, in bytes.
*/
public long[] getStats() throws IOException;
/**
* Get a full report on the system's current datanodes.
+ * One DatanodeInfo object is returned for each DataNode.
*/
public DatanodeInfo[] getDatanodeReport() throws IOException;
}
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java?rev=377317&r1=377316&r2=377317&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java Sun Feb 12 23:25:35 2006
@@ -27,8 +27,15 @@
import java.util.logging.*;
/********************************************************
- * DFSClient can connect to a Hadoop Filesystem and perform basic file tasks.
- * Connects to a namenode daemon.
+ * DFSClient can connect to a Hadoop Filesystem and
+ * perform basic file tasks. It uses the ClientProtocol
+ * to communicate with a NameNode daemon, and connects
+ * directly to DataNodes to read/write block data.
+ *
+ * Hadoop DFS users should obtain an instance of
+ * DistributedFileSystem, which uses DFSClient to handle
+ * filesystem tasks.
+ *
* @author Mike Cafarella, Tessa MacDuff
********************************************************/
class DFSClient implements FSConstants {
@@ -41,7 +48,8 @@
Daemon leaseChecker;
- /** Create a new DFSClient connected to the given namenode server.
+ /**
+ * Create a new DFSClient connected to the given namenode server.
*/
public DFSClient(InetSocketAddress nameNodeAddr, Configuration conf) {
this.namenode = (ClientProtocol) RPC.getProxy(ClientProtocol.class, nameNodeAddr, conf);
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java?rev=377317&r1=377316&r2=377317&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java Sun Feb 12 23:25:35 2006
@@ -25,14 +25,35 @@
import java.util.logging.*;
/**********************************************************
- * DataNode controls just one critical table:
- * block-> BLOCK_SIZE stream of bytes
+ * DataNode is a class (and program) that stores a set of
+ * blocks for a DFS deployment. A single deployment can
+ * have one or many DataNodes. Each DataNode communicates
+ * regularly with a single NameNode. It also communicates
+ * with client code and other DataNodes from time to time.
*
- * This info is stored on disk (the NameNode is responsible for
- * asking other machines to replicate the data). The DataNode
+ * DataNodes store a series of named blocks. The DataNode
+ * allows client code to read these blocks, or to write new
+ * block data. The DataNode may also, in response to instructions
+ * from its NameNode, delete blocks or copy blocks to/from other
+ * DataNodes.
+ *
+ * The DataNode maintains just one critical table:
+ * block-> stream of bytes (of BLOCK_SIZE or less)
+ *
+ * This info is stored on a local disk. The DataNode
* reports the table's contents to the NameNode upon startup
* and every so often afterwards.
*
+ * DataNodes spend their lives in an endless loop of asking
+ * the NameNode for something to do. A NameNode cannot connect
+ * to a DataNode directly; a NameNode simply returns values from
+ * functions invoked by a DataNode.
+ *
+ * DataNodes maintain an open server socket so that client code
+ * or other DataNodes can read/write data. The host/port for
+ * this server is reported to the NameNode, which then sends that
+ * information to clients or other DataNodes that might be interested.
+ *
* @author Mike Cafarella
**********************************************************/
public class DataNode implements FSConstants, Runnable {
@@ -73,7 +94,8 @@
private Configuration fConf;
/**
- * Create given a configuration and a dataDir.
+ * Create the DataNode given a configuration and a dataDir.
+ * 'dataDir' is where the blocks are stored.
*/
public DataNode(Configuration conf, String datadir) throws IOException {
this(InetAddress.getLocalHost().getHostName(),
@@ -82,7 +104,8 @@
}
/**
- * Needs a directory to find its data (and config info)
+ * A DataNode can also be created with configuration information
+ * explicitly given.
*/
public DataNode(String machineName, File datadir, InetSocketAddress nameNodeAddr, Configuration conf) throws IOException {
this.namenode = (DatanodeProtocol) RPC.getProxy(DatanodeProtocol.class, nameNodeAddr, conf);
@@ -112,6 +135,7 @@
}
/**
+ * Return the namenode's identifier
*/
public String getNamenode() {
//return namenode.toString();
@@ -132,7 +156,8 @@
}
/**
- * Main loop for the DataNode. Runs until shutdown.
+ * Main loop for the DataNode. Runs until shutdown,
+ * forever calling remote NameNode functions.
*/
public void offerService() throws Exception {
long wakeups = 0;
@@ -243,7 +268,10 @@
}
/**
- * Server used for receiving/sending a block of data
+ * Server used for receiving/sending a block of data.
+ * This is created to listen for requests from clients or
+ * other DataNodes. This small server does not use the
+ * Hadoop IPC mechanism.
*/
class DataXceiveServer implements Runnable {
boolean shouldListen = true;
@@ -285,6 +313,7 @@
}
/**
+ * Read/write data from/to the DataXceiveServer.
*/
public void run() {
try {
@@ -582,7 +611,8 @@
}
/**
- * Used for transferring a block of data
+ * Used for transferring a block of data. This class
+ * sends a piece of data to another DataNode.
*/
class DataTransfer implements Runnable {
InetSocketAddress curTarget;
@@ -744,7 +774,7 @@
"}";
}
- /**
+ /**
*/
public static void main(String args[]) throws IOException {
LogFormatter.setShowThreadIDs(true);
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java?rev=377317&r1=377316&r2=377317&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java Sun Feb 12 23:25:35 2006
@@ -21,7 +21,9 @@
import java.util.*;
/**************************************************
- * DatanodeInfo tracks stats on a given node
+ * DatanodeInfo tracks stats on a given DataNode,
+ * such as available storage capacity, last update
+ * time, etc.
*
* @author Mike Cafarella
**************************************************/
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeProtocol.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeProtocol.java?rev=377317&r1=377316&r2=377317&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeProtocol.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeProtocol.java Sun Feb 12 23:25:35 2006
@@ -19,17 +19,50 @@
import java.io.*;
/**********************************************************************
- * Protocol that an DFS datanode uses to communicate with the NameNode.
- * It's used to upload current load information and block records.
+ * Protocol that a DFS datanode uses to communicate with the NameNode.
+ * It's used to upload current load information and block reports.
+ *
+ * The only way a NameNode can communicate with a DataNode is by
+ * returning values from these functions.
*
* @author Michael Cafarella
**********************************************************************/
interface DatanodeProtocol {
-
+ /**
+ * sendHeartbeat() tells the NameNode that the DataNode is still
+ * alive and well. Includes some status info, too.
+ */
public void sendHeartbeat(String sender, long capacity, long remaining) throws IOException;
+
+ /**
+ * blockReport() tells the NameNode about all the locally-stored blocks.
+ * The NameNode returns an array of Blocks that have become obsolete
+ * and should be deleted. This function is meant to upload *all*
+ * the locally-stored blocks. It's invoked upon startup and then
+ * infrequently afterwards.
+ */
public Block[] blockReport(String sender, Block blocks[]) throws IOException;
+
+ /**
+ * blockReceived() allows the DataNode to tell the NameNode about
+ * recently-received block data. For example, whenever client code
+ * writes a new Block here, or another DataNode copies a Block to
+ * this DataNode, it will call blockReceived().
+ */
public void blockReceived(String sender, Block blocks[]) throws IOException;
+
+ /**
+ * errorReport() tells the NameNode about something that has gone
+ * awry. Useful for debugging.
+ */
public void errorReport(String sender, String msg) throws IOException;
+ /**
+ * The DataNode periodically calls getBlockwork(). It includes a
+ * small amount of status information, but mainly gives the NameNode
+ * a chance to return a "BlockCommand" object. A BlockCommand tells
+ * the DataNode to invalidate local block(s), or to copy them to other
+ * DataNodes, etc.
+ */
public BlockCommand getBlockwork(String sender, int xmitsInProgress) throws IOException;
}
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java?rev=377317&r1=377316&r2=377317&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java Sun Feb 12 23:25:35 2006
@@ -26,8 +26,12 @@
/****************************************************************
* Implementation of the abstract FileSystem for the DFS system.
- * This is the distributed file system. It can be distributed over
- * 1 or more machines
+ * This object is the way end-user code interacts with a Hadoop
+ * DistributedFileSystem.
+ *
+ * It's substantially a wrapper around the DFSClient class, with
+ * a few extra functions.
+ *
* @author Mike Cafarella
*****************************************************************/
public class DistributedFileSystem extends FileSystem {
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?rev=377317&r1=377316&r2=377317&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Sun Feb 12 23:25:35 2006
@@ -24,7 +24,10 @@
import java.util.logging.*;
/***************************************************
- * The FSNamesystem tracks several important tables.
+ * FSNamesystem does the actual bookkeeping work for the
+ * NameNode.
+ *
+ * It tracks several important tables.
*
* 1) valid fsname --> blocklist (kept on disk, logged)
* 2) Set of all valid blocks (inverted #1)
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java?rev=377317&r1=377316&r2=377317&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java Sun Feb 12 23:25:35 2006
@@ -24,13 +24,34 @@
import java.util.logging.*;
/**********************************************************
- * NameNode controls two critical tables:
- * 1) filename->blocksequence,version
- * 2) block->machinelist
+ * NameNode serves as both directory namespace manager and
+ * "inode table" for the Hadoop DFS. There is a single NameNode
+ * running in any DFS deployment. (Well, except when there
+ * is a second backup/failover NameNode.)
+ *
+ * The NameNode controls two critical tables:
+ * 1) filename->blocksequence (namespace)
+ * 2) block->machinelist ("inodes")
*
* The first table is stored on disk and is very precious.
* The second table is rebuilt every time the NameNode comes
* up.
+ *
+ * 'NameNode' refers to both this class and the 'NameNode server'.
+ * The 'FSNamesystem' class actually performs most of the filesystem
+ * management. The majority of the 'NameNode' class itself is concerned
+ * with exposing the IPC interface to the outside world, plus some
+ * configuration management.
+ *
+ * NameNode implements the ClientProtocol interface, which allows
+ * clients to ask for DFS services. ClientProtocol is not
+ * designed for direct use by authors of DFS client code. End-users
+ * should instead use the org.apache.hadoop.fs.FileSystem class.
+ *
+ * NameNode also implements the DatanodeProtocol interface, used by
+ * DataNode programs that actually store DFS data blocks. These
+ * methods are invoked repeatedly and automatically by all the
+ * DataNodes in a DFS deployment.
*
* @author Mike Cafarella
**********************************************************/
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/package.html
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/package.html?rev=377317&r1=377316&r2=377317&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/package.html (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/package.html Sun Feb 12 23:25:35 2006
@@ -5,5 +5,12 @@
org.apache.hadoop.fs.FileSystem}. This is loosely modelled after
Google's <a href="http://labs.google.com/papers/gfs.html">GFS</a>.</p>
+<p>The most important difference is that unlike GFS, Hadoop DFS files
+have strictly one writer at any one time. Bytes are always appended
+to the end of the writer's stream. There is no notion of "record appends"
+or "mutations" that are then checked or reordered. Writers simply emit
+a byte stream. That byte stream is guaranteed to be stored in the
+order written.</p>
+
</body>
</html>
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java?rev=377317&r1=377316&r2=377317&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java Sun Feb 12 23:25:35 2006
@@ -25,18 +25,19 @@
import org.apache.hadoop.util.LogFormatter;
/****************************************************************
- * An abstract base class for a fairly simple
- * distributed file system.
- * A Hadoop installation might consist
- * of multiple machines, which should swap files transparently.
- * This interface allows other Hadoop systems to find and place
- * files into the distributed Hadoop-controlled file world.
+ * An abstract base class for a fairly generic filesystem. It
+ * may be implemented as a distributed filesystem, or as a "local"
+ * one that reflects the locally-connected disk. The local version
+ * exists for small Hadoop instances and for testing.
+ *
* <p>
- * A local implementation exists for testing and for small Hadoop instances.
- * <p>
- * The standard job of FileSystem is to take the location-
- * independent HadoopFile objects, and resolve them using local
- * knowledge and local instances of ShareGroup.
+ *
+ * All user code that may potentially use the Hadoop Distributed
+ * File System should be written to use a FileSystem object. The
+ * Hadoop DFS is a multi-machine system that appears as a single
+ * disk. It's useful because of its fault tolerance and potentially
+ * very large capacity.
+ *
* <p>
* The local implementation is {@link LocalFileSystem} and distributed
* implementation is {@link DistributedFileSystem}.