Posted to common-issues@hadoop.apache.org by GitBox <gi...@apache.org> on 2020/08/16 00:34:21 UTC

[GitHub] [hadoop] umamaheswararao commented on a change in pull request #2229: HDFS-15533: Provide DFS API compatible class, but use ViewFileSystemOverloadScheme inside.

umamaheswararao commented on a change in pull request #2229:
URL: https://github.com/apache/hadoop/pull/2229#discussion_r471048226



##########
File path: hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ViewDistributedFileSystem.java
##########
@@ -0,0 +1,1864 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.HadoopIllegalArgumentException;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.crypto.key.KeyProvider;
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.BlockStoragePolicySpi;
+import org.apache.hadoop.fs.CacheFlag;
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.CreateFlag;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileChecksum;
+import org.apache.hadoop.fs.FileEncryptionInfo;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FsServerDefaults;
+import org.apache.hadoop.fs.FsStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Options;
+import org.apache.hadoop.fs.PartialListing;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.fs.PathHandle;
+import org.apache.hadoop.fs.QuotaUsage;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.StorageType;
+import org.apache.hadoop.fs.XAttrSetFlag;
+import org.apache.hadoop.fs.permission.AclEntry;
+import org.apache.hadoop.fs.permission.AclStatus;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.fs.viewfs.ViewFileSystem;
+import org.apache.hadoop.fs.viewfs.ViewFileSystemOverloadScheme;
+import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
+import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse;
+import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
+import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
+import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
+import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
+import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.ECTopologyVerifierResult;
+import org.apache.hadoop.hdfs.protocol.EncryptionZone;
+import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
+import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.protocol.HdfsPathHandle;
+import org.apache.hadoop.hdfs.protocol.OpenFileEntry;
+import org.apache.hadoop.hdfs.protocol.OpenFilesIterator;
+import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing;
+import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
+import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus;
+import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
+import org.apache.hadoop.security.AccessControlException;
+import org.apache.hadoop.security.token.DelegationTokenIssuer;
+import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.util.Progressable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import java.net.InetSocketAddress;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * ViewDistributedFileSystem is an extension of DistributedFileSystem that
+ * adds mounting functionality. The goal is to give HDFS users better API
+ * compatibility when working with a mounting filesystem
+ * (ViewFileSystemOverloadScheme).
+ * {@link ViewFileSystemOverloadScheme} is a new filesystem that inherits its
+ * mounting functionality from ViewFileSystem.
+ * Users who currently use ViewFileSystemOverloadScheme by setting
+ * fs.hdfs.impl=org.apache.hadoop.fs.viewfs.ViewFileSystemOverloadScheme can
+ * instead set fs.hdfs.impl=org.apache.hadoop.hdfs.ViewDistributedFileSystem,
+ * so that hdfs users get a closely compatible API together with mount
+ * functionality. All other schemes can continue to use the
+ * ViewFileSystemOverloadScheme class directly for mount functionality. Please
+ * note that ViewFileSystemOverloadScheme provides only the
+ * {@link ViewFileSystem} APIs.
+ * If a user configures this class but does not configure any mount points,
+ * it simply behaves like the existing DistributedFileSystem class. If a user
+ * configures both fs.hdfs.impl to this class and mount configurations, the
+ * APIs available in this class can be called as usual; they are plain DFS
+ * APIs, but they will be delegated to the viewfs functionality. Please note
+ * that APIs without any path in their arguments (ex: isInSafeMode) are
+ * delegated to the default filesystem only, that is, the configured fallback
+ * link. If you want to make such calls on a specific child filesystem, you
+ * may want to initialize it separately and call it directly. In
+ * ViewDistributedFileSystem, linkFallback is mandatory when you add mount
+ * links, and it must point to your base cluster, usually your current
+ * fs.defaultFS if that is pointing to hdfs.
+ */
+public class ViewDistributedFileSystem extends DistributedFileSystem {
+  private static final Logger LOGGER =
+      LoggerFactory.getLogger(ViewDistributedFileSystem.class);
+
+  // A mounting file system.
+  private ViewFileSystemOverloadScheme vfs;
+  // A default DFS, which should have been set via linkFallback
+  private DistributedFileSystem defaultDFS;
+
+  @Override
+  public void initialize(URI uri, Configuration conf) throws IOException {
+    super.initialize(uri, conf);
+    try {
+      this.vfs = tryInitializeMountingViewFs(uri, conf);
+    } catch (IOException ioe) {
+      LOGGER.debug(
+          "Mount tree initialization failed with the reason => {}. Falling" +
+              " back to regular DFS initialization. Please" + " re-initialize" +
+              " the fs after updating mount point.",
+          ioe.getMessage());
+      // Re-initialize, so that initDFSClient will initialize the DFSClient and
+      // this instance works the same as DistributedFileSystem.
+      super.initialize(uri, conf);

Review comment:
       The idea was that the first initialization should skip initing the DFSClient, since we may initialize vfs instead. Later, if we fail to init vfs, we continue and initialize the DFSClient to get regular DFS behavior.
   I corrected a few things now, and that may clarify matters. The reason I tried to init on the first line was to make sure statistics are initialized with the correct class names. Otherwise, I noticed that InternalViewFS also gets inited in the statistics with the hdfs scheme, which may create correctness issues.
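
For readers following along, the ordering being defended here can be restated as a simplified sketch. The method and field names (super.initialize, tryInitializeMountingViewFs, vfs) come from the diff above; the comments summarize the rationale from this review thread, and the actual patch may gate DFSClient creation differently (e.g. via initDFSClient):

  @Override
  public void initialize(URI uri, Configuration conf) throws IOException {
    // Initialize as this class first, so that FileSystem statistics for the
    // hdfs scheme are registered against ViewDistributedFileSystem rather
    // than against the internal view filesystem.
    super.initialize(uri, conf);
    try {
      // Try to build the mount-aware view from the configured mount table.
      this.vfs = tryInitializeMountingViewFs(uri, conf);
    } catch (IOException ioe) {
      // No usable mount table: re-initialize so the DFSClient is set up and
      // this instance behaves like a plain DistributedFileSystem.
      super.initialize(uri, conf);
    }
  }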
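
And, for context on the javadoc in the patch, a hypothetical client-side configuration could look like the sketch below. The mount-table name "ns1", the cluster URIs and the /data link are placeholders rather than values from the patch; the property names follow the standard fs.viewfs.mounttable.* keys:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ViewDfsMountExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Point the hdfs scheme at the DFS-compatible mounting filesystem.
    conf.set("fs.hdfs.impl",
        "org.apache.hadoop.hdfs.ViewDistributedFileSystem");
    // One mount link plus the mandatory linkFallback to the base cluster.
    // The mount-table name ("ns1") matches the authority of the fs URI.
    conf.set("fs.viewfs.mounttable.ns1.link./data", "hdfs://ns2/data");
    conf.set("fs.viewfs.mounttable.ns1.linkFallback", "hdfs://ns1/");
    conf.set("fs.defaultFS", "hdfs://ns1/");

    try (FileSystem fs = FileSystem.get(new URI("hdfs://ns1/"), conf)) {
      // Paths under /data resolve through the mount link; all other paths
      // are served by the fallback (base) HDFS cluster.
      fs.listStatus(new Path("/data"));
    }
  }
}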



