Posted to common-issues@hadoop.apache.org by "Anuj (Jira)" <ji...@apache.org> on 2020/02/05 09:34:00 UTC

[jira] [Commented] (HADOOP-16417) abfs can't access storage account without password

    [ https://issues.apache.org/jira/browse/HADOOP-16417?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17030492#comment-17030492 ] 

Anuj commented on HADOOP-16417:
-------------------------------

Facing the same issue when soft delete is enabled on the storage account: the hadoop fs -ls command fails with

Operation failed: "This endpoint does not support BlobStorageEvents or SoftDelete. Please disable these account features if you would like to use this endpoint.", 409, HEAD, https://<account_name>.dfs.core.windows.net/test-container-1//?upn=false&action=getAccessControl&timeout=90

Command:

hadoop fs -Dfs.azure.account.auth.type.<account_name>.dfs.core.windows.net=OAuth -Dfs.azure.account.oauth.provider.type.<account_name>.dfs.core.windows.net=org.apache.hadoop.fs.azurebfs.oauth2.MsiTokenProvider -ls abfs://test-container-1@<account_name>.dfs.core.windows.net/
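For reference, the same MSI/OAuth access expressed directly against the Hadoop FileSystem API. A minimal sketch, where the class name AbfsMsiList is illustrative and <account_name> is a placeholder exactly as in the command above:

{code:java}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AbfsMsiList {
    public static void main(String[] args) throws Exception {
        String account = "<account_name>"; // placeholder, as in the command above
        Configuration conf = new Configuration();
        // Same settings as the -D options of the hadoop fs command above.
        conf.set("fs.azure.account.auth.type." + account + ".dfs.core.windows.net", "OAuth");
        conf.set("fs.azure.account.oauth.provider.type." + account + ".dfs.core.windows.net",
                "org.apache.hadoop.fs.azurebfs.oauth2.MsiTokenProvider");
        Path root = new Path("abfs://test-container-1@" + account + ".dfs.core.windows.net/");
        // Fails with the 409 above when soft delete is enabled on the account.
        FileSystem fs = root.getFileSystem(conf);
        for (FileStatus status : fs.listStatus(root)) {
            System.out.println(status.getPath());
        }
    }
}
{code}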

> abfs can't access storage account without password
> --------------------------------------------------
>
>                 Key: HADOOP-16417
>                 URL: https://issues.apache.org/jira/browse/HADOOP-16417
>             Project: Hadoop Common
>          Issue Type: Sub-task
>          Components: fs/azure
>    Affects Versions: 3.2.0
>            Reporter: Jose Luis Pedrosa
>            Assignee: Masatake Iwasaki
>            Priority: Minor
>         Attachments: HADOOP-16417.000.patch
>
>
> *** NOTE: apparently a workaround is to use any string as the password; Azure will allow access to an open storage account even with a wrong password.
> It does not seem possible to access storage accounts without passwords using abfs, but it is possible using wasb.
>  
> The following Spark-based sample code illustrates the issue: reading via abfs_path throws this exception
> {noformat}
> Exception in thread "main" java.lang.IllegalArgumentException: Invalid account key.
>         at org.apache.hadoop.fs.azurebfs.services.SharedKeyCredentials.<init>(SharedKeyCredentials.java:70)
>         at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.initializeClient(AzureBlobFileSystemStore.java:812)
>         at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.<init>(AzureBlobFileSystemStore.java:149)
>         at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.initialize(AzureBlobFileSystem.java:108)
>         at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3303)
>         at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:124)
>         at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3352)
>         at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:3320)
>         at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:479)
>         at org.apache.hadoop.fs.Path.getFileSystem(Path.java:361)
> {noformat}
>  while reading via wasbs_path works normally:
> {code:java}
> import org.apache.spark.api.java.function.FilterFunction;
> import org.apache.spark.sql.RuntimeConfig;
> import org.apache.spark.sql.SparkSession;
> import org.apache.spark.sql.Dataset;
> import org.apache.spark.sql.Row;
> public class SimpleApp {
>     static String blob_account_name = "azureopendatastorage";
>     static String blob_container_name = "gfsweatherdatacontainer";
>     static String blob_relative_path = "GFSWeather/GFSProcessed";
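>     // Intentionally empty: the account is open/public, so no key or SAS token is set (this is what trips abfs).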
>     static String blob_sas_token = "";
>     static String abfs_path = "abfs://"+blob_container_name+"@"+blob_account_name+".dfs.core.windows.net/"+blob_relative_path;
>     static String wasbs_path = "wasbs://"+blob_container_name + "@"+blob_account_name+".blob.core.windows.net/" + blob_relative_path;
>     public static void main(String[] args) {
>        
>         SparkSession spark = SparkSession.builder().appName("NOAAGFS Run").getOrCreate();
>         configureAzureHadoopConnector(spark);
>         RuntimeConfig conf = spark.conf();
>         conf.set("fs.azure.account.key."+blob_account_name+".dfs.core.windows.net", blob_sas_token);
>         conf.set("fs.azure.account.key."+blob_account_name+".blob.core.windows.net", blob_sas_token);
>         System.out.println("Creating parquet dataset");
>         Dataset<Row> logData = spark.read().parquet(abfs_path);
>         System.out.println("Creating temp view");
>         logData.createOrReplaceTempView("source");
>         System.out.println("SQL");
>         spark.sql("SELECT * FROM source LIMIT 10").show();
>         spark.stop();
>     }
>     public static void configureAzureHadoopConnector(SparkSession session) {
>         RuntimeConfig conf = session.conf();
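>         // Register the wasb/wasbs and abfs/abfss FileSystem and AbstractFileSystem implementations.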
>         conf.set("fs.AbstractFileSystem.wasb.impl","org.apache.hadoop.fs.azure.Wasb");
>         conf.set("fs.AbstractFileSystem.wasbs.impl","org.apache.hadoop.fs.azure.Wasbs");
>         conf.set("fs.wasb.impl","org.apache.hadoop.fs.azure.NativeAzureFileSystem");
>         conf.set("fs.wasbs.impl","org.apache.hadoop.fs.azure.NativeAzureFileSystem$Secure");
>         conf.set("fs.azure.secure.mode", false);
>         conf.set("fs.abfs.impl",  "org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem");
>         conf.set("fs.abfss.impl", "org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem");
>         conf.set("fs.AbstractFileSystem.abfs.impl","org.apache.hadoop.fs.azurebfs.Abfs");
>         conf.set("fs.AbstractFileSystem.abfss.impl","org.apache.hadoop.fs.azurebfs.Abfss");
>         // Works in conjunction with fs.azure.secure.mode. Setting this config to true
>         //    results in fs.azure.NativeAzureFileSystem using the local SAS key generation,
>         //    where the SAS keys are generated in the same process as fs.azure.NativeAzureFileSystem.
>         //    If the fs.azure.secure.mode flag is set to false, this flag has no effect.
>         conf.set("fs.azure.local.sas.key.mode", false);
>     }
> }
> {code}
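> Per the NOTE above, the reported workaround is to set an arbitrary non-empty account key instead of the empty string. A minimal sketch against the sample (the dummy value is illustrative; it presumably needs to be valid base64, and per the note Azure apparently still grants access to an open storage account despite the wrong key):
> {code:java}
> // Workaround sketch per the NOTE above: a non-empty dummy key gets past the
> // "Invalid account key." check in SharedKeyCredentials. "aGFjaw==" is an
> // arbitrary base64 string, not a real key.
> static String blob_sas_token = "aGFjaw==";
> {code}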
> Sample build.gradle
> {noformat}
> plugins {
>     id 'java'
> }
> group 'org.samples'
> version '1.0-SNAPSHOT'
> sourceCompatibility = 1.8
> repositories {
>     mavenCentral()
> }
> dependencies {
>     compile  'org.apache.spark:spark-sql_2.12:2.4.3'
> }
> {noformat}
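> Note: the ABFS driver (org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem) ships in the hadoop-azure module as of Hadoop 3.2, so the sample presumably also needs that artifact on the classpath if the Spark distribution's bundled Hadoop does not provide it:
> {noformat}
> dependencies {
>     compile  'org.apache.spark:spark-sql_2.12:2.4.3'
>     // Assumption: add only if the ABFS classes are missing from the bundled Hadoop.
>     compile  'org.apache.hadoop:hadoop-azure:3.2.0'
> }
> {noformat}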



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: common-issues-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-issues-help@hadoop.apache.org