Posted to common-commits@hadoop.apache.org by cn...@apache.org on 2016/06/17 06:41:56 UTC

hadoop git commit: HADOOP-13242. Authenticate to Azure Data Lake using client ID and keys. Contributed by Atul Sikaria.

Repository: hadoop
Updated Branches:
  refs/heads/trunk 51d497fa9 -> 51d16e7b3


HADOOP-13242. Authenticate to Azure Data Lake using client ID and keys. Contributed by Atul Sikaria.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/51d16e7b
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/51d16e7b
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/51d16e7b

Branch: refs/heads/trunk
Commit: 51d16e7b38d247f73b0ec2ffd8b2b02069c05a33
Parents: 51d497f
Author: Chris Nauroth <cn...@apache.org>
Authored: Thu Jun 16 23:35:20 2016 -0700
Committer: Chris Nauroth <cn...@apache.org>
Committed: Thu Jun 16 23:35:20 2016 -0700

----------------------------------------------------------------------
 hadoop-tools/hadoop-azure-datalake/pom.xml      |   7 +-
 ...ClientCredentialBasedAccesTokenProvider.java | 155 +++++++++++++++++++
 .../src/site/markdown/index.md                  |  64 ++++++++
 3 files changed, 225 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/51d16e7b/hadoop-tools/hadoop-azure-datalake/pom.xml
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-azure-datalake/pom.xml b/hadoop-tools/hadoop-azure-datalake/pom.xml
index a4b1fe1..d2161c7 100644
--- a/hadoop-tools/hadoop-azure-datalake/pom.xml
+++ b/hadoop-tools/hadoop-azure-datalake/pom.xml
@@ -147,7 +147,12 @@
     <dependency>
     <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
-  </dependency>
+    </dependency>
+    <dependency>
+      <groupId>com.squareup.okhttp</groupId>
+      <artifactId>okhttp</artifactId>
+      <version>2.4.0</version>
+    </dependency>
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/51d16e7b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/hdfs/web/oauth2/AzureADClientCredentialBasedAccesTokenProvider.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/hdfs/web/oauth2/AzureADClientCredentialBasedAccesTokenProvider.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/hdfs/web/oauth2/AzureADClientCredentialBasedAccesTokenProvider.java
new file mode 100644
index 0000000..6dfc593
--- /dev/null
+++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/hdfs/web/oauth2/AzureADClientCredentialBasedAccesTokenProvider.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hdfs.web.oauth2;
+
+import com.squareup.okhttp.OkHttpClient;
+import com.squareup.okhttp.Request;
+import com.squareup.okhttp.RequestBody;
+import com.squareup.okhttp.Response;
+import com.squareup.okhttp.MediaType;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.web.URLConnectionFactory;
+import org.apache.hadoop.util.Timer;
+import org.apache.http.HttpStatus;
+import org.codehaus.jackson.map.ObjectMapper;
+import org.codehaus.jackson.map.ObjectReader;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.hadoop.hdfs.web.oauth2.Utils.notNull;
+
+
+/**
+ * Obtain an access token via the credential-based OAuth2 workflow.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class AzureADClientCredentialBasedAccesTokenProvider
+    extends AccessTokenProvider {
+  private static final ObjectReader READER =
+      new ObjectMapper().reader(Map.class);
+
+  public static final String OAUTH_CREDENTIAL_KEY
+      = "dfs.webhdfs.oauth2.credential";
+
+  public static final String AAD_RESOURCE_KEY
+      = "fs.adls.oauth2.resource";
+
+  public static final String RESOURCE_PARAM_NAME
+      = "resource";
+
+  private static final String OAUTH_CLIENT_ID_KEY
+      = "dfs.webhdfs.oauth2.client.id";
+
+  private static final String OAUTH_REFRESH_URL_KEY
+      = "dfs.webhdfs.oauth2.refresh.url";
+
+
+  public static final String ACCESS_TOKEN = "access_token";
+  public static final String CLIENT_CREDENTIALS = "client_credentials";
+  public static final String CLIENT_ID = "client_id";
+  public static final String CLIENT_SECRET = "client_secret";
+  public static final String EXPIRES_IN = "expires_in";
+  public static final String GRANT_TYPE = "grant_type";
+  public static final MediaType URLENCODED
+          = MediaType.parse("application/x-www-form-urlencoded; charset=utf-8");
+
+
+  private AccessTokenTimer timer;
+
+  private String clientId;
+
+  private String refreshURL;
+
+  private String accessToken;
+
+  private String resource;
+
+  private String credential;
+
+  private boolean initialCredentialObtained = false;
+
+  AzureADClientCredentialBasedAccesTokenProvider() {
+    this.timer = new AccessTokenTimer();
+  }
+
+  AzureADClientCredentialBasedAccesTokenProvider(Timer timer) {
+    this.timer = new AccessTokenTimer(timer);
+  }
+
+  @Override
+  public void setConf(Configuration conf) {
+    super.setConf(conf);
+    clientId = notNull(conf, OAUTH_CLIENT_ID_KEY);
+    refreshURL = notNull(conf, OAUTH_REFRESH_URL_KEY);
+    resource = notNull(conf, AAD_RESOURCE_KEY);
+    credential = notNull(conf, OAUTH_CREDENTIAL_KEY);
+  }
+
+  @Override
+  public String getAccessToken() throws IOException {
+    if(timer.shouldRefresh() || !initialCredentialObtained) {
+      refresh();
+      initialCredentialObtained = true;
+    }
+    return accessToken;
+  }
+
+  void refresh() throws IOException {
+    try {
+      OkHttpClient client = new OkHttpClient();
+      client.setConnectTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT,
+          TimeUnit.MILLISECONDS);
+      client.setReadTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT,
+          TimeUnit.MILLISECONDS);
+
+      String bodyString = Utils.postBody(CLIENT_SECRET, credential,
+          GRANT_TYPE, CLIENT_CREDENTIALS,
+          RESOURCE_PARAM_NAME, resource,
+          CLIENT_ID, clientId);
+
+      RequestBody body = RequestBody.create(URLENCODED, bodyString);
+
+      Request request = new Request.Builder()
+          .url(refreshURL)
+          .post(body)
+          .build();
+      Response responseBody = client.newCall(request).execute();
+
+      if (responseBody.code() != HttpStatus.SC_OK) {
+        throw new IllegalArgumentException("Received invalid http response: "
+            + responseBody.code() + ", text = " + responseBody.toString());
+      }
+
+      Map<?, ?> response = READER.readValue(responseBody.body().string());
+
+      String newExpiresIn = response.get(EXPIRES_IN).toString();
+      timer.setExpiresIn(newExpiresIn);
+
+      accessToken = response.get(ACCESS_TOKEN).toString();
+
+    } catch (Exception e) {
+      throw new IOException("Unable to obtain access token from credential", e);
+    }
+  }
+}
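
The refresh() method above is a standard OAuth2 client-credentials exchange: it POSTs grant_type=client_credentials together with the client id, client secret, and resource to the configured token endpoint, then caches the returned access_token until the AccessTokenTimer reports it as expired. A minimal sketch of driving the provider directly is below; it assumes only the four configuration keys defined in the class, and because the constructors are package-private it would have to live in the same package. All values are placeholders.

    // Illustrative sketch, not part of the patch.  Lives in the provider's
    // package because the constructors above are package-private.
    package org.apache.hadoop.hdfs.web.oauth2;

    import org.apache.hadoop.conf.Configuration;

    public class AzureADTokenProviderSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.webhdfs.oauth2.client.id", "<client id>");
        // AAD token endpoint, e.g. https://login.windows.net/<tenant>/oauth2/token
        conf.set("dfs.webhdfs.oauth2.refresh.url", "<token endpoint>");
        conf.set("dfs.webhdfs.oauth2.credential", "<client key>");
        conf.set("fs.adls.oauth2.resource", "https://management.core.windows.net/");

        AzureADClientCredentialBasedAccesTokenProvider provider =
            new AzureADClientCredentialBasedAccesTokenProvider();
        provider.setConf(conf);

        // The first call POSTs grant_type=client_credentials to the refresh URL,
        // caches the returned access_token, and reuses it until the timer expires.
        System.out.println(provider.getAccessToken());
      }
    }

In normal use the class is not instantiated by hand; it is named through dfs.webhdfs.oauth2.access.token.provider in core-site.xml, as the documentation below shows.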

http://git-wip-us.apache.org/repos/asf/hadoop/blob/51d16e7b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md
index 4158c88..3f03d41 100644
--- a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md
+++ b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md
@@ -23,6 +23,9 @@
         * [OAuth2 Support](#OAuth2_Support)
         * [Read Ahead Buffer Management](Read_Ahead_Buffer_Management)
     * [Configuring Credentials & FileSystem](#Configuring_Credentials)
+        * [Using Refresh Token](#Refresh_Token)
+        * [Using Client Keys](#Client_Credential_Token)
+    * [Enabling ADL Filesystem](#Enabling_ADL)
     * [Accessing adl URLs](#Accessing_adl_URLs)
 * [Testing the hadoop-azure Module](#Testing_the_hadoop-azure_Module)
 
@@ -131,6 +134,9 @@ To configure number of concurrent connection to Azure Data Lake Storage Account.
     </property>
 
 ## <a name="Configuring_Credentials" />Configuring Credentials & FileSystem
+Credentials can be configured using either a refresh token (associated with a user) or a client credential (analogous to a service principal).
+
+### <a name="Refresh_Token" />Using Refresh Token
 
 Update core-site.xml for OAuth2 configuration
 
@@ -173,6 +179,64 @@ Application require to set Client id and OAuth2 refresh token from Azure Active
             <value></value>
         </property>
 
+
+### <a name="Client_Credential_Token" />Using Client Keys
+
+#### Generating the Service Principal
+1.  Go to the portal (https://portal.azure.com).
+2.  Under "Browse", look for Active Directory and click on it.
+3.  Create a "Web Application". Remember the name you choose here - that is what you will add to your ADL account as an authorized user.
+4.  Go through the wizard.
+5.  Once the app is created, go to the app configuration and find the section on "Keys".
+6.  Select a key duration and hit save. Save the generated keys.
+7.  Note down the properties you will need to authenticate:
+    -  The client ID
+    -  The key you just generated above
+    -  The token endpoint (select "View endpoints" at the bottom of the page and copy/paste the OAuth 2.0 Token Endpoint value)
+    -  Resource: always https://management.core.windows.net/, for all customers
+
+#### Adding the service principal to your ADL Account
+1.  Go to the portal again and open your ADL account.
+2.  Select Users under Settings.
+3.  Add the name you created in Step 3 above (note that it does not show up in the list, but it will be found if you search for it).
+4.  Assign the "Owner" role.
+
+#### Configure core-site.xml
+Add the following properties to your core-site.xml
+
+    <property>
+      <name>dfs.webhdfs.oauth2.access.token.provider</name>
+      <value>org.apache.hadoop.hdfs.web.oauth2.AzureADClientCredentialBasedAccesTokenProvider</value>
+    </property>
+
+    <property>
+      <name>dfs.webhdfs.oauth2.refresh.url</name>
+      <value>TOKEN ENDPOINT FROM STEP 7 ABOVE</value>
+    </property>
+
+    <property>
+      <name>dfs.webhdfs.oauth2.client.id</name>
+      <value>CLIENT ID FROM STEP 7 ABOVE</value>
+    </property>
+
+    <property>
+      <name>dfs.webhdfs.oauth2.credential</name>
+      <value>PASSWORD FROM STEP 7 ABOVE</value>
+    </property>
+
+    <property>
+      <name>fs.adls.oauth2.resource</name>
+      <value>https://management.core.windows.net/</value>
+    </property>
+
+    <property>
+      <name>fs.defaultFS</name>
+      <value>YOUR ADL STORE URL (e.g., adl://example.azuredatalakestore.net)</value>
+    </property>
+
+
+## <a name="Enabling_ADL" />Enabling ADL Filesystem
+
 For ADL FileSystem to take effect. Update core-site.xml with
 
         <property>
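
Once core-site.xml carries the properties above and the ADL filesystem itself is enabled as described under "Enabling ADL Filesystem", the store can be addressed through the adl scheme like any other Hadoop filesystem. A brief illustrative sketch follows; the account name and path are placeholders.

    // Illustrative only: list the root of a configured ADL store.
    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class AdlListingSketch {
      public static void main(String[] args) throws Exception {
        // Picks up core-site.xml from the classpath, including the OAuth2 keys.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(
            URI.create("adl://example.azuredatalakestore.net"), conf);
        for (FileStatus status : fs.listStatus(new Path("/"))) {
          System.out.println(status.getPath());
        }
      }
    }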

