You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@samza.apache.org by ni...@apache.org on 2017/08/09 17:51:04 UTC

[18/23] samza git commit: Samza-1379: Create Azure Client

Samza-1379: Create Azure Client

navina
**PR 1: AzureClient + AzureConfig** (current PR)

Author: PawasChhokra <Jaimatadi1$>

Reviewers: Navina Ramesh <na...@apache.org>

Closes #254 from PawasChhokra/AzureStorageClient


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/966730ee
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/966730ee
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/966730ee

Branch: refs/heads/0.14.0
Commit: 966730ee60e59270ca3c6511554b73421febb205
Parents: 69dbada
Author: Pawas Chhokra <pa...@gmail.com>
Authored: Thu Aug 3 18:58:10 2017 -0700
Committer: navina <na...@apache.org>
Committed: Thu Aug 3 18:58:10 2017 -0700

----------------------------------------------------------------------
 build.gradle                                    | 20 ++++++
 .../versioned/jobs/configuration-table.html     | 22 +++++-
 samza-azure/README.md                           | 34 +++++++++
 .../main/java/org/apache/samza/AzureClient.java | 64 +++++++++++++++++
 .../main/java/org/apache/samza/AzureConfig.java | 72 ++++++++++++++++++++
 settings.gradle                                 |  3 +-
 6 files changed, 213 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/966730ee/build.gradle
----------------------------------------------------------------------
diff --git a/build.gradle b/build.gradle
index 8d1b37e..16fe2cf 100644
--- a/build.gradle
+++ b/build.gradle
@@ -179,6 +179,26 @@ project(":samza-core_$scalaVersion") {
   }
 }
 
+
+project(':samza-azure') {
+  apply plugin: 'java'
+  apply plugin: 'checkstyle'
+
+  dependencies {
+    compile "com.microsoft.azure:azure-storage:5.3.1"
+    compile "com.fasterxml.jackson.core:jackson-core:2.8.8"
+    compile project(':samza-api')
+    compile project(":samza-core_$scalaVersion")
+    compile "org.slf4j:slf4j-api:$slf4jVersion"
+    testCompile "junit:junit:$junitVersion"
+  }
+  checkstyle {
+    configFile = new File(rootDir, "checkstyle/checkstyle.xml")
+    toolVersion = "$checkstyleVersion"
+  }
+}
+
+
 project(":samza-autoscaling_$scalaVersion") {
   apply plugin: 'scala'
   apply plugin: 'checkstyle'

http://git-wip-us.apache.org/repos/asf/samza/blob/966730ee/docs/learn/documentation/versioned/jobs/configuration-table.html
----------------------------------------------------------------------
diff --git a/docs/learn/documentation/versioned/jobs/configuration-table.html b/docs/learn/documentation/versioned/jobs/configuration-table.html
index bdf477a..dc1df30 100644
--- a/docs/learn/documentation/versioned/jobs/configuration-table.html
+++ b/docs/learn/documentation/versioned/jobs/configuration-table.html
@@ -424,7 +424,8 @@
                             <dd>Fixed partition mapping. No Zoookeeper. </dd>
                             <dt><code>org.apache.samza.zk.ZkJobCoordinatorFactory</code></dt>
                             <dd>Zookeeper-based coordination. </dd>
-                        </dl>
+                           <dt><code>org.apache.samza.AzureJobCoordinatorFactory</code></dt>
+                           <dd>Azure-based coordination. </dd></dl>
                         Required only for non-cluster-managed applications. Please see the required value for <a href=#task-name-grouper-factory>task-name-grouper-factory </a>
                     </td>
                 </tr>
@@ -468,6 +469,25 @@
                         How long the Leader processor will wait before recalculating the JobModel on change of registered processors.
                     </td>
                 </tr>
+
+                <tr>
+                    <th colspan="3" class="section" id="AzureBasedJobCoordination"><a href="../index.html">Azure-based job configuration</a></th>
+                </tr>
+                <tr>
+                    <td class="property" id="azure.storage.connect">azure.storage.connect</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        <strong>Required</strong> for applications with Azure-based coordination. This is the storage connection string related to every Azure account. It is of the format: "DefaultEndpointsProtocol=https;AccountName=&lt;Insert your account name&gt;;AccountKey=&lt;Insert your account key&gt;"
+
+                    </td>
+                </tr>
+                <tr>
+                    <td class="property" id="job.coordinator.azure.blob.length">job.coordinator.azure.blob.length</td>
+                    <td class="default"> 5120000 </td>
+                    <td class="description">
+                        Length, in bytes, of the page blob on which the leader stores the shared data. Different types of data are stored on different pages with predefined lengths. The offsets of these pages are dependent on the total page blob length.
+                    </td>
+                </tr>
+
                 <tr>
                     <th colspan="3" class="section" id="task"><a href="../api/overview.html">Task configuration</a></th>
                 </tr>

http://git-wip-us.apache.org/repos/asf/samza/blob/966730ee/samza-azure/README.md
----------------------------------------------------------------------
diff --git a/samza-azure/README.md b/samza-azure/README.md
new file mode 100644
index 0000000..0e9e9cf
--- /dev/null
+++ b/samza-azure/README.md
@@ -0,0 +1,34 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+## Samza on Azure
+
+* Provides the ability to run Samza Standalone in the cloud, using Azure.
+* Removes the dependency on ZooKeeper.
+* All coordination services are implemented using services provided by Azure.
+
+Read [Samza on Azure Design Doc](https://cwiki.apache.org/confluence/display/SAMZA/SEP-7%3A+Samza+on+Azure) to learn more about the implementation details.
+
+### Running Samza with Azure
+
+* Change: job.coordinator.factory = org.apache.samza.AzureJobCoordinatorFactory
+* Add Azure Storage Connection String. 
+<br /> azure.storage.connect = DefaultEndpointsProtocol=https;AccountName="Insert your account name";AccountKey="Insert your account key"
+* Add blob length in bytes => job.coordinator.azure.blob.length
+<br /> Default value = 5120000
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/samza/blob/966730ee/samza-azure/src/main/java/org/apache/samza/AzureClient.java
----------------------------------------------------------------------
diff --git a/samza-azure/src/main/java/org/apache/samza/AzureClient.java b/samza-azure/src/main/java/org/apache/samza/AzureClient.java
new file mode 100644
index 0000000..b5884cd
--- /dev/null
+++ b/samza-azure/src/main/java/org/apache/samza/AzureClient.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.samza;
+
+import com.microsoft.azure.storage.CloudStorageAccount;
+import com.microsoft.azure.storage.blob.CloudBlobClient;
+import com.microsoft.azure.storage.table.CloudTableClient;
+import java.net.URISyntaxException;
+import java.security.InvalidKeyException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Creates the client handles for the Azure Storage account, Azure Blob storage and Azure Table storage.
+ *
+ * <p>All handles are built once, in the constructor, from a single storage connection string.
+ */
+public class AzureClient {
+
+  private static final Logger LOG = LoggerFactory.getLogger(AzureClient.class);
+  private final CloudStorageAccount account;
+  private final CloudTableClient tableClient;
+  private final CloudBlobClient blobClient;
+
+  /**
+   * Parses the connection string and creates the blob and table clients for that storage account.
+   *
+   * @param storageConnectionString Azure storage connection string, of the format
+   *        "DefaultEndpointsProtocol=https;AccountName=...;AccountKey=..."
+   * @throws SamzaException if the connection string specifies an invalid URI or an invalid key
+   */
+  AzureClient(String storageConnectionString) {
+    try {
+      account = CloudStorageAccount.parse(storageConnectionString);
+      blobClient = account.createCloudBlobClient();
+      tableClient = account.createCloudTableClient();
+    } catch (IllegalArgumentException | URISyntaxException e) {
+      // The connection string embeds the AccountKey, so it is deliberately kept out of the logs.
+      LOG.error("The Azure storage connection string specifies an invalid URI.");
+      LOG.error("Please confirm the connection string is in the Azure connection string format.");
+      throw new SamzaException(e);
+    } catch (InvalidKeyException e) {
+      // Same as above: never log the connection string, it contains the account secret.
+      LOG.error("The Azure storage connection string specifies an invalid key.");
+      LOG.error("Please confirm the AccountName and AccountKey in the connection string are valid.");
+      throw new SamzaException(e);
+    }
+  }
+
+  /**
+   * @return the Azure Blob storage client created for the storage account
+   */
+  public CloudBlobClient getBlobClient() {
+    return blobClient;
+  }
+
+  /**
+   * @return the Azure Table storage client created for the storage account
+   */
+  public CloudTableClient getTableClient() {
+    return tableClient;
+  }
+}

http://git-wip-us.apache.org/repos/asf/samza/blob/966730ee/samza-azure/src/main/java/org/apache/samza/AzureConfig.java
----------------------------------------------------------------------
diff --git a/samza-azure/src/main/java/org/apache/samza/AzureConfig.java b/samza-azure/src/main/java/org/apache/samza/AzureConfig.java
new file mode 100644
index 0000000..b88d3c0
--- /dev/null
+++ b/samza-azure/src/main/java/org/apache/samza/AzureConfig.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.samza;
+
+import org.apache.samza.config.ApplicationConfig;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.ConfigException;
+import org.apache.samza.config.MapConfig;
+
+
+/**
+ * Configuration for Azure-based job coordination.
+ *
+ * <p>Exposes the Azure storage connection string and the page blob length from the wrapped
+ * {@link Config}, and derives the container, blob and table names from the application's
+ * global app id.
+ */
+public class AzureConfig extends MapConfig {
+
+  // Connection string for Azure Storage Account, format: "DefaultEndpointsProtocol=<https>;AccountName=<>;AccountKey=<>"
+  public static final String AZURE_STORAGE_CONNECT = "azure.storage.connect";
+  public static final String AZURE_PAGEBLOB_LENGTH = "job.coordinator.azure.blob.length";
+  public static final long DEFAULT_AZURE_PAGEBLOB_LENGTH = 5120000;
+
+  // Per-instance (not static): the names depend on the app id of the config given to the constructor,
+  // so AzureConfig objects built for different applications must not overwrite each other's names.
+  private final String containerName;
+  private final String blobName;
+  private final String tableName;
+
+  /**
+   * Wraps the given config and builds the Azure resource names from its global app id.
+   *
+   * @param config the configuration to wrap
+   */
+  public AzureConfig(Config config) {
+    super(config);
+    ApplicationConfig appConfig = new ApplicationConfig(config);
+    //Remove all non-alphanumeric characters from id as table name does not allow them.
+    String id = appConfig.getGlobalAppId().replaceAll("[^A-Za-z0-9]", "");
+    containerName = "samzacontainer" + id;
+    blobName = "samzablob" + id;
+    tableName = "samzatable" + id;
+  }
+
+  /**
+   * @return the value of {@link #AZURE_STORAGE_CONNECT}
+   * @throws ConfigException if {@link #AZURE_STORAGE_CONNECT} is not present in the config
+   */
+  public String getAzureConnect() {
+    if (!containsKey(AZURE_STORAGE_CONNECT)) {
+      throw new ConfigException("Missing " + AZURE_STORAGE_CONNECT + " config!");
+    }
+    return get(AZURE_STORAGE_CONNECT);
+  }
+
+  /**
+   * @return the container name: "samzacontainer" followed by the alphanumeric-only app id
+   */
+  public String getAzureContainerName() {
+    return containerName;
+  }
+
+  /**
+   * @return the blob name: "samzablob" followed by the alphanumeric-only app id
+   */
+  public String getAzureBlobName() {
+    return blobName;
+  }
+
+  /**
+   * @return the value of {@link #AZURE_PAGEBLOB_LENGTH}, or {@link #DEFAULT_AZURE_PAGEBLOB_LENGTH} when it is not set
+   */
+  public long getAzureBlobLength() {
+    return getLong(AZURE_PAGEBLOB_LENGTH, DEFAULT_AZURE_PAGEBLOB_LENGTH);
+  }
+
+  /**
+   * @return the table name: "samzatable" followed by the alphanumeric-only app id
+   */
+  public String getAzureTableName() {
+    return tableName;
+  }
+
+}
+

http://git-wip-us.apache.org/repos/asf/samza/blob/966730ee/settings.gradle
----------------------------------------------------------------------
diff --git a/settings.gradle b/settings.gradle
index 417ada4..a4eba94 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -22,7 +22,8 @@ include \
   'samza-elasticsearch',
   'samza-log4j',
   'samza-rest',
-  'samza-shell'
+  'samza-shell',
+  'samza-azure'
 
 def scalaModules = [
         'samza-core',