You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@crail.apache.org by at...@apache.org on 2018/09/06 09:26:59 UTC
incubator-crail-website git commit: Publishing from 2e7887e6151f9ff57ff12f272a6ebb0212a306f1

Repository: incubator-crail-website
Updated Branches:
  refs/heads/asf-site 5b208cb94 -> 8e2b6190a


Publishing from 2e7887e6151f9ff57ff12f272a6ebb0212a306f1


Project: http://git-wip-us.apache.org/repos/asf/incubator-crail-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-crail-website/commit/8e2b6190
Tree: http://git-wip-us.apache.org/repos/asf/incubator-crail-website/tree/8e2b6190
Diff: http://git-wip-us.apache.org/repos/asf/incubator-crail-website/diff/8e2b6190

Branch: refs/heads/asf-site
Commit: 8e2b6190a8476c5694b4d7ca53d9a36560bd33ad
Parents: 5b208cb
Author: Animesh Trivedi <an...@gmail.com>
Authored: Thu Sep 6 11:26:38 2018 +0200
Committer: Animesh Trivedi <an...@gmail.com>
Committed: Thu Sep 6 11:26:38 2018 +0200

----------------------------------------------------------------------
 content/Gemfile.lock                            | 192 ++-----------------
 content/blog/2017/01/sorting.html               |  23 +--
 content/blog/2017/08/crail-memory.html          |   2 +-
 content/blog/2017/08/crail-nvme-fabrics-v1.html |   2 +-
 content/blog/2017/11/crail-metadata.html        |   2 +-
 content/blog/2017/11/rdmashuffle.html           |  23 +--
 content/blog/2018/08/sql-p1.html                |  10 +-
 content/blog/index.html                         |  24 ++-
 content/blog/page2/index.html                   |  24 ++-
 content/blog/page3/index.html                   |  24 ++-
 content/blog/page4/index.html                   |  24 ++-
 content/blog/page5/index.html                   |  24 ++-
 content/feed.xml                                |  12 +-
 13 files changed, 119 insertions(+), 267 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-crail-website/blob/8e2b6190/content/Gemfile.lock
----------------------------------------------------------------------
diff --git a/content/Gemfile.lock b/content/Gemfile.lock
index 9962c76..f3da16c 100644
--- a/content/Gemfile.lock
+++ b/content/Gemfile.lock
@@ -1,207 +1,53 @@
 GEM
   remote: https://rubygems.org/
   specs:
-    activesupport (4.2.9)
-      i18n (~> 0.7)
-      minitest (~> 5.1)
-      thread_safe (~> 0.3, >= 0.3.4)
-      tzinfo (~> 1.1)
     addressable (2.5.2)
       public_suffix (>= 2.0.2, < 4.0)
-    coffee-script (2.4.1)
-      coffee-script-source
-      execjs
-    coffee-script-source (1.11.1)
     colorator (1.1.0)
-    commonmarker (0.17.7.1)
-      ruby-enum (~> 0.5)
     concurrent-ruby (1.0.5)
-    ethon (0.11.0)
-      ffi (>= 1.3.0)
-    execjs (2.7.0)
+    em-websocket (0.5.1)
+      eventmachine (>= 0.12.9)
+      http_parser.rb (~> 0.6.0)
+    eventmachine (1.2.5)
     faraday (0.14.0)
       multipart-post (>= 1.2, < 3)
-    ffi (1.9.21)
+    ffi (1.9.18)
     forwardable-extended (2.6.0)
-    gemoji (3.0.0)
-    github-pages (177)
-      activesupport (= 4.2.9)
-      github-pages-health-check (= 1.3.5)
-      jekyll (= 3.6.2)
-      jekyll-avatar (= 0.5.0)
-      jekyll-coffeescript (= 1.0.2)
-      jekyll-commonmark-ghpages (= 0.1.5)
-      jekyll-default-layout (= 0.1.4)
-      jekyll-feed (= 0.9.2)
-      jekyll-gist (= 1.4.1)
-      jekyll-github-metadata (= 2.9.3)
-      jekyll-mentions (= 1.2.0)
-      jekyll-optional-front-matter (= 0.3.0)
-      jekyll-paginate (= 1.1.0)
-      jekyll-readme-index (= 0.2.0)
-      jekyll-redirect-from (= 0.12.1)
-      jekyll-relative-links (= 0.5.2)
-      jekyll-remote-theme (= 0.2.3)
-      jekyll-sass-converter (= 1.5.0)
-      jekyll-seo-tag (= 2.3.0)
-      jekyll-sitemap (= 1.1.1)
-      jekyll-swiss (= 0.4.0)
-      jekyll-theme-architect (= 0.1.0)
-      jekyll-theme-cayman (= 0.1.0)
-      jekyll-theme-dinky (= 0.1.0)
-      jekyll-theme-hacker (= 0.1.0)
-      jekyll-theme-leap-day (= 0.1.0)
-      jekyll-theme-merlot (= 0.1.0)
-      jekyll-theme-midnight (= 0.1.0)
-      jekyll-theme-minimal (= 0.1.0)
-      jekyll-theme-modernist (= 0.1.0)
-      jekyll-theme-primer (= 0.5.2)
-      jekyll-theme-slate (= 0.1.0)
-      jekyll-theme-tactile (= 0.1.0)
-      jekyll-theme-time-machine (= 0.1.0)
-      jekyll-titles-from-headings (= 0.5.0)
-      jemoji (= 0.8.1)
-      kramdown (= 1.16.2)
-      liquid (= 4.0.0)
-      listen (= 3.0.6)
-      mercenary (~> 0.3)
-      minima (= 2.1.1)
-      nokogiri (>= 1.8.1, < 2.0)
-      rouge (= 2.2.1)
-      terminal-table (~> 1.4)
-    github-pages-health-check (1.3.5)
-      addressable (~> 2.3)
-      net-dns (~> 0.8)
-      octokit (~> 4.0)
-      public_suffix (~> 2.0)
-      typhoeus (~> 0.7)
-    html-pipeline (2.7.1)
-      activesupport (>= 2)
-      nokogiri (>= 1.4)
-    i18n (0.9.5)
+    http_parser.rb (0.6.0)
+    i18n (0.9.1)
       concurrent-ruby (~> 1.0)
-    jekyll (3.6.2)
+    jekyll (3.7.0)
       addressable (~> 2.4)
       colorator (~> 1.0)
+      em-websocket (~> 0.5)
+      i18n (~> 0.7)
       jekyll-sass-converter (~> 1.0)
-      jekyll-watch (~> 1.1)
+      jekyll-watch (~> 2.0)
       kramdown (~> 1.14)
       liquid (~> 4.0)
       mercenary (~> 0.3.3)
       pathutil (~> 0.9)
-      rouge (>= 1.7, < 3)
+      rouge (>= 1.7, < 4)
       safe_yaml (~> 1.0)
-    jekyll-avatar (0.5.0)
-      jekyll (~> 3.0)
-    jekyll-coffeescript (1.0.2)
-      coffee-script (~> 2.2)
-      coffee-script-source (~> 1.11.1)
-    jekyll-commonmark (1.1.0)
-      commonmarker (~> 0.14)
-      jekyll (>= 3.0, < 4.0)
-    jekyll-commonmark-ghpages (0.1.5)
-      commonmarker (~> 0.17.6)
-      jekyll-commonmark (~> 1)
-      rouge (~> 2)
-    jekyll-default-layout (0.1.4)
-      jekyll (~> 3.0)
     jekyll-feed (0.9.2)
       jekyll (~> 3.3)
     jekyll-gist (1.4.1)
       octokit (~> 4.2)
-    jekyll-github-metadata (2.9.3)
-      jekyll (~> 3.1)
-      octokit (~> 4.0, != 4.4.0)
-    jekyll-mentions (1.2.0)
-      activesupport (~> 4.0)
-      html-pipeline (~> 2.3)
-      jekyll (~> 3.0)
     jekyll-oembed (0.0.1)
       jekyll
       ruby-oembed (= 0.8.8)
-    jekyll-optional-front-matter (0.3.0)
-      jekyll (~> 3.0)
     jekyll-paginate (1.1.0)
-    jekyll-readme-index (0.2.0)
-      jekyll (~> 3.0)
-    jekyll-redirect-from (0.12.1)
-      jekyll (~> 3.3)
-    jekyll-relative-links (0.5.2)
-      jekyll (~> 3.3)
-    jekyll-remote-theme (0.2.3)
-      jekyll (~> 3.5)
-      rubyzip (>= 1.2.1, < 3.0)
-      typhoeus (>= 0.7, < 2.0)
     jekyll-sass-converter (1.5.0)
       sass (~> 3.4)
-    jekyll-seo-tag (2.3.0)
-      jekyll (~> 3.3)
-    jekyll-sitemap (1.1.1)
-      jekyll (~> 3.3)
-    jekyll-swiss (0.4.0)
-    jekyll-theme-architect (0.1.0)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-cayman (0.1.0)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-dinky (0.1.0)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-hacker (0.1.0)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-leap-day (0.1.0)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-merlot (0.1.0)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-midnight (0.1.0)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-minimal (0.1.0)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-modernist (0.1.0)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-primer (0.5.2)
-      jekyll (~> 3.5)
-      jekyll-github-metadata (~> 2.9)
-      jekyll-seo-tag (~> 2.2)
-    jekyll-theme-slate (0.1.0)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-tactile (0.1.0)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-time-machine (0.1.0)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-titles-from-headings (0.5.0)
-      jekyll (~> 3.3)
-    jekyll-watch (1.5.1)
+    jekyll-watch (2.0.0)
       listen (~> 3.0)
-    jemoji (0.8.1)
-      activesupport (~> 4.0, >= 4.2.9)
-      gemoji (~> 3.0)
-      html-pipeline (~> 2.2)
-      jekyll (>= 3.0)
     kramdown (1.16.2)
     liquid (4.0.0)
     listen (3.0.6)
       rb-fsevent (>= 0.9.3)
       rb-inotify (>= 0.9.7)
     mercenary (0.3.6)
-    mini_portile2 (2.3.0)
-    minima (2.1.1)
-      jekyll (~> 3.3)
-    minitest (5.11.3)
     multipart-post (2.0.0)
-    net-dns (0.8.0)
-    nokogiri (1.8.2)
-      mini_portile2 (~> 2.3.0)
     octokit (4.8.0)
       sawyer (~> 0.8.0, >= 0.5.3)
     pathutil (0.16.1)
@@ -211,10 +57,7 @@ GEM
     rb-inotify (0.9.10)
       ffi (>= 0.5.0, < 2)
     rouge (2.2.1)
-    ruby-enum (0.7.1)
-      i18n
     ruby-oembed (0.8.8)
-    rubyzip (1.2.1)
     safe_yaml (1.0.4)
     sass (3.5.5)
       sass-listen (~> 4.0.0)
@@ -224,20 +67,11 @@ GEM
     sawyer (0.8.1)
       addressable (>= 2.3.5, < 2.6)
       faraday (~> 0.8, < 1.0)
-    terminal-table (1.8.0)
-      unicode-display_width (~> 1.1, >= 1.1.1)
-    thread_safe (0.3.6)
-    typhoeus (0.8.0)
-      ethon (>= 0.8.0)
-    tzinfo (1.2.5)
-      thread_safe (~> 0.1)
-    unicode-display_width (1.3.0)
 
 PLATFORMS
   ruby
 
 DEPENDENCIES
-  github-pages
   jekyll-feed
   jekyll-gist
   jekyll-oembed

http://git-wip-us.apache.org/repos/asf/incubator-crail-website/blob/8e2b6190/content/blog/2017/01/sorting.html
----------------------------------------------------------------------
diff --git a/content/blog/2017/01/sorting.html b/content/blog/2017/01/sorting.html
index dca63b1..89967e5 100644
--- a/content/blog/2017/01/sorting.html
+++ b/content/blog/2017/01/sorting.html
@@ -74,7 +74,7 @@
           <h2>Sorting on a 100Gbit/s Cluster using Spark/Crail</h2>   
           
 
-          <p class="meta">17 Jan 2017</p>
+          <p class="meta">17 Jan 2017,  <mark>this is a blog post from a user of the Crail project.</mark>  </p>
 
 <div class="post">
 <div style="text-align: justify"> 
@@ -294,27 +294,6 @@ The figure below shows the overall performance of Spark/Crail vs Spark/Vanilla o
 
 <!-- 
 
-<div id="disqus_thread"></div>
-<script>
-
-/**
-*  RECOMMENDED CONFIGURATION VARIABLES: EDIT AND UNCOMMENT THE SECTION BELOW TO INSERT DYNAMIC VALUES FROM YOUR PLATFORM OR CMS.
-*  LEARN WHY DEFINING THESE VARIABLES IS IMPORTANT: https://disqus.com/admin/universalcode/#configuration-variables*/
-/*
-var disqus_config = function () {
-this.page.url = PAGE_URL;  // Replace PAGE_URL with your page's canonical URL variable
-this.page.identifier = PAGE_IDENTIFIER; // Replace PAGE_IDENTIFIER with your page's unique identifier variable
-};
-*/
-(function() { // DON'T EDIT BELOW THIS LINE
-var d = document, s = d.createElement('script');
-s.src = '//crail-io.disqus.com/embed.js';
-s.setAttribute('data-timestamp', +new Date());
-(d.head || d.body).appendChild(s);
-})();
-</script>
-<noscript>Please enable JavaScript to view the <a href="https://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
-
 -->
 
 

http://git-wip-us.apache.org/repos/asf/incubator-crail-website/blob/8e2b6190/content/blog/2017/08/crail-memory.html
----------------------------------------------------------------------
diff --git a/content/blog/2017/08/crail-memory.html b/content/blog/2017/08/crail-memory.html
index 7effb61..ea9c7a9 100644
--- a/content/blog/2017/08/crail-memory.html
+++ b/content/blog/2017/08/crail-memory.html
@@ -74,7 +74,7 @@
           <h2>Crail Storage Performance -- Part I: DRAM</h2>   
           
 
-          <p class="meta">18 Aug 2017</p>
+          <p class="meta">18 Aug 2017,  </p>
 
 <div class="post">
 <div style="text-align: justify"> 

http://git-wip-us.apache.org/repos/asf/incubator-crail-website/blob/8e2b6190/content/blog/2017/08/crail-nvme-fabrics-v1.html
----------------------------------------------------------------------
diff --git a/content/blog/2017/08/crail-nvme-fabrics-v1.html b/content/blog/2017/08/crail-nvme-fabrics-v1.html
index 1a77d9d..3381d1a 100644
--- a/content/blog/2017/08/crail-nvme-fabrics-v1.html
+++ b/content/blog/2017/08/crail-nvme-fabrics-v1.html
@@ -74,7 +74,7 @@
           <h2>Crail Storage Performance -- Part II: NVMf</h2>   
           
 
-          <p class="meta">22 Aug 2017</p>
+          <p class="meta">22 Aug 2017,  </p>
 
 <div class="post">
 <div style="text-align: justify">

http://git-wip-us.apache.org/repos/asf/incubator-crail-website/blob/8e2b6190/content/blog/2017/11/crail-metadata.html
----------------------------------------------------------------------
diff --git a/content/blog/2017/11/crail-metadata.html b/content/blog/2017/11/crail-metadata.html
index cdd48f4..5df113a 100644
--- a/content/blog/2017/11/crail-metadata.html
+++ b/content/blog/2017/11/crail-metadata.html
@@ -74,7 +74,7 @@
           <h2>Crail Storage Performance -- Part III: Metadata</h2>   
           
 
-          <p class="meta">21 Nov 2017</p>
+          <p class="meta">21 Nov 2017,  </p>
 
 <div class="post">
 <div style="text-align: justify">

http://git-wip-us.apache.org/repos/asf/incubator-crail-website/blob/8e2b6190/content/blog/2017/11/rdmashuffle.html
----------------------------------------------------------------------
diff --git a/content/blog/2017/11/rdmashuffle.html b/content/blog/2017/11/rdmashuffle.html
index af4f0f0..816139f 100644
--- a/content/blog/2017/11/rdmashuffle.html
+++ b/content/blog/2017/11/rdmashuffle.html
@@ -74,7 +74,7 @@
           <h2>Spark Shuffle: SparkRDMA vs Crail</h2>   
           
 
-          <p class="meta">17 Nov 2017</p>
+          <p class="meta">17 Nov 2017,  <mark>this is a blog post from a user of the Crail project.</mark>  </p>
 
 <div class="post">
 <div style="text-align: justify">
@@ -162,27 +162,6 @@ These benchmarks validate our belief that a "last-mile" integration cannot deliv
 
 <!-- 
 
-<div id="disqus_thread"></div>
-<script>
-
-/**
-*  RECOMMENDED CONFIGURATION VARIABLES: EDIT AND UNCOMMENT THE SECTION BELOW TO INSERT DYNAMIC VALUES FROM YOUR PLATFORM OR CMS.
-*  LEARN WHY DEFINING THESE VARIABLES IS IMPORTANT: https://disqus.com/admin/universalcode/#configuration-variables*/
-/*
-var disqus_config = function () {
-this.page.url = PAGE_URL;  // Replace PAGE_URL with your page's canonical URL variable
-this.page.identifier = PAGE_IDENTIFIER; // Replace PAGE_IDENTIFIER with your page's unique identifier variable
-};
-*/
-(function() { // DON'T EDIT BELOW THIS LINE
-var d = document, s = d.createElement('script');
-s.src = '//crail-io.disqus.com/embed.js';
-s.setAttribute('data-timestamp', +new Date());
-(d.head || d.body).appendChild(s);
-})();
-</script>
-<noscript>Please enable JavaScript to view the <a href="https://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
-
 -->
 
 

http://git-wip-us.apache.org/repos/asf/incubator-crail-website/blob/8e2b6190/content/blog/2018/08/sql-p1.html
----------------------------------------------------------------------
diff --git a/content/blog/2018/08/sql-p1.html b/content/blog/2018/08/sql-p1.html
index 285f8f5..40cea88 100644
--- a/content/blog/2018/08/sql-p1.html
+++ b/content/blog/2018/08/sql-p1.html
@@ -74,12 +74,12 @@
           <h2>SQL Performance: Part 1 - Input File Formats</h2>   
           
 
-          <p class="meta">08 Aug 2018</p>
+          <p class="meta">08 Aug 2018,  <mark>this is a blog post from a user of the Crail project.</mark>  </p>
 
 <div class="post">
 <div style="text-align: justify">
 <p>
-This is the first blog post in a multi-part series where we will focus on relational data processing performance (e.g., SQL) in presence of high-performance network and storage devices - the kind of devices that Crail targets. Relational data processing is one of the most popular and versatile workloads people run in the  cloud. The general idea is that data is stored in tables with a schema, and is processed using a domain specific language like SQL. Examples of some popular systems that support such relational data analytics in the cloud are <a href="https://spark.apache.org/sql/">Apache Spark/SQL</a>, <a href="https://hive.apache.org/">Apache Hive</a>, <a href="https://impala.apache.org/">Apache Impala</a>, etc. In this post, we discuss the important first step in relational data processing, which is the reading of input data tables.
+This is the first user blog post in a multi-part series where we will focus on relational data processing performance (e.g., SQL) in presence of high-performance network and storage devices - the kind of devices that Crail targets. Relational data processing is one of the most popular and versatile workloads people run in the  cloud. The general idea is that data is stored in tables with a schema, and is processed using a domain specific language like SQL. Examples of some popular systems that support such relational data analytics in the cloud are <a href="https://spark.apache.org/sql/">Apache Spark/SQL</a>, <a href="https://hive.apache.org/">Apache Hive</a>, <a href="https://impala.apache.org/">Apache Impala</a>, etc. In this post, we discuss the important first step in relational data processing, which is the reading of input data tables.
 </p>
 </div>
 
@@ -112,7 +112,7 @@ This is the first blog post in a multi-part series where we will focus on relati
 
 <h3 id="overview">Overview</h3>
 
-<p>In a typical cloud-based relational data processing setup, the input data is stored on an external data storage solution like HDFS or AWS S3. Data tables and their associated schema are converted into a storage-friendly format for optimal performance. Examples of some popular and familiar file formats are <a href="https://parquet.apache.org/">Apache Parquet</a>, <a href="https://orc.apache.org/">Apache ORC</a>, <a href="https://avro.apache.org/">Apache Avro</a>, <a href="https://en.wikipedia.org/wiki/JSON">JSON</a>, etc. More recently, <a href="https://arrow.apache.org/">Apache Arrow</a> has been introduced to standardize the in-memory columnar data representation between multiple frameworks. There is no one size fits all as all these formats have their own strengths, weaknesses, and features. In this blog, we are specifically interested in the performance of these formats on modern high-performance networking and storage devices.</p>
+<p>In a typical cloud-based relational data processing setup, the input data is stored on an external data storage solution like HDFS or AWS S3. Data tables and their associated schema are converted into a storage-friendly format for optimal performance. Examples of some popular and familiar file formats are <a href="https://parquet.apache.org/">Apache Parquet</a>, <a href="https://orc.apache.org/">Apache ORC</a>, <a href="https://avro.apache.org/">Apache Avro</a>, <a href="https://en.wikipedia.org/wiki/JSON">JSON</a>, etc. More recently, <a href="https://arrow.apache.org/">Apache Arrow</a> has been introduced to standardize the in-memory columnar data representation between multiple frameworks. To be precise, Arrow is not a storage format but it defines an <a href="https://github.com/apache/arrow/blob/master/format/IPC.md">interprocess communication (IPC) format</a> that can be used to store data in a storage system (our binding for reading Arrow IPC messages from HDFS is available
  <a href="https://github.com/zrlio/fileformat-benchmarks/blob/master/src/main/java/com/github/animeshtrivedi/FileBench/HdfsSeekableByteChannel.java">here</a>). There is no one size fits all as all these formats have their own strengths, weaknesses, and features. In this blog, we are specifically interested in the performance of these formats on modern high-performance networking and storage devices.</p>
 
 <figure><div style="text-align:center"><img src="//crail.incubator.apache.org/img/blog/sql-p1/outline.svg" width="550" /><figcaption>Figure 1: The benchmarking setup with HDFS and file formats on a 100 Gbps network with NVMe flash devices. All formats contains routines for compression, encoding, and value materialization with associated I/O buffer management and data copies routines.<p></p></figcaption></div></figure>
 
@@ -120,7 +120,7 @@ This is the first blog post in a multi-part series where we will focus on relati
 
 <figure><div style="text-align:center"><img src="//crail.incubator.apache.org/img/blog/sql-p1/performance-all.svg" width="550" /><figcaption>Figure 2: Performance of JSON, Avro, Parquet, ORC, and Arrow on NVMe devices over a 100 Gbps network.<p></p></figcaption></div></figure>
 
-<p>We evaluate the performance of the benchmark on a 3 node HDFS cluster connected using 100 Gbps RoCE. One datanode in HDFS contains 4 NVMe devices with a collective aggregate bandwidth of 12.5 GB/sec (equals to 100 Gbps, hence, we have a balanced network and storage performance). Figure 2 shows our results where none of the file formats is able to deliver the full hardware performance for reading input files. One third of the performance is already lost in HDFS (maximum throughput 74.9 Gbps out of possible 100 Gbps). The rest of the performance is lost inside the file format implementation, which needs to deal with encoding, buffer and I/O management, compression, etc. The best performer is Apache Arrow which is designed for in-memory columnar datasets. The performance of these file formats are bounded by the performance of the CPU, which is 100% loaded during the experiment. For a detailed analysis of the file formats, please refer to our paper - <a href="https://www.usenix.org/c
 onference/atc18/presentation/trivedi">Albis: High-Performance File Format for Big Data Systems (USENIX, ATC’18)</a>.</p>
+<p>We evaluate the performance of the benchmark on a 3 node HDFS cluster connected using 100 Gbps RoCE. One datanode in HDFS contains 4 NVMe devices with a collective aggregate bandwidth of 12.5 GB/sec (equals to 100 Gbps, hence, we have a balanced network and storage performance). Figure 2 shows our results where none of the file formats is able to deliver the full hardware performance for reading input files. One third of the performance is already lost in HDFS (maximum throughput 74.9 Gbps out of possible 100 Gbps). The rest of the performance is lost inside the file format implementation, which needs to deal with encoding, buffer and I/O management, compression, etc. The best performer is Apache Arrow which is designed for in-memory columnar datasets. The performance of these file formats are bounded by the performance of the CPU, which is 100% loaded during the experiment. For a detailed analysis of the file formats, please refer to our paper - <a href="https://www.usenix.org/c
 onference/atc18/presentation/trivedi">Albis: High-Performance File Format for Big Data Systems (USENIX, ATC’18)</a>. As a side-note on the Arrow performance - we have evaluated the performance of the <em>implementation of Arrow’s Java library</em>. As this library has been focused on interactions with off-heap memory, there is headroom for optimizing the HDFS/on-heap reading path of Arrow’s Java library.</p>
 
 <h3 id="albis-high-performance-file-format-for-big-data-systems">Albis: High-Performance File Format for Big Data Systems</h3>
 
@@ -180,7 +180,7 @@ This is the first blog post in a multi-part series where we will focus on relati
 <h3 id="summary">Summary</h3>
 <div style="text-align: justify">
 <p>
-In this first blog of a multipart series, we have looked at the data ingestion performance of file formats on high-performance networking and storage devices. We found that popular file formats are in need for a performance revision. Based on our analysis, we designed and implemented Albis - a new file format for storing relational data. Albis and Crail share many design choices. Their combined performance of 85+ Gbps on a 100 Gbps network, gives us confidence in our approach and underlying software philosophy for both, Crail and Albis.
+In this first blog of a multipart series, we have looked at the data ingestion performance of file formats on high-performance networking and storage devices. We found that popular file formats are in need for a performance revision. Based on our analysis, we designed and implemented Albis - a new file format for storing relational data. Albis and Crail share many design choices. Their combined performance of 85+ Gbps on a 100 Gbps network, gives us confidence in our approach and underlying software philosophy for both, Crail and Albis. 
 </p>
 
 <p>

http://git-wip-us.apache.org/repos/asf/incubator-crail-website/blob/8e2b6190/content/blog/index.html
----------------------------------------------------------------------
diff --git a/content/blog/index.html b/content/blog/index.html
index feb6f7c..0efa9ee 100644
--- a/content/blog/index.html
+++ b/content/blog/index.html
@@ -82,7 +82,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Animesh Trivedi, Patrick Stuedi, Jonas Pfefferle, Adrian Schuepbach, and Bernard Metzler on August 8, 2018</p>
+		
+    <p class="post-meta"><b>User Post</b> by Animesh Trivedi on August 8, 2018</p>
+		
 </div>
 <hr>
 
@@ -93,7 +95,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Adrian Schuepbach and Patrick Stuedi on November 21, 2017</p>
+		
+    <p class="post-meta"><b>Developer Post</b> by Adrian Schuepbach and Patrick Stuedi on November 21, 2017</p>
+    
 </div>
 <hr>
 
@@ -104,7 +108,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Jonas Pfefferle, Patrick Stuedi, Animesh Trivedi, Bernard Metzler, Adrian Schuepbach on November 17, 2017</p>
+		
+    <p class="post-meta"><b>User Post</b> by Jonas Pfefferle, Patrick Stuedi, Animesh Trivedi, Bernard Metzler, Adrian Schuepbach on November 17, 2017</p>
+		
 </div>
 <hr>
 
@@ -115,7 +121,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Jonas Pfefferle on August 22, 2017</p>
+		
+    <p class="post-meta"><b>Developer Post</b> by Jonas Pfefferle on August 22, 2017</p>
+    
 </div>
 <hr>
 
@@ -126,7 +134,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Patrick Stuedi on August 18, 2017</p>
+		
+    <p class="post-meta"><b>Developer Post</b> by Patrick Stuedi on August 18, 2017</p>
+    
 </div>
 <hr>
 
@@ -137,7 +147,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Patrick Stuedi on January 17, 2017</p>
+		
+    <p class="post-meta"><b>User Post</b> by Patrick Stuedi on January 17, 2017</p>
+		
 </div>
 <hr>
 

http://git-wip-us.apache.org/repos/asf/incubator-crail-website/blob/8e2b6190/content/blog/page2/index.html
----------------------------------------------------------------------
diff --git a/content/blog/page2/index.html b/content/blog/page2/index.html
index fe39c56..4154bc4 100644
--- a/content/blog/page2/index.html
+++ b/content/blog/page2/index.html
@@ -82,7 +82,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Animesh Trivedi, Patrick Stuedi, Jonas Pfefferle, Adrian Schuepbach, and Bernard Metzler on August 8, 2018</p>
+		
+    <p class="post-meta"><b>User Post</b> by Animesh Trivedi on August 8, 2018</p>
+		
 </div>
 <hr>
 
@@ -93,7 +95,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Adrian Schuepbach and Patrick Stuedi on November 21, 2017</p>
+		
+    <p class="post-meta"><b>Developer Post</b> by Adrian Schuepbach and Patrick Stuedi on November 21, 2017</p>
+    
 </div>
 <hr>
 
@@ -104,7 +108,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Jonas Pfefferle, Patrick Stuedi, Animesh Trivedi, Bernard Metzler, Adrian Schuepbach on November 17, 2017</p>
+		
+    <p class="post-meta"><b>User Post</b> by Jonas Pfefferle, Patrick Stuedi, Animesh Trivedi, Bernard Metzler, Adrian Schuepbach on November 17, 2017</p>
+		
 </div>
 <hr>
 
@@ -115,7 +121,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Jonas Pfefferle on August 22, 2017</p>
+		
+    <p class="post-meta"><b>Developer Post</b> by Jonas Pfefferle on August 22, 2017</p>
+    
 </div>
 <hr>
 
@@ -126,7 +134,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Patrick Stuedi on August 18, 2017</p>
+		
+    <p class="post-meta"><b>Developer Post</b> by Patrick Stuedi on August 18, 2017</p>
+    
 </div>
 <hr>
 
@@ -137,7 +147,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Patrick Stuedi on January 17, 2017</p>
+		
+    <p class="post-meta"><b>User Post</b> by Patrick Stuedi on January 17, 2017</p>
+		
 </div>
 <hr>
 

http://git-wip-us.apache.org/repos/asf/incubator-crail-website/blob/8e2b6190/content/blog/page3/index.html
----------------------------------------------------------------------
diff --git a/content/blog/page3/index.html b/content/blog/page3/index.html
index 66c1c19..3953e16 100644
--- a/content/blog/page3/index.html
+++ b/content/blog/page3/index.html
@@ -82,7 +82,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Animesh Trivedi, Patrick Stuedi, Jonas Pfefferle, Adrian Schuepbach, and Bernard Metzler on August 8, 2018</p>
+		
+    <p class="post-meta"><b>User Post</b> by Animesh Trivedi on August 8, 2018</p>
+		
 </div>
 <hr>
 
@@ -93,7 +95,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Adrian Schuepbach and Patrick Stuedi on November 21, 2017</p>
+		
+    <p class="post-meta"><b>Developer Post</b> by Adrian Schuepbach and Patrick Stuedi on November 21, 2017</p>
+    
 </div>
 <hr>
 
@@ -104,7 +108,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Jonas Pfefferle, Patrick Stuedi, Animesh Trivedi, Bernard Metzler, Adrian Schuepbach on November 17, 2017</p>
+		
+    <p class="post-meta"><b>User Post</b> by Jonas Pfefferle, Patrick Stuedi, Animesh Trivedi, Bernard Metzler, Adrian Schuepbach on November 17, 2017</p>
+		
 </div>
 <hr>
 
@@ -115,7 +121,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Jonas Pfefferle on August 22, 2017</p>
+		
+    <p class="post-meta"><b>Developer Post</b> by Jonas Pfefferle on August 22, 2017</p>
+    
 </div>
 <hr>
 
@@ -126,7 +134,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Patrick Stuedi on August 18, 2017</p>
+		
+    <p class="post-meta"><b>Developer Post</b> by Patrick Stuedi on August 18, 2017</p>
+    
 </div>
 <hr>
 
@@ -137,7 +147,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Patrick Stuedi on January 17, 2017</p>
+		
+    <p class="post-meta"><b>User Post</b> by Patrick Stuedi on January 17, 2017</p>
+		
 </div>
 <hr>
 

http://git-wip-us.apache.org/repos/asf/incubator-crail-website/blob/8e2b6190/content/blog/page4/index.html
----------------------------------------------------------------------
diff --git a/content/blog/page4/index.html b/content/blog/page4/index.html
index bd8bfff..62129bb 100644
--- a/content/blog/page4/index.html
+++ b/content/blog/page4/index.html
@@ -82,7 +82,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Animesh Trivedi, Patrick Stuedi, Jonas Pfefferle, Adrian Schuepbach, and Bernard Metzler on August 8, 2018</p>
+		
+    <p class="post-meta"><b>User Post</b> by Animesh Trivedi on August 8, 2018</p>
+		
 </div>
 <hr>
 
@@ -93,7 +95,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Adrian Schuepbach and Patrick Stuedi on November 21, 2017</p>
+		
+    <p class="post-meta"><b>Developer Post</b> by Adrian Schuepbach and Patrick Stuedi on November 21, 2017</p>
+    
 </div>
 <hr>
 
@@ -104,7 +108,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Jonas Pfefferle, Patrick Stuedi, Animesh Trivedi, Bernard Metzler, Adrian Schuepbach on November 17, 2017</p>
+		
+    <p class="post-meta"><b>User Post</b> by Jonas Pfefferle, Patrick Stuedi, Animesh Trivedi, Bernard Metzler, Adrian Schuepbach on November 17, 2017</p>
+		
 </div>
 <hr>
 
@@ -115,7 +121,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Jonas Pfefferle on August 22, 2017</p>
+		
+    <p class="post-meta"><b>Developer Post</b> by Jonas Pfefferle on August 22, 2017</p>
+    
 </div>
 <hr>
 
@@ -126,7 +134,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Patrick Stuedi on August 18, 2017</p>
+		
+    <p class="post-meta"><b>Developer Post</b> by Patrick Stuedi on August 18, 2017</p>
+    
 </div>
 <hr>
 
@@ -137,7 +147,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Patrick Stuedi on January 17, 2017</p>
+		
+    <p class="post-meta"><b>User Post</b> by Patrick Stuedi on January 17, 2017</p>
+		
 </div>
 <hr>
 

http://git-wip-us.apache.org/repos/asf/incubator-crail-website/blob/8e2b6190/content/blog/page5/index.html
----------------------------------------------------------------------
diff --git a/content/blog/page5/index.html b/content/blog/page5/index.html
index 2aaeb13..bb9ecdd 100644
--- a/content/blog/page5/index.html
+++ b/content/blog/page5/index.html
@@ -82,7 +82,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Animesh Trivedi, Patrick Stuedi, Jonas Pfefferle, Adrian Schuepbach, and Bernard Metzler on August 8, 2018</p>
+		
+    <p class="post-meta"><b>User Post</b> by Animesh Trivedi on August 8, 2018</p>
+		
 </div>
 <hr>
 
@@ -93,7 +95,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Adrian Schuepbach and Patrick Stuedi on November 21, 2017</p>
+		
+    <p class="post-meta"><b>Developer Post</b> by Adrian Schuepbach and Patrick Stuedi on November 21, 2017</p>
+    
 </div>
 <hr>
 
@@ -104,7 +108,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Jonas Pfefferle, Patrick Stuedi, Animesh Trivedi, Bernard Metzler, Adrian Schuepbach on November 17, 2017</p>
+		
+    <p class="post-meta"><b>User Post</b> by Jonas Pfefferle, Patrick Stuedi, Animesh Trivedi, Bernard Metzler, Adrian Schuepbach on November 17, 2017</p>
+		
 </div>
 <hr>
 
@@ -115,7 +121,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Jonas Pfefferle on August 22, 2017</p>
+		
+    <p class="post-meta"><b>Developer Post</b> by Jonas Pfefferle on August 22, 2017</p>
+    
 </div>
 <hr>
 
@@ -126,7 +134,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Patrick Stuedi on August 18, 2017</p>
+		
+    <p class="post-meta"><b>Developer Post</b> by Patrick Stuedi on August 18, 2017</p>
+    
 </div>
 <hr>
 
@@ -137,7 +147,9 @@
         </h2>
         
     </a>
-    <p class="post-meta">Posted by Patrick Stuedi on January 17, 2017</p>
+		
+    <p class="post-meta"><b>User Post</b> by Patrick Stuedi on January 17, 2017</p>
+		
 </div>
 <hr>
 

http://git-wip-us.apache.org/repos/asf/incubator-crail-website/blob/8e2b6190/content/feed.xml
----------------------------------------------------------------------
diff --git a/content/feed.xml b/content/feed.xml
index 5a18596..c02e891 100644
--- a/content/feed.xml
+++ b/content/feed.xml
@@ -1,6 +1,6 @@
-<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom" ><generator uri="https://jekyllrb.com/" version="3.6.2">Jekyll</generator><link href="http://crail.incubator.apache.org//feed.xml" rel="self" type="application/atom+xml" /><link href="http://crail.incubator.apache.org//" rel="alternate" type="text/html" /><updated>2018-08-14T15:50:25+02:00</updated><id>http://crail.incubator.apache.org//</id><title type="html">The Apache Crail (Incubating) Project</title><entry><title type="html">Sql P1 News</title><link href="http://crail.incubator.apache.org//blog/2018/08/sql-p1-news.html" rel="alternate" type="text/html" title="Sql P1 News" /><published>2018-08-09T00:00:00+02:00</published><updated>2018-08-09T00:00:00+02:00</updated><id>http://crail.incubator.apache.org//blog/2018/08/sql-p1-news</id><content type="html" xml:base="http://crail.incubator.apache.org//blog/2018/08/sql-p1-news.html">&lt;p&gt;A new blog &lt;a href=&quot;//crail.incubator.apache.org/blog/201
 8/08/sql-p1.html&quot;&gt;post&lt;/a&gt; discussing file formats performance is now online&lt;/p&gt;</content><author><name></name></author><category term="news" /><summary type="html">A new blog post discussing file formats performance is now online</summary></entry><entry><title type="html">SQL Performance: Part 1 - Input File Formats</title><link href="http://crail.incubator.apache.org//blog/2018/08/sql-p1.html" rel="alternate" type="text/html" title="SQL Performance: Part 1 - Input File Formats" /><published>2018-08-08T00:00:00+02:00</published><updated>2018-08-08T00:00:00+02:00</updated><id>http://crail.incubator.apache.org//blog/2018/08/sql-p1</id><content type="html" xml:base="http://crail.incubator.apache.org//blog/2018/08/sql-p1.html">&lt;div style=&quot;text-align: justify&quot;&gt;
+<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom" ><generator uri="https://jekyllrb.com/" version="3.7.0">Jekyll</generator><link href="http://crail.incubator.apache.org//feed.xml" rel="self" type="application/atom+xml" /><link href="http://crail.incubator.apache.org//" rel="alternate" type="text/html" /><updated>2018-09-06T11:26:35+02:00</updated><id>http://crail.incubator.apache.org//</id><title type="html">The Apache Crail (Incubating) Project</title><entry><title type="html">Sql P1 News</title><link href="http://crail.incubator.apache.org//blog/2018/08/sql-p1-news.html" rel="alternate" type="text/html" title="Sql P1 News" /><published>2018-08-09T00:00:00+02:00</published><updated>2018-08-09T00:00:00+02:00</updated><id>http://crail.incubator.apache.org//blog/2018/08/sql-p1-news</id><content type="html" xml:base="http://crail.incubator.apache.org//blog/2018/08/sql-p1-news.html">&lt;p&gt;A new blog &lt;a href=&quot;//crail.incubator.apache.org/blog/201
 8/08/sql-p1.html&quot;&gt;post&lt;/a&gt; discussing file formats performance is now online&lt;/p&gt;</content><author><name></name></author><category term="news" /><summary type="html">A new blog post discussing file formats performance is now online</summary></entry><entry><title type="html">SQL Performance: Part 1 - Input File Formats</title><link href="http://crail.incubator.apache.org//blog/2018/08/sql-p1.html" rel="alternate" type="text/html" title="SQL Performance: Part 1 - Input File Formats" /><published>2018-08-08T00:00:00+02:00</published><updated>2018-08-08T00:00:00+02:00</updated><id>http://crail.incubator.apache.org//blog/2018/08/sql-p1</id><content type="html" xml:base="http://crail.incubator.apache.org//blog/2018/08/sql-p1.html">&lt;div style=&quot;text-align: justify&quot;&gt;
 &lt;p&gt;
-This is the first blog post in a multi-part series where we will focus on relational data processing performance (e.g., SQL) in presence of high-performance network and storage devices - the kind of devices that Crail targets. Relational data processing is one of the most popular and versatile workloads people run in the  cloud. The general idea is that data is stored in tables with a schema, and is processed using a domain specific language like SQL. Examples of some popular systems that support such relational data analytics in the cloud are &lt;a href=&quot;https://spark.apache.org/sql/&quot;&gt;Apache Spark/SQL&lt;/a&gt;, &lt;a href=&quot;https://hive.apache.org/&quot;&gt;Apache Hive&lt;/a&gt;, &lt;a href=&quot;https://impala.apache.org/&quot;&gt;Apache Impala&lt;/a&gt;, etc. In this post, we discuss the important first step in relational data processing, which is the reading of input data tables.
+This is the first user blog post in a multi-part series where we will focus on relational data processing performance (e.g., SQL) in presence of high-performance network and storage devices - the kind of devices that Crail targets. Relational data processing is one of the most popular and versatile workloads people run in the  cloud. The general idea is that data is stored in tables with a schema, and is processed using a domain specific language like SQL. Examples of some popular systems that support such relational data analytics in the cloud are &lt;a href=&quot;https://spark.apache.org/sql/&quot;&gt;Apache Spark/SQL&lt;/a&gt;, &lt;a href=&quot;https://hive.apache.org/&quot;&gt;Apache Hive&lt;/a&gt;, &lt;a href=&quot;https://impala.apache.org/&quot;&gt;Apache Impala&lt;/a&gt;, etc. In this post, we discuss the important first step in relational data processing, which is the reading of input data tables.
 &lt;/p&gt;
 &lt;/div&gt;
 
@@ -33,7 +33,7 @@ This is the first blog post in a multi-part series where we will focus on relati
 
 &lt;h3 id=&quot;overview&quot;&gt;Overview&lt;/h3&gt;
 
-&lt;p&gt;In a typical cloud-based relational data processing setup, the input data is stored on an external data storage solution like HDFS or AWS S3. Data tables and their associated schema are converted into a storage-friendly format for optimal performance. Examples of some popular and familiar file formats are &lt;a href=&quot;https://parquet.apache.org/&quot;&gt;Apache Parquet&lt;/a&gt;, &lt;a href=&quot;https://orc.apache.org/&quot;&gt;Apache ORC&lt;/a&gt;, &lt;a href=&quot;https://avro.apache.org/&quot;&gt;Apache Avro&lt;/a&gt;, &lt;a href=&quot;https://en.wikipedia.org/wiki/JSON&quot;&gt;JSON&lt;/a&gt;, etc. More recently, &lt;a href=&quot;https://arrow.apache.org/&quot;&gt;Apache Arrow&lt;/a&gt; has been introduced to standardize the in-memory columnar data representation between multiple frameworks. There is no one size fits all as all these formats have their own strengths, weaknesses, and features. In this blog, we are specifically interested in the performance of these 
 formats on modern high-performance networking and storage devices.&lt;/p&gt;
+&lt;p&gt;In a typical cloud-based relational data processing setup, the input data is stored on an external data storage solution like HDFS or AWS S3. Data tables and their associated schema are converted into a storage-friendly format for optimal performance. Examples of some popular and familiar file formats are &lt;a href=&quot;https://parquet.apache.org/&quot;&gt;Apache Parquet&lt;/a&gt;, &lt;a href=&quot;https://orc.apache.org/&quot;&gt;Apache ORC&lt;/a&gt;, &lt;a href=&quot;https://avro.apache.org/&quot;&gt;Apache Avro&lt;/a&gt;, &lt;a href=&quot;https://en.wikipedia.org/wiki/JSON&quot;&gt;JSON&lt;/a&gt;, etc. More recently, &lt;a href=&quot;https://arrow.apache.org/&quot;&gt;Apache Arrow&lt;/a&gt; has been introduced to standardize the in-memory columnar data representation between multiple frameworks. To be precise, Arrow is not a storage format but it defines an &lt;a href=&quot;https://github.com/apache/arrow/blob/master/format/IPC.md&quot;&gt;interprocess communication (I
 PC) format&lt;/a&gt; that can be used to store data in a storage system (our binding for reading Arrow IPC messages from HDFS is available &lt;a href=&quot;https://github.com/zrlio/fileformat-benchmarks/blob/master/src/main/java/com/github/animeshtrivedi/FileBench/HdfsSeekableByteChannel.java&quot;&gt;here&lt;/a&gt;). There is no one size fits all as all these formats have their own strengths, weaknesses, and features. In this blog, we are specifically interested in the performance of these formats on modern high-performance networking and storage devices.&lt;/p&gt;
 
 &lt;figure&gt;&lt;div style=&quot;text-align:center&quot;&gt;&lt;img src=&quot;//crail.incubator.apache.org/img/blog/sql-p1/outline.svg&quot; width=&quot;550&quot; /&gt;&lt;figcaption&gt;Figure 1: The benchmarking setup with HDFS and file formats on a 100 Gbps network with NVMe flash devices. All formats contains routines for compression, encoding, and value materialization with associated I/O buffer management and data copies routines.&lt;p&gt;&lt;/p&gt;&lt;/figcaption&gt;&lt;/div&gt;&lt;/figure&gt;
 
@@ -41,7 +41,7 @@ This is the first blog post in a multi-part series where we will focus on relati
 
 &lt;figure&gt;&lt;div style=&quot;text-align:center&quot;&gt;&lt;img src=&quot;//crail.incubator.apache.org/img/blog/sql-p1/performance-all.svg&quot; width=&quot;550&quot; /&gt;&lt;figcaption&gt;Figure 2: Performance of JSON, Avro, Parquet, ORC, and Arrow on NVMe devices over a 100 Gbps network.&lt;p&gt;&lt;/p&gt;&lt;/figcaption&gt;&lt;/div&gt;&lt;/figure&gt;
 
-&lt;p&gt;We evaluate the performance of the benchmark on a 3 node HDFS cluster connected using 100 Gbps RoCE. One datanode in HDFS contains 4 NVMe devices with a collective aggregate bandwidth of 12.5 GB/sec (equals to 100 Gbps, hence, we have a balanced network and storage performance). Figure 2 shows our results where none of the file formats is able to deliver the full hardware performance for reading input files. One third of the performance is already lost in HDFS (maximum throughput 74.9 Gbps out of possible 100 Gbps). The rest of the performance is lost inside the file format implementation, which needs to deal with encoding, buffer and I/O management, compression, etc. The best performer is Apache Arrow which is designed for in-memory columnar datasets. The performance of these file formats are bounded by the performance of the CPU, which is 100% loaded during the experiment. For a detailed analysis of the file formats, please refer to our paper - &lt;a href=&quot;https://ww
 w.usenix.org/conference/atc18/presentation/trivedi&quot;&gt;Albis: High-Performance File Format for Big Data Systems (USENIX, ATC’18)&lt;/a&gt;.&lt;/p&gt;
+&lt;p&gt;We evaluate the performance of the benchmark on a 3 node HDFS cluster connected using 100 Gbps RoCE. One datanode in HDFS contains 4 NVMe devices with a collective aggregate bandwidth of 12.5 GB/sec (equals to 100 Gbps, hence, we have a balanced network and storage performance). Figure 2 shows our results where none of the file formats is able to deliver the full hardware performance for reading input files. One third of the performance is already lost in HDFS (maximum throughput 74.9 Gbps out of possible 100 Gbps). The rest of the performance is lost inside the file format implementation, which needs to deal with encoding, buffer and I/O management, compression, etc. The best performer is Apache Arrow which is designed for in-memory columnar datasets. The performance of these file formats are bounded by the performance of the CPU, which is 100% loaded during the experiment. For a detailed analysis of the file formats, please refer to our paper - &lt;a href=&quot;https://ww
 w.usenix.org/conference/atc18/presentation/trivedi&quot;&gt;Albis: High-Performance File Format for Big Data Systems (USENIX, ATC’18)&lt;/a&gt;. As a side-note on the Arrow performance - we have evaluated the performance of the &lt;em&gt;implementation of Arrow’s Java library&lt;/em&gt;. As this library has been focused on interactions with off-heap memory, there is headroom for optimizing the HDFS/on-heap reading path of Arrow’s Java library.&lt;/p&gt;
 
 &lt;h3 id=&quot;albis-high-performance-file-format-for-big-data-systems&quot;&gt;Albis: High-Performance File Format for Big Data Systems&lt;/h3&gt;
 
@@ -101,13 +101,13 @@ This is the first blog post in a multi-part series where we will focus on relati
 &lt;h3 id=&quot;summary&quot;&gt;Summary&lt;/h3&gt;
 &lt;div style=&quot;text-align: justify&quot;&gt;
 &lt;p&gt;
-In this first blog of a multipart series, we have looked at the data ingestion performance of file formats on high-performance networking and storage devices. We found that popular file formats are in need for a performance revision. Based on our analysis, we designed and implemented Albis - a new file format for storing relational data. Albis and Crail share many design choices. Their combined performance of 85+ Gbps on a 100 Gbps network, gives us confidence in our approach and underlying software philosophy for both, Crail and Albis.
+In this first blog of a multipart series, we have looked at the data ingestion performance of file formats on high-performance networking and storage devices. We found that popular file formats are in need for a performance revision. Based on our analysis, we designed and implemented Albis - a new file format for storing relational data. Albis and Crail share many design choices. Their combined performance of 85+ Gbps on a 100 Gbps network, gives us confidence in our approach and underlying software philosophy for both, Crail and Albis. 
 &lt;/p&gt;
 
 &lt;p&gt;
 Stay tuned for the next part where we look at workload-level performance in Spark/SQL on modern high-performance networking and storage devices. Meanwhile let us know if you have any feedback or comments. 
 &lt;/p&gt;
-&lt;/div&gt;</content><author><name>Animesh Trivedi, Patrick Stuedi, Jonas Pfefferle, Adrian Schuepbach, and Bernard Metzler</name></author><category term="blog" /><summary type="html">This is the first blog post in a multi-part series where we will focus on relational data processing performance (e.g., SQL) in presence of high-performance network and storage devices - the kind of devices that Crail targets. Relational data processing is one of the most popular and versatile workloads people run in the cloud. The general idea is that data is stored in tables with a schema, and is processed using a domain specific language like SQL. Examples of some popular systems that support such relational data analytics in the cloud are Apache Spark/SQL, Apache Hive, Apache Impala, etc. In this post, we discuss the important first step in relational data processing, which is the reading of input data tables.</summary></entry><entry><title type="html">Sparksummit</title><link href="http://crail.i
 ncubator.apache.org//blog/2018/06/sparksummit.html" rel="alternate" type="text/html" title="Sparksummit" /><published>2018-06-05T00:00:00+02:00</published><updated>2018-06-05T00:00:00+02:00</updated><id>http://crail.incubator.apache.org//blog/2018/06/sparksummit</id><content type="html" xml:base="http://crail.incubator.apache.org//blog/2018/06/sparksummit.html">&lt;p&gt;A Spark serverless architecture powered by Crail will be presented today at the &lt;a href=&quot;https://databricks.com/session/serverless-machine-learning-on-modern-hardware-using-apache-spark&quot;&gt;Spark Summit&lt;/a&gt;&lt;/p&gt;</content><author><name></name></author><category term="news" /><summary type="html">A Spark serverless architecture powered by Crail will be presented today at the Spark Summit</summary></entry><entry><title type="html">Dataworks</title><link href="http://crail.incubator.apache.org//blog/2018/06/dataworks.html" rel="alternate" type="text/html" title="Dataworks" /><published>2018-06-05T
 00:00:00+02:00</published><updated>2018-06-05T00:00:00+02:00</updated><id>http://crail.incubator.apache.org//blog/2018/06/dataworks</id><content type="html" xml:base="http://crail.incubator.apache.org//blog/2018/06/dataworks.html">&lt;p&gt;Apache Crail (incubating) to feature in the &lt;a href=&quot;https://dataworkssummit.com/san-jose-2018/session/data-processing-at-the-speed-of-100-gbpsapache-crail-incubating/&quot;&gt;DataWorks Summit&lt;/a&gt; on June 21st&lt;/p&gt;</content><author><name></name></author><category term="news" /><summary type="html">Apache Crail (incubating) to feature in the DataWorks Summit on June 21st</summary></entry><entry><title type="html">Apache Release</title><link href="http://crail.incubator.apache.org//blog/2018/06/apache-release.html" rel="alternate" type="text/html" title="Apache Release" /><published>2018-06-04T00:00:00+02:00</published><updated>2018-06-04T00:00:00+02:00</updated><id>http://crail.incubator.apache.org//blog/2018/06/apache-release</
 id><content type="html" xml:base="http://crail.incubator.apache.org//blog/2018/06/apache-release.html">&lt;p&gt;Apache Crail 1.0 incubator &lt;a href=&quot;//crail.incubator.apache.org/download&quot;&gt;release&lt;/a&gt;&lt;/p&gt;</content><author><name></name></author><category term="news" /><summary type="html">Apache Crail 1.0 incubator release</summary></entry><entry><title type="html">Apache</title><link href="http://crail.incubator.apache.org//blog/2018/01/apache.html" rel="alternate" type="text/html" title="Apache" /><published>2018-01-22T00:00:00+01:00</published><updated>2018-01-22T00:00:00+01:00</updated><id>http://crail.incubator.apache.org//blog/2018/01/apache</id><content type="html" xml:base="http://crail.incubator.apache.org//blog/2018/01/apache.html">&lt;p&gt;Crail is now an Apache Incubator project!&lt;/p&gt;</content><author><name></name></author><category term="news" /><summary type="html">Crail is now an Apache Incubator project!</summary></entry><entry><title ty
 pe="html">Iops</title><link href="http://crail.incubator.apache.org//blog/2017/11/iops.html" rel="alternate" type="text/html" title="Iops" /><published>2017-11-23T00:00:00+01:00</published><updated>2017-11-23T00:00:00+01:00</updated><id>http://crail.incubator.apache.org//blog/2017/11/iops</id><content type="html" xml:base="http://crail.incubator.apache.org//blog/2017/11/iops.html">&lt;p&gt;New blog &lt;a href=&quot;//crail.incubator.apache.org/blog/2017/11/crail-metadata.html&quot;&gt;post&lt;/a&gt; about Crail’s metadata performance and scalability&lt;/p&gt;</content><author><name></name></author><category term="news" /><summary type="html">New blog post about Crail’s metadata performance and scalability</summary></entry><entry><title type="html">Crail Storage Performance – Part III: Metadata</title><link href="http://crail.incubator.apache.org//blog/2017/11/crail-metadata.html" rel="alternate" type="text/html" title="Crail Storage Performance -- Part III: Metadata" /><publis
 hed>2017-11-21T00:00:00+01:00</published><updated>2017-11-21T00:00:00+01:00</updated><id>http://crail.incubator.apache.org//blog/2017/11/crail-metadata</id><content type="html" xml:base="http://crail.incubator.apache.org//blog/2017/11/crail-metadata.html">&lt;div style=&quot;text-align: justify&quot;&gt;
+&lt;/div&gt;</content><author><name>Animesh Trivedi</name></author><category term="blog" /><summary type="html">This is the first user blog post in a multi-part series where we will focus on relational data processing performance (e.g., SQL) in presence of high-performance network and storage devices - the kind of devices that Crail targets. Relational data processing is one of the most popular and versatile workloads people run in the cloud. The general idea is that data is stored in tables with a schema, and is processed using a domain specific language like SQL. Examples of some popular systems that support such relational data analytics in the cloud are Apache Spark/SQL, Apache Hive, Apache Impala, etc. In this post, we discuss the important first step in relational data processing, which is the reading of input data tables.</summary></entry><entry><title type="html">Sparksummit</title><link href="http://crail.incubator.apache.org//blog/2018/06/sparksummit.html" rel="alternate" 
 type="text/html" title="Sparksummit" /><published>2018-06-05T00:00:00+02:00</published><updated>2018-06-05T00:00:00+02:00</updated><id>http://crail.incubator.apache.org//blog/2018/06/sparksummit</id><content type="html" xml:base="http://crail.incubator.apache.org//blog/2018/06/sparksummit.html">&lt;p&gt;A Spark serverless architecture powered by Crail will be presented today at the &lt;a href=&quot;https://databricks.com/session/serverless-machine-learning-on-modern-hardware-using-apache-spark&quot;&gt;Spark Summit&lt;/a&gt;&lt;/p&gt;</content><author><name></name></author><category term="news" /><summary type="html">A Spark serverless architecture powered by Crail will be presented today at the Spark Summit</summary></entry><entry><title type="html">Dataworks</title><link href="http://crail.incubator.apache.org//blog/2018/06/dataworks.html" rel="alternate" type="text/html" title="Dataworks" /><published>2018-06-05T00:00:00+02:00</published><updated>2018-06-05T00:00:00+02:00</update
 d><id>http://crail.incubator.apache.org//blog/2018/06/dataworks</id><content type="html" xml:base="http://crail.incubator.apache.org//blog/2018/06/dataworks.html">&lt;p&gt;Apache Crail (incubating) to feature in the &lt;a href=&quot;https://dataworkssummit.com/san-jose-2018/session/data-processing-at-the-speed-of-100-gbpsapache-crail-incubating/&quot;&gt;DataWorks Summit&lt;/a&gt; on June 21st&lt;/p&gt;</content><author><name></name></author><category term="news" /><summary type="html">Apache Crail (incubating) to feature in the DataWorks Summit on June 21st</summary></entry><entry><title type="html">Apache Release</title><link href="http://crail.incubator.apache.org//blog/2018/06/apache-release.html" rel="alternate" type="text/html" title="Apache Release" /><published>2018-06-04T00:00:00+02:00</published><updated>2018-06-04T00:00:00+02:00</updated><id>http://crail.incubator.apache.org//blog/2018/06/apache-release</id><content type="html" xml:base="http://crail.incubator.apache.org/
 /blog/2018/06/apache-release.html">&lt;p&gt;Apache Crail 1.0 incubator &lt;a href=&quot;//crail.incubator.apache.org/download&quot;&gt;release&lt;/a&gt;&lt;/p&gt;</content><author><name></name></author><category term="news" /><summary type="html">Apache Crail 1.0 incubator release</summary></entry><entry><title type="html">Apache</title><link href="http://crail.incubator.apache.org//blog/2018/01/apache.html" rel="alternate" type="text/html" title="Apache" /><published>2018-01-22T00:00:00+01:00</published><updated>2018-01-22T00:00:00+01:00</updated><id>http://crail.incubator.apache.org//blog/2018/01/apache</id><content type="html" xml:base="http://crail.incubator.apache.org//blog/2018/01/apache.html">&lt;p&gt;Crail is now an Apache Incubator project!&lt;/p&gt;</content><author><name></name></author><category term="news" /><summary type="html">Crail is now an Apache Incubator project!</summary></entry><entry><title type="html">Iops</title><link href="http://crail.incubator.apache.org/
 /blog/2017/11/iops.html" rel="alternate" type="text/html" title="Iops" /><published>2017-11-23T00:00:00+01:00</published><updated>2017-11-23T00:00:00+01:00</updated><id>http://crail.incubator.apache.org//blog/2017/11/iops</id><content type="html" xml:base="http://crail.incubator.apache.org//blog/2017/11/iops.html">&lt;p&gt;New blog &lt;a href=&quot;//crail.incubator.apache.org/blog/2017/11/crail-metadata.html&quot;&gt;post&lt;/a&gt; about Crail’s metadata performance and scalability&lt;/p&gt;</content><author><name></name></author><category term="news" /><summary type="html">New blog post about Crail’s metadata performance and scalability</summary></entry><entry><title type="html">Crail Storage Performance – Part III: Metadata</title><link href="http://crail.incubator.apache.org//blog/2017/11/crail-metadata.html" rel="alternate" type="text/html" title="Crail Storage Performance -- Part III: Metadata" /><published>2017-11-21T00:00:00+01:00</published><updated>2017-11-21T00:00:0
 0+01:00</updated><id>http://crail.incubator.apache.org//blog/2017/11/crail-metadata</id><content type="html" xml:base="http://crail.incubator.apache.org//blog/2017/11/crail-metadata.html">&lt;div style=&quot;text-align: justify&quot;&gt;
 &lt;p&gt;
 This is part III of our series of posts discussing Crail's raw storage performance. This part is about Crail's metadata performance and scalability.
 &lt;/p&gt;