Posted to commits@beam.apache.org by da...@apache.org on 2016/11/05 17:12:52 UTC

[49/53] [partial] incubator-beam-site git commit: Move files

http://git-wip-us.apache.org/repos/asf/incubator-beam-site/blob/cc22c462/_posts/2016-03-17-capability-matrix.md
----------------------------------------------------------------------
diff --git a/_posts/2016-03-17-capability-matrix.md b/_posts/2016-03-17-capability-matrix.md
deleted file mode 100644
index 939f704..0000000
--- a/_posts/2016-03-17-capability-matrix.md
+++ /dev/null
@@ -1,598 +0,0 @@
----
-layout: post
-title:  "Clarifying & Formalizing Runner Capabilities"
-date:   2016-03-17 11:00:00 -0700
-excerpt_separator: <!--more-->
-categories: beam capability
-authors:
-  - fjp
-  - takidau
-
-capability-matrix-snapshot:
-  columns:
-    - class: model
-      name: Beam Model
-    - class: dataflow
-      name: Google Cloud Dataflow
-    - class: flink
-      name: Apache Flink
-    - class: spark
-      name: Apache Spark
-  categories:
-    - description: What is being computed?
-      anchor: what
-      color-b: 'ca1'
-      color-y: 'ec3'
-      color-p: 'fe5'
-      color-n: 'ddd'
-      rows:
-        - name: ParDo
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: element-wise processing
-              l3: Element-wise transformation parameterized by a chunk of user code. Elements are processed in bundles, with initialization and termination hooks. Bundle size is chosen by the runner and cannot be controlled by user code. ParDo processes a main input PCollection one element at a time, but provides side input access to additional PCollections.
-            - class: dataflow
-              l1: 'Yes'
-              l2: fully supported
-              l3: Batch mode uses large bundle sizes. Streaming uses smaller bundle sizes.
-            - class: flink
-              l1: 'Yes'
-              l2: fully supported
-              l3: ParDo itself, as per-element transformation with UDFs, is fully supported by Flink for both batch and streaming.
-            - class: spark
-              l1: 'Yes'
-              l2: fully supported
-              l3: ParDo applies per-element transformations as Spark FlatMapFunction.
-        - name: GroupByKey
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: key grouping
-              l3: Grouping of key-value pairs per key, window, and pane. (See also other tabs.)
-            - class: dataflow
-              l1: 'Yes'
-              l2: fully supported
-              l3: ''
-            - class: flink
-              l1: 'Yes'
-              l2: fully supported
-              l3: "Uses Flink's keyBy for key grouping. When grouping by window in streaming (creating the panes) the Flink runner uses the Beam code. This guarantees support for all windowing and triggering mechanisms."
-            - class: spark
-              l1: 'Partially'
-              l2: group by window in batch only
-              l3: "Uses Spark's groupByKey for grouping. Grouping by window is currently only supported in batch."
-        - name: Flatten
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: collection concatenation
-              l3: Concatenates multiple homogeneously typed collections together.
-            - class: dataflow
-              l1: 'Yes'
-              l2: fully supported
-              l3: ''
-            - class: flink
-              l1: 'Yes'
-              l2: fully supported
-              l3: ''
-            - class: spark
-              l1: 'Yes'
-              l2: fully supported
-              l3: ''
-              
-        - name: Combine
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: associative &amp; commutative aggregation
-              l3: 'Application of an associative, commutative operation over all values ("globally") or over all values associated with each key ("per key"). Can be implemented using ParDo, but often more efficient implementations exist.'
-            - class: dataflow
-              l1: 'Yes'
-              l2: 'efficient execution'
-              l3: ''
-            - class: flink
-              l1: 'Yes'
-              l2: 'fully supported'
-              l3: Uses a combiner for pre-aggregation for batch and streaming.
-            - class: spark
-              l1: 'Yes'
-              l2: fully supported
-              l3: Supports GroupedValues, Globally and PerKey.
-
-        - name: Composite Transforms
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: user-defined transformation subgraphs
-              l3: Allows easy extensibility for library writers.  In the near future, we expect there to be more information provided at this level -- customized metadata hooks for monitoring, additional runtime/environment hooks, etc.
-            - class: dataflow
-              l1: 'Partially'
-              l2: supported via inlining
-              l3: Currently composite transformations are inlined during execution. The structure is later recreated from the names, but other transform level information (if added to the model) will be lost.
-            - class: flink
-              l1: 'Partially'
-              l2: supported via inlining
-              l3: ''
-            - class: spark
-              l1: 'Partially'
-              l2: supported via inlining
-              l3: ''
-
-        - name: Side Inputs
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: additional elements available during DoFn execution
-              l3: Side inputs are additional <tt>PCollections</tt> whose contents are computed during pipeline execution and then made accessible to DoFn code. The exact shape of the side input depends both on the <tt>PCollectionView</tt> used to describe the access pattern (iterable, map, singleton) and the window of the element from the main input that is currently being processed.
-            - class: dataflow
-              l1: 'Yes'
-              l2: some size restrictions in streaming
-              l3: Batch mode supports a distributed implementation, but streaming mode may force some size restrictions. Neither mode is able to push lookups directly up into key-based sources.
-            - class: flink
-              jira: BEAM-102
-              l1: 'Partially'
-              l2: not supported in streaming
-              l3: Supported in batch. Side inputs for streaming are currently a work in progress.
-            - class: spark
-              l1: 'Partially'
-              l2: not supported in streaming
-              l3: "Side input is actually a broadcast variable in Spark so it can't be updated during the life of a job. Spark-runner implementation of side input is more of an immutable, static, side input."
-
-        - name: Source API
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: user-defined sources
-              l3: Allows users to provide additional input sources. Supports both bounded and unbounded data. Includes hooks necessary to provide efficient parallelization (size estimation, progress information, dynamic splitting, etc).
-            - class: dataflow
-              l1: 'Yes'
-              l2: fully supported
-              l3: 
-            - class: flink
-              jira: BEAM-103
-              l1: 'Partially'
-              l2: parallelism 1 in streaming
-              l3: Fully supported in batch. In streaming, sources currently run with parallelism 1.
-            - class: spark
-              l1: 'Yes'
-              l2: fully supported
-              l3: 
-              
-        - name: Aggregators
-          values:
-            - class: model
-              l1: 'Partially'
-              l2: user-provided metrics
-              l3: Allow transforms to aggregate simple metrics across bundles in a <tt>DoFn</tt>. Semantically equivalent to using a side output, but support partial results as the transform executes. Will likely want to augment <tt>Aggregators</tt> to be more useful for processing unbounded data by making them windowed.
-            - class: dataflow
-              l1: 'Partially'
-              l2: may miscount in streaming mode
-              l3: Current model is fully supported in batch mode. In streaming mode, <tt>Aggregators</tt> may under or overcount when bundles are retried.
-            - class: flink
-              l1: 'Partially'
-              l2: may undercount in streaming
-              l3: Current model is fully supported in batch. In streaming mode, <tt>Aggregators</tt> may undercount.
-            - class: spark
-              l1: 'Partially'
-              l2: streaming requires more testing
-              l3: "Uses Spark's <tt>AccumulatorParam</tt> mechanism"
-
-        - name: Keyed State
-          values:
-            - class: model
-              jira: BEAM-25
-              l1: 'No'
-              l2: storage per key, per window
-              l3: Allows fine-grained access to per-key, per-window persistent state. Necessary for certain use cases (e.g. high-volume windows which store large amounts of data, but typically only access small portions of it; complex state machines; etc.) that are not easily or efficiently addressed via <tt>Combine</tt> or <tt>GroupByKey</tt>+<tt>ParDo</tt>. 
-            - class: dataflow
-              l1: 'No'
-              l2: pending model support
-              l3: Dataflow already supports keyed state internally, so adding support for this should be easy once the Beam model exposes it.
-            - class: flink
-              l1: 'No'
-              l2: pending model support
-              l3: Flink already supports keyed state, so adding support for this should be easy once the Beam model exposes it.
-            - class: spark
-              l1: 'No'
-              l2: pending model support
-              l3: Spark supports keyed state with mapWithState(), so support should be straightforward.
-              
-              
-    - description: Where in event time?
-      anchor: where
-      color-b: '37d'
-      color-y: '59f'
-      color-p: '8cf'
-      color-n: 'ddd'
-      rows:
-        - name: Global windows
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: all time
-              l3: The default window which covers all of time. (Basically how traditional batch cases fit in the model.)
-            - class: dataflow
-              l1: 'Yes'
-              l2: default
-              l3: ''
-            - class: flink
-              l1: 'Yes'
-              l2: supported
-              l3: ''
-            - class: spark
-              l1: 'Yes'
-              l2: supported
-              l3: ''
-              
-        - name: Fixed windows
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: periodic, non-overlapping
-              l3: Fixed-size, timestamp-based windows. (Hourly, daily, etc.)
-            - class: dataflow
-              l1: 'Yes'
-              l2: built-in
-              l3: ''
-            - class: flink
-              l1: 'Yes'
-              l2: supported
-              l3: ''
-            - class: spark
-              l1: Partially
-              l2: currently only supported in batch
-              l3: ''
-              
-        - name: Sliding windows
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: periodic, overlapping
-              l3: Possibly overlapping fixed-size timestamp-based windows (Every minute, use the last ten minutes of data.)
-            - class: dataflow
-              l1: 'Yes'
-              l2: built-in
-              l3: ''
-            - class: flink
-              l1: 'Yes'
-              l2: supported
-              l3: ''
-            - class: spark
-              l1: 'No'
-              l2: ''
-              l3: ''
-
-        - name: Session windows
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: activity-based
-              l3: Based on bursts of activity separated by a gap size. Different per key.
-            - class: dataflow
-              l1: 'Yes'
-              l2: built-in
-              l3: ''
-            - class: flink
-              l1: 'Yes'
-              l2: supported
-              l3: "The Runner uses Beam's Windowing and Triggering logic and code."
-            - class: spark
-              l1: 'No'
-              l2: pending Spark engine support
-              l3: ''
-
-        - name: Custom windows
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: user-defined windows
-              l3: All windows must implement <tt>BoundedWindow</tt>, which specifies a max timestamp. Each <tt>WindowFn</tt> assigns elements to an associated window.
-            - class: dataflow
-              l1: 'Yes'
-              l2: supported
-              l3: ''
-            - class: flink
-              l1: 'Yes'
-              l2: supported
-              l3: "The Runner uses Beam's Windowing and Triggering logic and code."
-            - class: spark
-              l1: 'No'
-              l2: pending Spark engine support
-              l3: ''
-
-        - name: Custom merging windows
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: user-defined merging windows
-              l3: A custom <tt>WindowFn</tt> additionally specifies whether and how to merge windows.
-            - class: dataflow
-              l1: 'Yes'
-              l2: supported
-              l3: ''
-            - class: flink
-              l1: 'Yes'
-              l2: supported
-              l3: "The Runner uses Beam's Windowing and Triggering logic and code."
-            - class: spark
-              l1: 'No'
-              l2: pending Spark engine support
-              l3: ''
-
-        - name: Timestamp control
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: output timestamp for window panes
-              l3: For a grouping transform, such as GBK or Combine, an OutputTimeFn specifies (1) how to combine input timestamps within a window and (2) how to merge aggregated timestamps when windows merge.
-            - class: dataflow
-              l1: 'Yes'
-              l2: supported
-              l3: ''
-            - class: flink
-              l1: 'Yes'
-              l2: supported
-              l3: "The Runner uses Beam's Windowing and Triggering logic and code."
-            - class: spark
-              l1: 'No'
-              l2: pending Spark engine support
-              l3: ''
-
-
-              
-    - description: When in processing time?
-      anchor: when
-      color-b: '6a4'
-      color-y: '8c6'
-      color-p: 'ae8'
-      color-n: 'ddd'
-      rows:
-        
-        - name: Configurable triggering
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: user customizable
-              l3: Triggering may be specified by the user (instead of simply driven by hardcoded defaults).
-            - class: dataflow
-              l1: 'Yes'
-              l2: fully supported
-              l3: Fully supported in streaming mode. In batch mode, intermediate trigger firings are effectively meaningless.
-            - class: flink
-              l1: 'Yes'
-              l2: fully supported
-              l3: "The Runner uses Beam's Windowing and Triggering logic and code."
-            - class: spark
-              l1: 'No'
-              l2: ''
-              l3: ''
-
-        - name: Event-time triggers
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: relative to event time
-              l3: Triggers that fire in response to event-time completeness signals, such as watermarks progressing.
-            - class: dataflow
-              l1: 'Yes'
-              l2: yes in streaming, fixed granularity in batch
-              l3: Fully supported in streaming mode. In batch mode, currently watermark progress jumps from the beginning of time to the end of time once the input has been fully consumed, thus no additional triggering granularity is available.
-            - class: flink
-              l1: 'Yes'
-              l2: fully supported
-              l3: "The Runner uses Beam's Windowing and Triggering logic and code."
-            - class: spark
-              l1: 'No'
-              l2: ''
-              l3: ''
-              
-        - name: Processing-time triggers
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: relative to processing time
-              l3: Triggers that fire in response to processing-time advancing.
-            - class: dataflow
-              l1: 'Yes'
-              l2: yes in streaming, fixed granularity in batch
-              l3: Fully supported in streaming mode. In batch mode, from the perspective of triggers, processing time currently jumps from the beginning of time to the end of time once the input has been fully consumed, thus no additional triggering granularity is available.
-            - class: flink
-              l1: 'Yes'
-              l2: fully supported
-              l3: "The Runner uses Beam's Windowing and Triggering logic and code."
-            - class: spark
-              l1: 'Yes'
-              l2: "This is Spark streaming's native model"
-              l3: "Spark processes streams in micro-batches. The micro-batch size is actually a pre-set, fixed, time interval. Currently, the runner takes the first window size in the pipeline and sets it's size as the batch interval. Any following window operations will be considered processing time windows and will affect triggering."
-              
-        - name: Count triggers
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: every N elements
-              l3: Triggers that fire after seeing at least N elements.
-            - class: dataflow
-              l1: 'Yes'
-              l2: fully supported
-              l3: Fully supported in streaming mode. In batch mode, elements are processed in the largest bundles possible, so count-based triggers are effectively meaningless.
-            - class: flink
-              l1: 'Yes'
-              l2: fully supported
-              l3: "The Runner uses Beam's Windowing and Triggering logic and code."
-            - class: spark
-              l1: 'No'
-              l2: ''
-              l3: ''
-
-        - name: '[Meta]data driven triggers'
-          values:
-            - class: model
-              jira: BEAM-101
-              l1: 'No'
-              l2: in response to data
-              l3: Triggers that fire in response to attributes of the data being processed.
-            - class: dataflow
-              l1: 'No'
-              l2: pending model support
-              l3: 
-            - class: flink
-              l1: 'No'
-              l2: pending model support
-              l3: 
-            - class: spark
-              l1: 'No'
-              l2: pending model support
-              l3: 
-
-        - name: Composite triggers
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: compositions of one or more sub-triggers
-              l3: Triggers which compose other triggers in more complex structures, such as logical AND, logical OR, early/on-time/late, etc.
-            - class: dataflow
-              l1: 'Yes'
-              l2: fully supported
-              l3: ''
-            - class: flink
-              l1: 'Yes'
-              l2: fully supported
-              l3: "The Runner uses Beam's Windowing and Triggering logic and code."
-            - class: spark
-              l1: 'No'
-              l2: ''
-              l3: ''
-              
-        - name: Allowed lateness
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: event-time bound on window lifetimes
-              l3: A way to bound the useful lifetime of a window (in event time), after which any unemitted results may be materialized, the window contents may be garbage collected, and any additional late data that arrive for the window may be discarded.
-            - class: dataflow
-              l1: 'Yes'
-              l2: fully supported
-              l3: Fully supported in streaming mode. In batch mode no data is ever late.
-            - class: flink
-              l1: 'Yes'
-              l2: fully supported
-              l3: "The Runner uses Beam's Windowing and Triggering logic and code."
-            - class: spark
-              l1: 'No'
-              l2: ''
-              l3: ''
-              
-        - name: Timers
-          values:
-            - class: model
-              jira: BEAM-27
-              l1: 'No'
-              l2: delayed processing callbacks
-              l3: A fine-grained mechanism for performing work at some point in the future, in either the event-time or processing-time domain. Useful for orchestrating delayed events, timeouts, etc. in complex per-key, per-window state machines.
-            - class: dataflow
-              l1: 'No'
-              l2: pending model support
-              l3: Dataflow already supports timers internally, so adding support for this should be easy once the Beam model exposes it.
-            - class: flink
-              l1: 'No'
-              l2: pending model support
-              l3: Flink already supports timers internally, so adding support for this should be easy once the Beam model exposes it.
-            - class: spark
-              l1: 'No'
-              l2: pending model support
-              l3: ''
-              
-              
-    - description: How do refinements relate?
-      anchor: how
-      color-b: 'b55'
-      color-y: 'd77'
-      color-p: 'faa'
-      color-n: 'ddd'
-      rows:
-        
-        - name: Discarding
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: panes discard elements when fired
-              l3: Elements are discarded from accumulated state as their pane is fired.
-            - class: dataflow
-              l1: 'Yes'
-              l2: fully supported
-              l3: ''
-            - class: flink
-              l1: 'Yes'
-              l2: fully supported
-              l3: "The Runner uses Beam's Windowing and Triggering logic and code."
-            - class: spark
-              l1: 'Yes'
-              l2: fully supported
-              l3: 'Spark streaming natively discards elements after firing.'
-              
-        - name: Accumulating
-          values:
-            - class: model
-              l1: 'Yes'
-              l2: panes accumulate elements across firings
-              l3: Elements are accumulated in state across multiple pane firings for the same window.
-            - class: dataflow
-              l1: 'Yes'
-              l2: fully supported
-              l3: Requires that the accumulated pane fits in memory after being passed through the combiner (if relevant).
-            - class: flink
-              l1: 'Yes'
-              l2: fully supported
-              l3: "The Runner uses Beam's Windowing and Triggering logic and code."
-            - class: spark
-              l1: 'No'
-              l2: ''
-              l3: ''
-              
-        - name: 'Accumulating &amp; Retracting'
-          values:
-            - class: model
-              jira: BEAM-91
-              l1: 'No'
-              l2: accumulation plus retraction of old panes
-              l3: Elements are accumulated across multiple pane firings and old emitted values are retracted. Also known as "backsies" ;-D
-            - class: dataflow
-              l1: 'No'
-              l2: pending model support
-              l3: ''
-            - class: flink
-              l1: 'No'
-              l2: pending model support
-              l3: ''
-            - class: spark
-              l1: 'No'
-              l2: pending model support
-              l3: ''
-              
-
----
-
-With initial code drops complete ([Dataflow SDK and Runner](https://github.com/apache/incubator-beam/pull/1), [Flink Runner](https://github.com/apache/incubator-beam/pull/12), [Spark Runner](https://github.com/apache/incubator-beam/pull/42)) and expressed interest in runner implementations for [Storm](https://issues.apache.org/jira/browse/BEAM-9), [Hadoop](https://issues.apache.org/jira/browse/BEAM-19), and [Gearpump](https://issues.apache.org/jira/browse/BEAM-79) (amongst others), we wanted to start addressing a big question in the Apache Beam (incubating) community: what capabilities will each runner be able to support?
-
-<!--more-->
-
-While we'd love to have a world where all runners support the full suite of semantics included in the Beam Model (formerly referred to as the [Dataflow Model](http://www.vldb.org/pvldb/vol8/p1792-Akidau.pdf)), practically speaking, there will always be certain features that some runners can't provide. For example, a Hadoop-based runner would be inherently batch-based and may be unable to (easily) implement support for unbounded collections. However, that doesn't prevent it from being extremely useful for a large set of uses. In other cases, the implementations provided by one runner may have slightly different semantics than those provided by another (e.g. even though the current suite of runners all support exactly-once delivery guarantees, an [Apache Samza](http://samza.apache.org/) runner, which would be a welcome addition, would currently only support at-least-once).
-
-To help clarify things, we've been working on enumerating the key features of the Beam model in a [capability matrix]({{ site.baseurl }}/learn/runners/capability-matrix/) for all existing runners, categorized around the four key questions addressed by the model: <span class="wwwh-what-dark">What</span> / <span class="wwwh-where-dark">Where</span> / <span class="wwwh-when-dark">When</span> / <span class="wwwh-how-dark">How</span> (if you're not familiar with those questions, you might want to read through [Streaming 102](http://oreilly.com/ideas/the-world-beyond-batch-streaming-102) for an overview). This table will be maintained over time as the model evolves, our understanding grows, and runners are created or features added.
-
-Included below is a summary snapshot of our current understanding of the capabilities of the existing runners (see the [live version]({{ site.baseurl }}/learn/runners/capability-matrix/) for full details, descriptions, and Jira links); since integration is still under way, the system as a whole isn't yet in a completely stable, usable state. But that should change in the near future, and we'll announce it loud and clear on this blog when the first supported Beam 1.0 release happens.
-
-In the meantime, these tables should help clarify where we expect to be in the very near term, and help guide expectations about what existing runners are capable of, and what features runner implementers will be tackling next.
-
-{% include capability-matrix-common.md %}
-{% assign cap-data=page.capability-matrix-snapshot %}
-
-<!-- Summary table -->
-{% assign cap-style='cap-summary' %}
-{% assign cap-view='blog' %}
-{% assign cap-other-view='full' %}
-{% assign cap-toggle-details=1 %}
-{% assign cap-display='block' %}
-
-{% include capability-matrix.md %}

http://git-wip-us.apache.org/repos/asf/incubator-beam-site/blob/cc22c462/_posts/2016-04-03-presentation-materials.md
----------------------------------------------------------------------
diff --git a/_posts/2016-04-03-presentation-materials.md b/_posts/2016-04-03-presentation-materials.md
deleted file mode 100644
index 3bc05cb..0000000
--- a/_posts/2016-04-03-presentation-materials.md
+++ /dev/null
@@ -1,24 +0,0 @@
----
-layout: post
-title:  "Apache Beam Presentation Materials"
-date:   2016-04-03 11:00:00 -0700
-excerpt_separator: <!--more-->
-categories: beam capability
-authors:
-  - fjp
-  - takidau
----
-
-Are you interested in giving a presentation about Apache Beam? Perhaps you want to talk about Apache Beam at a local meetup or a convention. Excellent! The Apache Beam community is excited to grow. To help kickstart this process, we are pleased to announce an initial set of [Apache Beam presentation materials]({{ site.baseurl }}/learn/presentation-materials/) which anyone can use to give a presentation about Apache Beam.
-
-<!--more-->
-
-As a community, we want to build a shared collection of high quality presentation materials. This initial set includes the following slide decks:
-
-* The Apache Beam model
-    * [Long version](https://goo.gl/r0nvWh)
-    * [Short version](https://goo.gl/h5D1yR)
-* Using Apache Beam with runners
-    * [Google Cloud Dataflow](https://goo.gl/2ay8mi)
-
-As Apache Beam grows, so will this repository of presentation materials. We are excited to add new materials as the Apache Beam ecosystem grows with new runners, SDKs, and so on. If you are interested in contributing content or have a request, please see the [Apache Beam presentation materials]({{ site.baseurl }}/learn/presentation-materials/) page or email the [`user@beam.incubator.apache.org`](mailto:user@beam.incubator.apache.org) mailing list with your ideas or questions.

http://git-wip-us.apache.org/repos/asf/incubator-beam-site/blob/cc22c462/_posts/2016-05-18-splitAtFraction-method.md
----------------------------------------------------------------------
diff --git a/_posts/2016-05-18-splitAtFraction-method.md b/_posts/2016-05-18-splitAtFraction-method.md
deleted file mode 100644
index 649af74..0000000
--- a/_posts/2016-05-18-splitAtFraction-method.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-layout: post
-title:  "Dynamic work rebalancing for Beam"
-date:   2016-05-18 11:00:00 -0700
-excerpt_separator: <!--more-->
-categories: blog
-authors:
-  - dhalperi
----
-
-This morning, Eugene and Malo from the Google Cloud Dataflow team posted [*No shard left behind: dynamic work rebalancing in Google Cloud Dataflow*](https://cloud.google.com/blog/big-data/2016/05/no-shard-left-behind-dynamic-work-rebalancing-in-google-cloud-dataflow). This article discusses Cloud Dataflow's solution to the well-known straggler problem.
-
-<!--more-->
-
-In a large batch processing job with many tasks executing in parallel, some of the tasks -- the stragglers -- can take a much longer time to complete than others, perhaps due to imperfect splitting of the work into parallel chunks when issuing the job. Typically, waiting for stragglers means that the overall job completes later than it should, and the job may also reserve too many machines that sit underutilized toward the end. Cloud Dataflow's dynamic work rebalancing can mitigate stragglers in most cases.
-
-What I'd like to highlight for the Apache Beam (incubating) community is that Cloud Dataflow's dynamic work rebalancing is implemented using *runner-specific* control logic on top of Beam's *runner-independent* [`BoundedSource`](https://github.com/apache/incubator-beam/blob/9fa97fb2491bc784df53fb0f044409dbbc2af3d7/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedSource.java) API. Specifically, to steal work from a straggler, a runner need only call the reader's [`splitAtFraction`](https://github.com/apache/incubator-beam/blob/3edae9b8b4d7afefb5c803c19bb0a1c21ebba89d/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedSource.java#L266) method. This will generate a new source containing the leftover work, which the runner can then pass off to another idle worker. As Beam matures, I hope that other runners will be interested in figuring out whether these APIs can help them improve performance, in implementing dynamic work rebalancing, and in collaborating on API changes that will help solve other pain points.
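-
-To make this concrete, here is a minimal sketch of what runner-side work stealing can look like. It is illustrative only: `scheduleOnIdleWorker` is a hypothetical runner-internal hook, not part of the Beam API, and a real runner would choose the split fraction from progress information rather than hardcoding it.
-
-```java
-import org.apache.beam.sdk.io.BoundedSource;
-
-// Illustrative sketch: split off the second half of a straggler's range.
-<T> void stealRemainingWork(BoundedSource.BoundedReader<T> reader) {
-  // Ask the reader to split its range at the halfway point; returns null
-  // if the reader cannot split there.
-  BoundedSource<T> residual = reader.splitAtFraction(0.5);
-  if (residual != null) {
-    // The original reader now stops at the split point; the returned
-    // source describes the leftover work for another worker.
-    scheduleOnIdleWorker(residual);  // hypothetical runner-internal hook
-  }
-}
-```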

http://git-wip-us.apache.org/repos/asf/incubator-beam-site/blob/cc22c462/_posts/2016-05-20-where-is-my-pcollection-dot-map.md
----------------------------------------------------------------------
diff --git a/_posts/2016-05-20-where-is-my-pcollection-dot-map.md b/_posts/2016-05-20-where-is-my-pcollection-dot-map.md
deleted file mode 100644
index 5fc13f0..0000000
--- a/_posts/2016-05-20-where-is-my-pcollection-dot-map.md
+++ /dev/null
@@ -1,89 +0,0 @@
----
-layout: post
-title:  "Where's my PCollection.map()?"
-date:   2016-05-27 09:00:00 -0700
-excerpt_separator: <!--more-->
-categories: blog
-authors:
-  - robertwb
----
-Have you ever wondered why Beam has PTransforms for everything instead of having methods on PCollection? Take a look at the history that led to this (and other) design decisions.
-
-<!--more-->
-
-Though Beam is relatively new, its design draws heavily on many years of experience with real-world pipelines. One of the primary inspirations is [FlumeJava](http://research.google.com/pubs/pub35650.html), Google's internal successor to MapReduce, first introduced in 2009.
-
-The original FlumeJava API has methods like `count` and `parallelDo` on the PCollections. Though slightly more succinct, this approach has serious disadvantages for extensibility. Every new FlumeJava user wanted to add transforms, and adding them as methods to PCollection simply doesn't scale well. In contrast, a PCollection in Beam has a single `apply` method, which takes any PTransform as an argument.
-
-<table class="table">
-  <tr>
-    <th>FlumeJava</th>
-    <th>Beam</th>
-  </tr>
-  <tr>
-    <td><pre>
-PCollection&lt;T&gt; input = ...
-PCollection&lt;O&gt; output = input.count()
-                             .parallelDo(...);
-    </pre></td>
-    <td><pre>
-PCollection&lt;T&gt; input = ...
-PCollection&lt;O&gt; output = input.apply(Count.perElement())
-                             .apply(ParDo.of(...));
-    </pre></td>
-  </tr>
-</table>
-
-This is a more scalable approach for several reasons.
-
-## Where to draw the line?
-Adding methods to PCollection forces a line to be drawn between operations that are "useful" enough to merit this special treatment and those that are not. It is easy to make the case for flat map, group by key, and combine per key. But what about filter? Count? Approximate count? Approximate quantiles? Most frequent? WriteToMyFavoriteSource? Going too far down this path leads to a single enormous class that contains nearly everything one could want to do. (FlumeJava's PCollection class is over 5000 lines long with around 70 distinct operations, and it could have been *much* larger had we accepted every proposal.) Furthermore, since Java doesn't allow adding methods to a class, there is a sharp syntactic divide between those operations that are added to PCollection and those that aren't. A traditional way to share code is with a library of functions, but functions (in traditional languages like Java at least) are written prefix-style, which doesn't mix well with the fluent builder style (e.g. `input.operation1().operation2().operation3()` vs. `operation3(operation2(operation1(input)))`).
-
-Instead in Beam we've chosen a style that places all transforms--whether they be primitive operations, composite operations bundled in the SDK, or part of an external library--on equal footing. This also facilitates alternative implementations (which may even take different options) that are easily interchangeable.
-
-<table class="table">
-  <tr>
-    <th>FlumeJava</th>
-    <th>Beam</th>
-  </tr>
-  <tr>
-    <td><pre>
-PCollection&lt;O&gt; output =
-    ExternalLibrary.doStuff(
-        MyLibrary.transform(input, myArgs)
-            .parallelDo(...),
-        externalLibArgs);
-    </pre></td>
-    <td><pre>
-PCollection&lt;O&gt; output = input
-    .apply(MyLibrary.transform(myArgs))
-    .apply(ParDo.of(...))
-    .apply(ExternalLibrary.doStuff(externalLibArgs));
-    &nbsp;
-    </pre></td>
-  </tr>
-</table>
-
-## Configurability
-It makes for a fluent style to let values (PCollections) be the objects passed around and manipulated (i.e. the handles to the deferred execution graph), but it is the operations themselves that need to be composable, configurable, and extendable. Using PCollection methods for the operations doesn't scale well here, especially in a language without default or keyword arguments. For example, a ParDo operation can have any number of side inputs and side outputs, or a write operation may have configurations dealing with encoding and compression. One option is to separate these out into multiple overloads or even methods, but that exacerbates the problems above. (FlumeJava ended up with over a dozen overloads of the `parallelDo` method!) Another option is to pass each method a configuration object that can be built up using more fluent idioms like the builder pattern, but at that point one might as well make the configuration object the operation itself, which is what Beam does.
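-
-As a rough sketch of what this looks like (the names `lines`, `cutoff`, and `FilterLongLinesFn` are illustrative, not SDK classes), the configuration lives on the operation object and is built up fluently:
-
-```java
-// The side input is attached to the ParDo operation itself, not passed
-// through a PCollection method overload.
-PCollectionView<Integer> cutoff = ...;  // a singleton side input
-PCollection<String> kept = lines.apply(
-    ParDo.of(new FilterLongLinesFn(cutoff))
-         .withSideInputs(cutoff));
-```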
-
-## Type Safety
-Many operations can only be applied to collections whose elements are of a specific type. For example, the GroupByKey operation should only be applied to `PCollection<KV<K, V>>`s. In Java at least, it's not possible to restrict methods based on the element type parameter alone. In FlumeJava, this led us to add a `PTable<K, V>` subclassing `PCollection<KV<K, V>>` to contain all the operations specific to PCollections of key-value pairs. This leads to the same question of which element types are special enough to merit being captured by PCollection subclasses. It is not very extensible for third parties and often requires manual downcasts/conversions (which can't be safely chained in Java) and special operations that produce these PCollection specializations.
-
-This is particularly inconvenient for transforms that produce outputs whose element types are the same as (or related to) their input's element types, requiring extra support to generate the right subclasses (e.g. a filter on a PTable should produce another PTable rather than just a raw PCollection of key-value pairs).
-
-Using PTransforms allows us to sidestep this entire issue. We can place arbitrary constraints on the context in which a transform may be used based on the type of its inputs; for instance GroupByKey is statically typed to only apply to a `PCollection<KV<K, V>>`. The way this happens is generalizable to arbitrary shapes, without needing to introduce specialized types like PTable.
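-
-Concretely, a sketch of the static constraint in action:
-
-```java
-PCollection<KV<String, Long>> pairs = ...;
-// Compiles, because the element type is KV<String, Long>; applying
-// GroupByKey to, say, a PCollection<String> is rejected at compile time.
-PCollection<KV<String, Iterable<Long>>> grouped =
-    pairs.apply(GroupByKey.<String, Long>create());
-```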
-
-## Reusability and Structure
-Though PTransforms are generally constructed at the site at which they're used, by pulling them out as separate objects one is able to store them and pass them around.
-
-As pipelines grow and evolve, it is useful to structure your pipeline into modular, often reusable components, and PTransforms allow one to do this nicely in a data-processing pipeline. In addition, modular PTransforms also expose the logical structure of your code to the system (e.g. for monitoring). Of the three different representations of the WordCount pipeline below, only the structured view captures the high-level intent of the pipeline. Letting even the simple operations be PTransforms means there's less of an abrupt edge to packaging things up into composite operations.
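-
-For example, here is a sketch of a composite transform modeled on the WordCount pipeline shown below. (`ExtractWordsFn` is assumed to be a `DoFn` defined elsewhere, and the exact name of the override point has varied across SDK versions.)
-
-```java
-// A reusable, named unit of pipeline logic: tokenize, then count words.
-public class CountWords
-    extends PTransform<PCollection<String>, PCollection<KV<String, Long>>> {
-  @Override
-  public PCollection<KV<String, Long>> apply(PCollection<String> lines) {
-    return lines
-        .apply(ParDo.of(new ExtractWordsFn()))  // assumed DoFn
-        .apply(Count.<String>perElement());
-  }
-}
-```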
-
-<img class="center-block" src="{{ "/images/blog/simple-wordcount-pipeline.png" | prepend: site.baseurl }}" alt="Three different visualizations of a simple WordCount pipeline" width="500">
-
-<div class="text-center">
-<i>Three different visualizations of a simple WordCount pipeline which computes the number of occurrences of every word in a set of text files. The flat view gives the full DAG of all operations performed. The execution view groups operations according to how they're executed, e.g. after performing runner-specific optimizations like function composition. The structured view nests operations according to their grouping in PTransforms.</i>
-</div>
-
-## Summary
-Although it's tempting to add methods to PCollections, such an approach is not scalable, extensible, or sufficiently expressive. Putting a single apply method on PCollection and all the logic into the operation itself lets us have the best of both worlds, and avoids hard cliffs of complexity by having a single consistent style across simple and complex pipelines, and between predefined and user-defined operations.

http://git-wip-us.apache.org/repos/asf/incubator-beam-site/blob/cc22c462/_posts/2016-06-13-flink-batch-runner-milestone.md
----------------------------------------------------------------------
diff --git a/_posts/2016-06-13-flink-batch-runner-milestone.md b/_posts/2016-06-13-flink-batch-runner-milestone.md
deleted file mode 100644
index 25e0bb4..0000000
--- a/_posts/2016-06-13-flink-batch-runner-milestone.md
+++ /dev/null
@@ -1,32 +0,0 @@
----
-layout: post
-title:  "How We Added Windowing to the Apache Flink Batch Runner"
-date:   2016-06-13 09:00:00 -0700
-excerpt_separator: <!--more-->
-categories: blog
-authors:
-  - aljoscha
----
-We recently achieved a major milestone by adding support for windowing to the [Apache Flink](http://flink.apache.org) Batch runner. In this post we would like to explain what this means for users of Apache Beam and highlight some of the implementation details.
-
-<!--more-->
-
-Before we start, though, let's quickly talk about the execution of Beam programs and how this is relevant to today's post. A Beam pipeline can contain bounded and unbounded sources. If the pipeline contains only bounded sources, it can be executed in a batch fashion; if it contains any unbounded sources, it must be executed in a streaming fashion. When executing a Beam pipeline on Flink, you don't have to choose the execution mode. Internally, the Flink runner translates the pipeline to either a Flink `DataSet` program or a `DataStream` program, depending on whether unbounded sources are used in the pipeline. In the following, when we say "Batch runner" what we are really talking about is the Flink runner in batch execution mode.
-
-## What does this mean for users?
-
-Support for windowing was the last missing puzzle piece for making the Flink Batch runner compatible with the Beam model. With the latest change to the Batch runner users can now run any pipeline that only contains bounded sources and be certain that the results match those of the original reference-implementation runners that were provided by Google as part of the initial code drop coming from the Google Dataflow SDK.
-
-The most obvious part of the change is that windows can now be assigned to elements and that the runner respects these windows for the `GroupByKey` and `Combine` operations. A not-so-obvious change concerns side inputs. In the Beam model, side inputs respect windows; when a value of the main input is being processed, only the side input that corresponds to the correct window is available to the processing function, the `DoFn`.
-
-Getting side-input semantics right is an important milestone in its own right, because it allows us to use a large suite of unit tests to verify the correctness of a runner implementation. These tests exercise every obscure detail of the Beam programming model and verify that the results produced by a runner match what you would expect from a correct implementation. In the suite, side inputs are used to compare the expected result to the actual result. With these tests being executed regularly, we can now be more confident that the implementation produces correct results for user-specified pipelines.
-
-## Under the Hood
-The basis for the changes is the introduction of `WindowedValue` in the generated Flink transformations. Before, a Beam `PCollection<T>` would be transformed to a `DataSet<T>`. Now, we instead create a `DataSet<WindowedValue<T>>`. The `WindowedValue<T>` stores metadata about the value, such as the timestamp and the windows to which it was assigned.
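-
-Conceptually, every element now travels together with its windowing metadata, along these lines (a sketch; `window` stands for whatever `BoundedWindow` the `WindowFn` assigned):
-
-```java
-// Wrap a value with its event-time timestamp, assigned window, and pane.
-WindowedValue<String> wv = WindowedValue.of(
-    "hello",              // the element itself
-    new Instant(42L),     // its event-time timestamp
-    window,               // the BoundedWindow it was assigned to
-    PaneInfo.NO_FIRING);  // pane metadata (no trigger has fired yet)
-```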
-
-With this basic change out of the way, we just had to make sure that windows were respected for side inputs and that `Combine` and `GroupByKey` correctly handled windows. The tricky part there is the handling of merging windows such as session windows. For these we essentially emulate the behavior of a merging `WindowFn` in our own code.
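-
-(For reference, session windows are declared in a pipeline roughly as follows; `scores` is an illustrative input.)
-
-```java
-// Per key, elements separated by a gap of 10+ minutes end up in separate,
-// merged session windows.
-PCollection<KV<String, Long>> windowed = scores.apply(
-    Window.<KV<String, Long>>into(
-        Sessions.withGapDuration(Duration.standardMinutes(10))));
-```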
-
-After we got side inputs working we could enable the aforementioned suite of tests to check how well the runner behaves with respect to the Beam model. As expected, there were quite a few discrepancies, but we managed to resolve them all. In the process, we also slimmed down the runner implementation. For example, we removed all custom translations for sources and sinks and now rely only on Beam code for these, thereby greatly reducing the maintenance overhead.
-
-## Summary
-We reached a major milestone in adding windowing support to the Flink Batch runner, thereby making it compatible with the Beam model. Because of the large suite of tests that can now be executed on the runner we are also confident about the correctness of the implementation and about it staying that way in the future.

http://git-wip-us.apache.org/repos/asf/incubator-beam-site/blob/cc22c462/_posts/2016-06-15-first-release.md
----------------------------------------------------------------------
diff --git a/_posts/2016-06-15-first-release.md b/_posts/2016-06-15-first-release.md
deleted file mode 100644
index fe11cae..0000000
--- a/_posts/2016-06-15-first-release.md
+++ /dev/null
@@ -1,39 +0,0 @@
----
-layout: post
-title:  "The first release of Apache Beam!"
-date:   2016-06-15 00:00:01 -0700
-excerpt_separator: <!--more-->
-categories: beam release
-authors:
-  - davor
----
-
-I'm happy to announce that Apache Beam has officially released its first
-version -- 0.1.0-incubating. This is an exciting milestone for the project,
-which joined the Apache Software Foundation and the Apache Incubator earlier
-this year.
-
-<!--more-->
-
-This release publishes the first set of Apache Beam binaries and source code,
-making them readily available for our users. The initial release includes the
-SDK for Java, along with three runners: Apache Flink, Apache Spark and Google
-Cloud Dataflow, a fully-managed cloud service. The release is available both
-in the [Maven Central Repository](http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.apache.beam%22),
-as well as a download from the [project's website]({{ site.baseurl }}/use/releases/).
-
-The goal of this release was process-oriented. In particular, the Beam
-community wanted to release existing functionality to our users, build and
-validate the release processes, and obtain validation from the Apache Software
-Foundation and the Apache Incubator.
-
-I'd like to encourage everyone to try out this release. Please keep in mind
-that this is the first incubating release -- significant changes are to be
-expected. As we march toward stability, a rapid cadence of future releases is
-anticipated, perhaps one every 1-2 months.
-
-As always, the Beam community welcomes feedback. Stabilization, usability and
-the developer experience will be our focus for the next several months. If you
-have any comments or discover any issues, I'd like to invite you to reach out
-to us via the [user's mailing list]({{ site.baseurl }}/use/mailing-lists/) or the
-[Apache JIRA issue tracker](https://issues.apache.org/jira/browse/BEAM/).

http://git-wip-us.apache.org/repos/asf/incubator-beam-site/blob/cc22c462/_posts/2016-08-03-six-months.md
----------------------------------------------------------------------
diff --git a/_posts/2016-08-03-six-months.md b/_posts/2016-08-03-six-months.md
deleted file mode 100644
index 73305e4..0000000
--- a/_posts/2016-08-03-six-months.md
+++ /dev/null
@@ -1,43 +0,0 @@
----
-layout: post
-title:  "Apache Beam: Six Months in Incubation"
-date:   2016-08-03 00:00:01 -0700
-excerpt_separator: <!--more-->
-categories: blog
-authors:
-  - fjp
----
-
-It's been just over six months since Apache Beam was formally accepted into incubation with the [Apache Software Foundation](http://www.apache.org). As a community, we've been hard at work getting Beam off the ground.
-
-<!--more-->
-
-Looking just at raw numbers for those first six months, that's:
-
-* 48,238 lines of preexisting code donated by Cloudera, dataArtisans, and Google.
-* 761 pull requests from 45 contributors.
-* 498 Jira issues opened and 245 resolved.
-* 1 incubating release (and another in progress). 
-* 4,200 hours of automated tests. 
-* 161 subscribers / 606 messages on user@.
-* 217 subscribers / 1205 messages on dev@.
-* 277 stars and 174 forks on GitHub.
-
-And behind those numbers, there's been a ton of technical progress, including:
-
-* Refactoring of the entire codebase, examples, and tests to be truly runner-independent.
-* New functionality in the Apache Flink runner for timestamps/windows in batch and bounded sources and side inputs in streaming mode.
-* Work in progress to upgrade the Apache Spark runner to use Spark 2.0.
-* Several new runners from the wider Apache community -- Apache Gearpump has its own feature branch, Apache Apex has a PR, and conversations are starting on Apache Storm and others.
-* New SDKs/DSLs for exposing the Beam model -- the Python SDK from Google has landed on a feature branch, and there are plans to add the Scio DSL from Spotify.
-* Support for additional data sources and sinks -- Apache Kafka and JMS are in, there are PRs for Amazon Kinesis, Apache Cassandra, and MongoDB, and more connectors are being planned.
-
-But perhaps most importantly, we're committed to building an involved, welcoming community. So far, we've:
-
-* Started building a vibrant developer community, with detailed design discussions on features like DoFn reuse semantics, serialization technology, and an API for accessing state.
-* Started building a user community with an active mailing list and improvements to the website and documentation.
-* Had multiple talks on Beam at venues including ApacheCon, Hadoop Summit, Kafka Summit, JBCN Barcelona, and Strata.
-* Presented at multiple existing meetups and are starting to organize some of our own.
-
-While it's nice to reflect back on all we've done, we're working full _stream_ ahead towards a stable release and graduation from the incubator. And we'd love your help -- join the [mailing lists]({{ site.baseurl }}/use/mailing-lists/), check out the [contribution guide]({{ site.baseurl }}/contribute/contribution-guide/), and grab a [starter task](https://issues.apache.org/jira/browse/BEAM-520?jql=project%20%3D%20BEAM%20AND%20resolution%20%3D%20Unresolved%20AND%20labels%20in%20(newbie%2C%20starter)) from Jira!
-

http://git-wip-us.apache.org/repos/asf/incubator-beam-site/blob/cc22c462/_posts/2016-10-12-strata-hadoop-world-and-beam.md
----------------------------------------------------------------------
diff --git a/_posts/2016-10-12-strata-hadoop-world-and-beam.md b/_posts/2016-10-12-strata-hadoop-world-and-beam.md
deleted file mode 100644
index f190762..0000000
--- a/_posts/2016-10-12-strata-hadoop-world-and-beam.md
+++ /dev/null
@@ -1,32 +0,0 @@
----
-layout: post
-title:  "Strata+Hadoop World and Beam"
-date:   2016-10-11 09:00:00 -0800
-excerpt_separator: <!--more-->
-categories: beam update
-authors:
-- jesseanderson
----
-
-Tyler Akidau and I gave a [three-hour tutorial](http://conferences.oreilly.com/strata/hadoop-big-data-ny/public/schedule/detail/52129) on Apache Beam at Strata+Hadoop World 2016. We had a plethora of help from our TAs: Kenn Knowles, Reuven Lax, Felipe Hoffa, Slava Chernyak, and Jamie Grier. A total of 66 people attended the session.<!--more-->
-
-<img src="{{ "/images/blog/IMG_20160927_170956.jpg" | prepend: site.baseurl }}" alt="Exercise time">
-
-If you want to take a look at the tutorial materials, we've put them up [on GitHub](https://github.com/eljefe6a/beamexample). This includes the [actual slides](https://github.com/eljefe6a/beamexample/blob/master/BeamTutorial/slides.pdf) as well as the [exercises](https://github.com/eljefe6a/beamexample/tree/master/BeamTutorial/src/main/java/org/apache/beam/examples/tutorial/game) that we covered. If you're looking to learn a little about Beam, this is a good way to start. The exercises revolve around an imaginary mobile game whose data needs processing, and are based on code in the [Beam examples directory](https://github.com/apache/incubator-beam/tree/master/examples/java8/src/main/java/org/apache/beam/examples/complete/game). The code has TODOs marking where you need to fill in code, along with full sample solutions to compare against. You can run these examples on your own machine or on a cluster using a runner that Beam supports.
-
-I want to share some of the takeaways I had about Beam during the conference.
-
-Data engineers are looking to Beam as a way to [future-proof](https://www.oreilly.com/ideas/future-proof-and-scale-proof-your-code) their code, meaning that it stays portable between the various big data frameworks. In fact, many of the attendees were still on Hadoop MapReduce and looking to transition to a new framework. They're realizing that continually rewriting code isn't the most productive approach.
-
-Data scientists are really interested in using Beam. They're interested in having a single API for doing analysis instead of several different APIs. We talked about Beam's progress on the Python API. If you want to take a peek, it's being actively developed on a [feature branch](https://github.com/apache/incubator-beam/tree/python-sdk). As Beam matures, we're looking to add other supported languages.
-
-We heard [loud and clear](https://twitter.com/jessetanderson/status/781124173108305920) from Beam users that great runner support is crucial to adoption. We have great Apache Flink support. During the conference we had some more volunteers offer their help on the Spark runner.
-
-On the management and thought-leadership side, Beam went from "what's Beam?" at previous conferences to "I'm interested in Beam" or "I've formed an informed opinion on Beam" at this conference. This is one of the metrics I look for in early technology adoption.
-
-<img src="{{ "/images/blog/IMG_20160927_170455.jpg" | prepend: site.baseurl }}" alt="So much brainpower answering questions">
-
-We rounded out the tutorial with live demonstrations of Beam running on Apache Spark, Apache Flink, the local runner, and the Dataflow runner. Then we brought in the big brainpower and had a Q&A session.
-
-If you're attending a conference, we encourage you to look for a Beam session. If you want to use these materials to give your own Beam talk or tutorial, we're happy to help you. In addition to this tutorial, we have [other presentation materials]({{ site.baseurl }}/learn/presentation-materials/). You can reach out to us on the [user mailing list]({{ site.baseurl }}/use/mailing-lists/).
-

http://git-wip-us.apache.org/repos/asf/incubator-beam-site/blob/cc22c462/_posts/2016-10-20-test-stream.md
----------------------------------------------------------------------
diff --git a/_posts/2016-10-20-test-stream.md b/_posts/2016-10-20-test-stream.md
deleted file mode 100644
index e2e52de..0000000
--- a/_posts/2016-10-20-test-stream.md
+++ /dev/null
@@ -1,309 +0,0 @@
----
-layout: post
-title:  "Testing Unbounded Pipelines in Apache Beam"
-date:   2016-10-20 10:00:00 -0800
-excerpt_separator: <!--more-->
-categories: blog
-authors:
-- tgroh
----
-
-The Beam Programming Model unifies writing batch and streaming pipelines.
-We've recently introduced a new PTransform for writing tests of pipelines
-that will be run over unbounded datasets and must handle out-of-order and
-delayed data.
-<!--more-->
-
-Watermarks, Windows and Triggers form a core part of the Beam programming model
--- they respectively determine how your data are grouped, when your input is
-complete, and when to produce results. This is true for all pipelines,
-regardless of whether they are processing bounded or unbounded inputs. If you're not
-familiar with watermarks, windowing, and triggering in the Beam model,
-[Streaming 101](https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-101)
-and [Streaming 102](https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-102)
-are an excellent place to get started. A key takeaway from
-these articles: in realistic streaming scenarios with intermittent failures and
-disconnected users, data can arrive out of order or be delayed. Beam's
-primitives provide a way for users to perform useful, powerful, and correct
-computations in spite of these challenges.
-
-As Beam pipeline authors, we need comprehensive tests that cover crucial
-failure scenarios and corner cases to gain real confidence that a pipeline is
-ready for production. The existing testing infrastructure within the Beam SDKs
-permits tests to be written which examine the contents of a Pipeline at
-execution time. However, writing unit tests for pipelines that may receive
-late data or trigger multiple times has historically ranged from complex to
-impossible: pipelines that read from unbounded sources do not shut down
-without external intervention, while pipelines that read exclusively from
-bounded sources cannot exercise late data or most speculative triggers.
-Without additional tools, pipelines that use custom triggers and handle
-out-of-order data could not be easily tested.
-
-This blog post introduces our new framework for writing tests for pipelines that
-handle delayed and out-of-order data in the context of the LeaderBoard pipeline
-from the Mobile Gaming example series.
-
-## LeaderBoard and the Mobile Gaming Example
-
-[LeaderBoard](https://github.com/apache/incubator-beam/blob/master/examples/java8/src/main/java/org/apache/beam/examples/complete/game/LeaderBoard.java#L177)
-is part of the [Beam mobile gaming examples](https://github.com/apache/incubator-beam/tree/master/examples/java8/src/main/java/org/apache/beam/examples/complete/game)
-(and [walkthroughs]({{ site.baseurl }}/get-started/mobile-gaming-example/))
-which produces a continuous accounting of user and team scores. User scores are
-calculated over the lifetime of the program, while team scores are calculated
-within fixed windows with a default duration of one hour. The LeaderBoard
-pipeline produces speculative and late panes as appropriate, based on the
-configured triggering and allowed lateness of the pipeline. The expected outputs
-of the LeaderBoard pipeline vary depending on when elements arrive in relation
-to the watermark and the progress of processing time, which could not previously
-be controlled within a test.
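-
-The exact transform lives in the linked example, but a minimal sketch of the
-windowing and triggering configuration behind CalculateTeamScores might look
-like this (TEAM_WINDOW_DURATION and ALLOWED_LATENESS stand in for the
-pipeline's configured values, `infos` for the parsed game events, and the
-early-firing delay is illustrative):
-
-```java
-// Fixed windows with speculative (early) and late firings; accumulating mode
-// means each refinement pane contains the full running total for the window.
-PCollection<GameActionInfo> windowedInfos = infos.apply(
-    Window.<GameActionInfo>into(FixedWindows.of(TEAM_WINDOW_DURATION))
-        .triggering(AfterWatermark.pastEndOfWindow()
-            // Speculative panes while the window is still open
-            .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane()
-                .plusDelayOf(Duration.standardMinutes(5)))
-            // A refinement pane for each batch of late-arriving elements
-            .withLateFirings(AfterPane.elementCountAtLeast(1)))
-        .withAllowedLateness(ALLOWED_LATENESS)
-        .accumulatingFiredPanes());
-```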
-
-## Writing Deterministic Tests to Emulate Nondeterminism
-
-The Beam testing infrastructure provides the
-[PAssert]({{ site.baseurl }}/documentation/sdks/javadoc/0.2.0-incubating/)
-methods, which assert properties about the contents of a PCollection from within
-a pipeline. We have expanded this infrastructure to include
-[TestStream](https://github.com/apache/incubator-beam/blob/master/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/TestStream.java),
-which is a PTransform that performs a series of events, consisting of adding
-additional elements to a pipeline, advancing the watermark of the TestStream,
-and advancing the pipeline processing time clock. TestStream permits tests which
-observe the effects of triggers on the output a pipeline produces.
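-
-For instance, a bare TestStream exercising all three event types might look
-like the following sketch (GameActionInfo is the element type used throughout
-the examples below; the specific instants and durations are illustrative):
-
-```java
-TestStream<GameActionInfo> events = TestStream.create(AvroCoder.of(GameActionInfo.class))
-    // Add elements, each carrying its event-time timestamp
-    .addElements(new GameActionInfo("sky", "blue", 3, new Instant(0L)))
-    // Advance the pipeline's processing-time clock
-    .advanceProcessingTime(Duration.standardMinutes(10))
-    // Advance the watermark, marking earlier event times as complete
-    .advanceWatermarkTo(new Instant(0L).plus(Duration.standardMinutes(5)))
-    // Every TestStream ends by advancing the watermark to infinity
-    .advanceWatermarkToInfinity();
-```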
-
-While executing a pipeline that reads from a TestStream, the read waits for all
-of the consequences of each event to complete before continuing on to the next
-event, ensuring that when processing time advances, triggers that are based on
-processing time fire as appropriate. With this transform, the effect of
-triggering and allowed lateness can be observed on a pipeline, including
-reactions to speculative and late panes and dropped data.
-
-## Element Timings
-
-Elements arrive either ahead of, with, or behind the watermark, which categorizes
-them into the "early", "on-time", and "late" divisions. "Late" elements can be
-further subdivided into "unobservably", "observably", and "droppably" late,
-depending on the window to which they are assigned and the maximum allowed
-lateness, as specified by the windowing strategy. Elements that arrive with
-these timings are emitted into panes, which can be "EARLY", "ON-TIME", or
-"LATE", depending on the position of the watermark when the pane was emitted.
-
-Using TestStream, we can write tests that demonstrate that speculative panes are
-output after their trigger condition is met, that the advancing of the watermark
-causes the on-time pane to be produced, and that late-arriving data produces
-refinements when it arrives before the maximum allowed lateness, and is dropped
-after.
-
-The following examples demonstrate how you can use TestStream to provide a
-sequence of events to the Pipeline, where the arrival of elements is interspersed
-with updates to the watermark and the advance of processing time. Each of these
-events runs to completion before additional events occur.
-
-In the diagrams, the time at which events occurred in "real" (event) time
-progresses as the graph moves to the right. The time at which the pipeline
-receives them progresses as the graph goes upwards. The watermark is represented
-by the squiggly red line, and each starburst is the firing of a trigger and the
-associated pane.
-
-<img class="center-block" src="{{ "/images/blog/test-stream/elements-all-on-time.png" | prepend: site.baseurl }}" alt="Elements on the Event and Processing time axes, with the Watermark and produced panes" width="442">
-
-### Everything arrives on-time
-
-For example, if we create a TestStream where all the data arrives before the
-watermark and provide the result PCollection as input to the CalculateTeamScores
-PTransform:
-
-```java
-TestStream<GameActionInfo> infos = TestStream.create(AvroCoder.of(GameActionInfo.class))
-    .addElements(new GameActionInfo("sky", "blue", 12, new Instant(0L)),
-                 new GameActionInfo("navy", "blue", 3, new Instant(0L)),
-                 new GameActionInfo("navy", "blue", 3, new Instant(0L).plus(Duration.standardMinutes(3))))
-    // Move the watermark past the end of the window
-    .advanceWatermarkTo(new Instant(0L).plus(TEAM_WINDOW_DURATION)
-                                       .plus(Duration.standardMinutes(1)))
-    .advanceWatermarkToInfinity();
-
-PCollection<KV<String, Integer>> teamScores = p.apply(infos)
-    .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS));
-```
-
-we can then assert that the result PCollection contains elements that arrived:
-
-<img class="center-block" src="{{ "/images/blog/test-stream/elements-all-on-time.png" | prepend: site.baseurl }}" alt="Elements all arrive before the watermark, and are produced in the on-time pane" width="442">
-
-```java
-// Only one value is emitted for the blue team
-PAssert.that(teamScores)
-       .inWindow(window)
-       .containsInAnyOrder(KV.of("blue", 18));
-p.run();
-```
-
-### Some elements are late, but arrive before the end of the window
-
-We can also add data to the TestStream after the watermark, but before the end
-of the window (shown below to the left of the red watermark), which demonstrates
-"unobservably late" data - that is, data that arrives late, but is promoted by
-the system to be on time, as it arrives before the watermark passes the end of
-the window.
-
-```java
-TestStream<GameActionInfo> infos = TestStream.create(AvroCoder.of(GameActionInfo.class))
-    .addElements(new GameActionInfo("sky", "blue", 3, new Instant(0L)),
-                 new GameActionInfo("navy", "blue", 3, new Instant(0L).plus(Duration.standardMinutes(3))))
-    // Move the watermark up to "near" the end of the window
-    .advanceWatermarkTo(new Instant(0L).plus(TEAM_WINDOW_DURATION)
-                                       .minus(Duration.standardMinutes(1)))
-    .addElements(new GameActionInfo("sky", "blue", 12, new Instant(0L)))
-    .advanceWatermarkToInfinity();
-
-PCollection<KV<String, Integer>> teamScores = p.apply(infos)
-    .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS));
-```
-
-<img class="center-block" src="{{ "/images/blog/test-stream/elements-unobservably-late.png" | prepend: site.baseurl }}" alt="An element arrives late, but before the watermark passes the end of the window, and is produced in the on-time pane" width="442">
-
-```java
-// Only one value is emitted for the blue team
-PAssert.that(teamScores)
-       .inWindow(window)
-       .containsInAnyOrder(KV.of("blue", 18));
-p.run();
-```
-
-### Elements are late, and arrive after the end of the window
-
-By advancing the watermark farther in time before adding the late data, we can
-demonstrate the triggering behavior that causes the system to emit an on-time
-pane, and then after the late data arrives, a pane that refines the result.
-
-```java
-TestStream<GameActionInfo> infos = TestStream.create(AvroCoder.of(GameActionInfo.class))
-    .addElements(new GameActionInfo("sky", "blue", 3, new Instant(0L)),
-                 new GameActionInfo("navy", "blue", 3, new Instant(0L).plus(Duration.standardMinutes(3))))
-    // Move the watermark past the end of the window
-    .advanceWatermarkTo(new Instant(0L).plus(TEAM_WINDOW_DURATION)
-                                       .plus(Duration.standardMinutes(1)))
-    .addElements(new GameActionInfo("sky", "blue", 12, new Instant(0L)))
-    .advanceWatermarkToInfinity();
-
-PCollection<KV<String, Integer>> teamScores = p.apply(infos)
-    .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS));
-```
-
-<img class="center-block" src="{{ "/images/blog/test-stream/elements-observably-late.png" | prepend: site.baseurl }}" alt="An element arrives after the watermark passes the end of the window, and is produced in a late refinement pane" width="442">
-
-```java
-// An on-time pane is emitted with the events that arrived before the window closed
-PAssert.that(teamScores)
-       .inOnTimePane(window)
-       .containsInAnyOrder(KV.of("blue", 6));
-// The final pane contains the late refinement
-PAssert.that(teamScores)
-       .inFinalPane(window)
-       .containsInAnyOrder(KV.of("blue", 18));
-p.run();
-```
-
-### Elements are late, and after the end of the window plus the allowed lateness
-
-If we push the watermark even further into the future, beyond the maximum
-configured allowed lateness, we can demonstrate that the late element is dropped
-by the system.
-
-```java
-TestStream<GameActionInfo> infos = TestStream.create(AvroCoder.of(GameActionInfo.class))
-    .addElements(new GameActionInfo("sky", "blue", 3, new Instant(0L)),
-                 new GameActionInfo("navy", "blue", 3, new Instant(0L).plus(Duration.standardMinutes(3))))
-    // Move the watermark past the end of the window and the allowed lateness
-    .advanceWatermarkTo(new Instant(0L).plus(TEAM_WINDOW_DURATION)
-                                       .plus(ALLOWED_LATENESS)
-                                       .plus(Duration.standardMinutes(1)))
-    .addElements(new GameActionInfo(
-        "sky",
-        "blue",
-        12,
-        new Instant(0L).plus(TEAM_WINDOW_DURATION).minus(Duration.standardMinutes(1))))
-    .advanceWatermarkToInfinity();
-
-PCollection<KV<String, Integer>> teamScores = p.apply(infos)
-    .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS));
-```
-
-<img class="center-block" src="{{ "/images/blog/test-stream/elements-droppably-late.png" | prepend: site.baseurl }}" alt="An element arrives after the end of the window plus the allowed lateness, and is dropped" width="442">
-
-```java
-// An on-time pane is emitted with the events that arrived before the window closed
-PAssert.that(teamScores)
-       .inWindow(window)
-       .containsInAnyOrder(KV.of("blue", 6));
-
-p.run();
-```
-
-### Elements arrive before the end of the window, and some processing time passes
-
-Using additional methods, we can demonstrate the behavior of speculative
-triggers by advancing the processing time of the TestStream. If we add elements
-to an input PCollection, occasionally advancing the processing time clock, and
-apply `CalculateUserScores`:
-
-```java
-TestStream<GameActionInfo> infos = TestStream.create(AvroCoder.of(GameActionInfo.class))
-    .addElements(new GameActionInfo("scarlet", "red", 3, new Instant(0L)),
-                 new GameActionInfo("scarlet", "red", 2, new Instant(0L).plus(Duration.standardMinutes(1))))
-    .advanceProcessingTime(Duration.standardMinutes(12))
-    .addElements(new GameActionInfo("oxblood", "red", 2, new Instant(0L).plus(Duration.standardSeconds(22))),
-                 new GameActionInfo("scarlet", "red", 4, new Instant(0L).plus(Duration.standardMinutes(2))))
-    .advanceProcessingTime(Duration.standardMinutes(15))
-    .advanceWatermarkToInfinity();
-
-PCollection<KV<String, Integer>> userScores =
-    p.apply(infos).apply(new CalculateUserScores(ALLOWED_LATENESS));
-```
-
-<img class="center-block" src="{{ "/images/blog/test-stream/elements-processing-speculative.png" | prepend: site.baseurl }}" alt="Elements arrive and speculative panes are produced as processing time advances" width="442">
-
-```java
-PAssert.that(userScores)
-       .inEarlyGlobalWindowPanes()
-       .containsInAnyOrder(KV.of("scarlet", 5),
-                           KV.of("scarlet", 9),
-                           KV.of("oxblood", 2));
-
-p.run();
-```
-
-## TestStream - Under the Hood
-
-TestStream relies on a pipeline concept we've introduced, called quiescence, to
-utilize the existing runner infrastructure while providing guarantees about when
-a root transform will be called by the runner. This consists of properties about
-pending elements and triggers, namely:
-
-* No trigger is permitted to fire but has not yet fired
-* All elements are either buffered in state or cannot progress until a side input becomes available
-
-Simplified, this means that, in the absence of an advancement in input
-watermarks or processing time, or additional elements being added to the
-pipeline, the pipeline will not make progress. Whenever the TestStream PTransform
-performs an action, the runner must not reinvoke the same instance until the
-pipeline has quiesced. This ensures that the events specified by TestStream
-happen "in-order", which ensures that input watermarks and the system clock do
-not advance ahead of the elements they are meant to hold up.
-
-The DirectRunner has been modified to use quiescence as the signal that it
-should add more work to the Pipeline, and the implementation of TestStream in
-that runner uses this fact to perform a single output per event. The DirectRunner
-implementation also directly controls the runner's system clock, ensuring that
-tests will complete promptly even if there is a multi-minute processing time
-trigger located within the pipeline.
-
-The TestStream transform is supported in the DirectRunner. For most users, tests
-written using TestPipeline and PAsserts will automatically function while using
-TestStream.
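-
-Putting the pieces together, a complete test might look like the following
-sketch, assuming the GameActionInfo type and CalculateTeamScores transform from
-the examples above (the window constants and expected score are illustrative):
-
-```java
-@Test
-public void teamScoresAreEmittedOnTime() {
-  // TestPipeline typically selects the DirectRunner in a test environment
-  TestPipeline p = TestPipeline.create();
-
-  TestStream<GameActionInfo> infos = TestStream.create(AvroCoder.of(GameActionInfo.class))
-      .addElements(new GameActionInfo("sky", "blue", 12, new Instant(0L)))
-      .advanceWatermarkToInfinity();
-
-  PCollection<KV<String, Integer>> teamScores = p.apply(infos)
-      .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS));
-
-  // The single on-time element appears in the window's output
-  PAssert.that(teamScores).containsInAnyOrder(KV.of("blue", 12));
-  p.run();
-}
-```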
-
-## Summary
-
-The addition of TestStream alongside window and pane-specific matchers in PAssert
-has enabled the testing of Pipelines which produce speculative and late panes.
-This permits tests for all styles of pipeline to be expressed directly within the
-Java SDK. If you have questions or comments, we'd love to hear them on the
-[mailing lists]({{ site.baseurl }}/use/mailing-lists/).

http://git-wip-us.apache.org/repos/asf/incubator-beam-site/blob/cc22c462/_sass/_base.scss
----------------------------------------------------------------------
diff --git a/_sass/_base.scss b/_sass/_base.scss
deleted file mode 100644
index 0883c3c..0000000
--- a/_sass/_base.scss
+++ /dev/null
@@ -1,206 +0,0 @@
-/**
- * Reset some basic elements
- */
-body, h1, h2, h3, h4, h5, h6,
-p, blockquote, pre, hr,
-dl, dd, ol, ul, figure {
-    margin: 0;
-    padding: 0;
-}
-
-
-
-/**
- * Basic styling
- */
-body {
-    font: $base-font-weight #{$base-font-size}/#{$base-line-height} $base-font-family;
-    color: $text-color;
-    background-color: $background-color;
-    -webkit-text-size-adjust: 100%;
-    -webkit-font-feature-settings: "kern" 1;
-    -moz-font-feature-settings: "kern" 1;
-    -o-font-feature-settings: "kern" 1;
-    font-feature-settings: "kern" 1;
-    font-kerning: normal;
-}
-
-
-
-/**
- * Set `margin-bottom` to maintain vertical rhythm
- */
-h1, h2, h3, h4, h5, h6,
-p, blockquote, pre,
-ul, ol, dl, figure,
-%vertical-rhythm {
-    margin-bottom: $spacing-unit / 2;
-}
-
-
-
-/**
- * Images
- */
-img {
-    max-width: 100%;
-    vertical-align: middle;
-}
-
-
-
-/**
- * Figures
- */
-figure > img {
-    display: block;
-}
-
-figcaption {
-    font-size: $small-font-size;
-}
-
-
-
-/**
- * Lists
- */
-ul, ol {
-    margin-left: $spacing-unit;
-}
-
-li {
-    > ul,
-    > ol {
-         margin-bottom: 0;
-    }
-}
-
-
-
-/**
- * Headings
- */
-h1, h2, h3, h4, h5, h6 {
-    font-weight: $base-font-weight;
-}
-
-
-
-/**
- * Links
- */
-a {
-    color: $brand-color;
-    text-decoration: none;
-
-    &:visited {
-        color: darken($brand-color, 15%);
-    }
-
-    &:hover {
-        color: $text-color;
-        text-decoration: underline;
-    }
-}
-
-
-
-/**
- * Blockquotes
- */
-blockquote {
-    color: $grey-color;
-    border-left: 4px solid $grey-color-light;
-    padding-left: $spacing-unit / 2;
-    font-size: 18px;
-    letter-spacing: -1px;
-    font-style: italic;
-
-    > :last-child {
-        margin-bottom: 0;
-    }
-}
-
-
-
-/**
- * Code formatting
- */
-pre,
-code {
-    font-size: 15px;
-    border: 1px solid $grey-color-light;
-    border-radius: 3px;
-    background-color: #eef;
-}
-
-code {
-    padding: 1px 5px;
-}
-
-pre {
-    padding: 8px 12px;
-    overflow-x: auto;
-
-    > code {
-        border: 0;
-        padding-right: 0;
-        padding-left: 0;
-    }
-}
-
-
-
-/**
- * Wrapper
- */
-.wrapper {
-    max-width: -webkit-calc(#{$content-width} - (#{$spacing-unit} * 2));
-    max-width:         calc(#{$content-width} - (#{$spacing-unit} * 2));
-    margin-right: auto;
-    margin-left: auto;
-    padding-right: $spacing-unit;
-    padding-left: $spacing-unit;
-    @extend %clearfix;
-
-    @include media-query($on-laptop) {
-        max-width: -webkit-calc(#{$content-width} - (#{$spacing-unit}));
-        max-width:         calc(#{$content-width} - (#{$spacing-unit}));
-        padding-right: $spacing-unit / 2;
-        padding-left: $spacing-unit / 2;
-    }
-}
-
-
-
-/**
- * Clearfix
- */
-%clearfix {
-
-    &:after {
-        content: "";
-        display: table;
-        clear: both;
-    }
-}
-
-
-
-/**
- * Icons
- */
-.icon {
-
-    > svg {
-        display: inline-block;
-        width: 16px;
-        height: 16px;
-        vertical-align: middle;
-
-        path {
-            fill: $grey-color;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam-site/blob/cc22c462/_sass/_bootstrap.scss
----------------------------------------------------------------------
diff --git a/_sass/_bootstrap.scss b/_sass/_bootstrap.scss
deleted file mode 100755
index c773c8c..0000000
--- a/_sass/_bootstrap.scss
+++ /dev/null
@@ -1,56 +0,0 @@
-/*!
- * Bootstrap v3.3.6 (http://getbootstrap.com)
- * Copyright 2011-2015 Twitter, Inc.
- * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)
- */
-
-// Core variables and mixins
-@import "bootstrap/variables";
-@import "bootstrap/mixins";
-
-// Reset and dependencies
-@import "bootstrap/normalize";
-@import "bootstrap/print";
-@import "bootstrap/glyphicons";
-
-// Core CSS
-@import "bootstrap/scaffolding";
-@import "bootstrap/type";
-@import "bootstrap/code";
-@import "bootstrap/grid";
-@import "bootstrap/tables";
-@import "bootstrap/forms";
-@import "bootstrap/buttons";
-
-// Components
-@import "bootstrap/component-animations";
-@import "bootstrap/dropdowns";
-@import "bootstrap/button-groups";
-@import "bootstrap/input-groups";
-@import "bootstrap/navs";
-@import "bootstrap/navbar";
-@import "bootstrap/breadcrumbs";
-@import "bootstrap/pagination";
-@import "bootstrap/pager";
-@import "bootstrap/labels";
-@import "bootstrap/badges";
-@import "bootstrap/jumbotron";
-@import "bootstrap/thumbnails";
-@import "bootstrap/alerts";
-@import "bootstrap/progress-bars";
-@import "bootstrap/media";
-@import "bootstrap/list-group";
-@import "bootstrap/panels";
-@import "bootstrap/responsive-embed";
-@import "bootstrap/wells";
-@import "bootstrap/close";
-
-// Components w/ JavaScript
-@import "bootstrap/modals";
-@import "bootstrap/tooltip";
-@import "bootstrap/popovers";
-@import "bootstrap/carousel";
-
-// Utility classes
-@import "bootstrap/utilities";
-@import "bootstrap/responsive-utilities";

http://git-wip-us.apache.org/repos/asf/incubator-beam-site/blob/cc22c462/_sass/_layout.scss
----------------------------------------------------------------------
diff --git a/_sass/_layout.scss b/_sass/_layout.scss
deleted file mode 100644
index 9cbfdde..0000000
--- a/_sass/_layout.scss
+++ /dev/null
@@ -1,242 +0,0 @@
-/**
- * Site header
- */
-.site-header {
-    border-top: 5px solid $grey-color-dark;
-    border-bottom: 1px solid $grey-color-light;
-    min-height: 56px;
-
-    // Positioning context for the mobile navigation icon
-    position: relative;
-}
-
-.site-title {
-    font-size: 26px;
-    font-weight: 300;
-    line-height: 56px;
-    letter-spacing: -1px;
-    margin-bottom: 0;
-    float: left;
-
-    &,
-    &:visited {
-        color: $grey-color-dark;
-    }
-}
-
-.site-nav {
-    float: right;
-    line-height: 56px;
-
-    .menu-icon {
-        display: none;
-    }
-
-    .page-link {
-        color: $text-color;
-        line-height: $base-line-height;
-
-        // Gaps between nav items, but not on the last one
-        &:not(:last-child) {
-            margin-right: 20px;
-        }
-    }
-
-    @include media-query($on-palm) {
-        position: absolute;
-        top: 9px;
-        right: $spacing-unit / 2;
-        background-color: $background-color;
-        border: 1px solid $grey-color-light;
-        border-radius: 5px;
-        text-align: right;
-
-        .menu-icon {
-            display: block;
-            float: right;
-            width: 36px;
-            height: 26px;
-            line-height: 0;
-            padding-top: 10px;
-            text-align: center;
-
-            > svg {
-                width: 18px;
-                height: 15px;
-
-                path {
-                    fill: $grey-color-dark;
-                }
-            }
-        }
-
-        .trigger {
-            clear: both;
-            display: none;
-        }
-
-        &:hover .trigger {
-            display: block;
-            padding-bottom: 5px;
-        }
-
-        .page-link {
-            display: block;
-            padding: 5px 10px;
-
-            &:not(:last-child) {
-                margin-right: 0;
-            }
-            margin-left: 20px;
-        }
-    }
-}
-
-
-
-/**
- * Site footer
- */
-.site-footer {
-    border-top: 1px solid $grey-color-light;
-    padding: $spacing-unit 0;
-}
-
-.footer-heading {
-    font-size: 18px;
-    margin-bottom: $spacing-unit / 2;
-}
-
-.contact-list,
-.social-media-list {
-    list-style: none;
-    margin-left: 0;
-}
-
-.footer-col-wrapper {
-    font-size: 15px;
-    color: $grey-color;
-    margin-left: -$spacing-unit / 2;
-    @extend %clearfix;
-}
-
-.footer-col {
-    float: left;
-    margin-bottom: $spacing-unit / 2;
-    padding-left: $spacing-unit / 2;
-}
-
-.footer-col-1 {
-    width: -webkit-calc(35% - (#{$spacing-unit} / 2));
-    width:         calc(35% - (#{$spacing-unit} / 2));
-}
-
-.footer-col-2 {
-    width: -webkit-calc(20% - (#{$spacing-unit} / 2));
-    width:         calc(20% - (#{$spacing-unit} / 2));
-}
-
-.footer-col-3 {
-    width: -webkit-calc(45% - (#{$spacing-unit} / 2));
-    width:         calc(45% - (#{$spacing-unit} / 2));
-}
-
-@include media-query($on-laptop) {
-    .footer-col-1,
-    .footer-col-2 {
-        width: -webkit-calc(50% - (#{$spacing-unit} / 2));
-        width:         calc(50% - (#{$spacing-unit} / 2));
-    }
-
-    .footer-col-3 {
-        width: -webkit-calc(100% - (#{$spacing-unit} / 2));
-        width:         calc(100% - (#{$spacing-unit} / 2));
-    }
-}
-
-@include media-query($on-palm) {
-    .footer-col {
-        float: none;
-        width: -webkit-calc(100% - (#{$spacing-unit} / 2));
-        width:         calc(100% - (#{$spacing-unit} / 2));
-    }
-}
-
-
-
-/**
- * Page content
- */
-.page-content {
-    padding: $spacing-unit 0;
-}
-
-.page-heading {
-    font-size: 20px;
-}
-
-.post-list {
-    margin-left: 0;
-    list-style: none;
-
-    > li {
-        margin-bottom: $spacing-unit;
-    }
-}
-
-.post-meta {
-    font-size: $small-font-size;
-    color: $grey-color;
-}
-
-.post-link {
-    display: block;
-    font-size: 24px;
-}
-
-
-
-/**
- * Posts
- */
-.post-header {
-    margin-bottom: $spacing-unit;
-}
-
-.post-title {
-    font-size: 42px;
-    letter-spacing: -1px;
-    line-height: 1;
-
-    @include media-query($on-laptop) {
-        font-size: 36px;
-    }
-}
-
-.post-content {
-    margin-bottom: $spacing-unit;
-
-    h2 {
-        font-size: 32px;
-
-        @include media-query($on-laptop) {
-            font-size: 28px;
-        }
-    }
-
-    h3 {
-        font-size: 26px;
-
-        @include media-query($on-laptop) {
-            font-size: 22px;
-        }
-    }
-
-    h4 {
-        font-size: 20px;
-
-        @include media-query($on-laptop) {
-            font-size: 18px;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam-site/blob/cc22c462/_sass/_syntax-highlighting.scss
----------------------------------------------------------------------
diff --git a/_sass/_syntax-highlighting.scss b/_sass/_syntax-highlighting.scss
deleted file mode 100644
index b5cd813..0000000
--- a/_sass/_syntax-highlighting.scss
+++ /dev/null
@@ -1,70 +0,0 @@
-/**
- * Syntax highlighting styles
- */
-.highlight {
-    background: #fff;
-
-    .highlighter-rouge & {
-      background: #eef;
-    }
-
-    .c     { color: #998; font-style: italic } // Comment
-    .err   { color: #a61717 } // Error
-    .k     { font-weight: bold } // Keyword
-    .o     { font-weight: bold } // Operator
-    .cm    { color: #998; font-style: italic } // Comment.Multiline
-    .cp    { color: #999; font-weight: bold } // Comment.Preproc
-    .c1    { color: #998; font-style: italic } // Comment.Single
-    .cs    { color: #999; font-weight: bold; font-style: italic } // Comment.Special
-    .gd    { color: #000; background-color: #fdd } // Generic.Deleted
-    .gd .x { color: #000; background-color: #faa } // Generic.Deleted.Specific
-    .ge    { font-style: italic } // Generic.Emph
-    .gr    { color: #a00 } // Generic.Error
-    .gh    { color: #999 } // Generic.Heading
-    .gi    { color: #000; background-color: #dfd } // Generic.Inserted
-    .gi .x { color: #000; background-color: #afa } // Generic.Inserted.Specific
-    .go    { color: #888 } // Generic.Output
-    .gp    { color: #555 } // Generic.Prompt
-    .gs    { font-weight: bold } // Generic.Strong
-    .gu    { color: #aaa } // Generic.Subheading
-    .gt    { color: #a00 } // Generic.Traceback
-    .kc    { font-weight: bold } // Keyword.Constant
-    .kd    { font-weight: bold } // Keyword.Declaration
-    .kp    { font-weight: bold } // Keyword.Pseudo
-    .kr    { font-weight: bold } // Keyword.Reserved
-    .kt    { color: #458; font-weight: bold } // Keyword.Type
-    .m     { color: #099 } // Literal.Number
-    .s     { color: #d14 } // Literal.String
-    .na    { color: #008080 } // Name.Attribute
-    .nb    { color: #0086B3 } // Name.Builtin
-    .nc    { color: #458; font-weight: bold } // Name.Class
-    .no    { color: #008080 } // Name.Constant
-    .ni    { color: #800080 } // Name.Entity
-    .ne    { color: #900; font-weight: bold } // Name.Exception
-    .nf    { color: #900; font-weight: bold } // Name.Function
-    .nn    { color: #555 } // Name.Namespace
-    .nt    { color: #000080 } // Name.Tag
-    .nv    { color: #008080 } // Name.Variable
-    .ow    { font-weight: bold } // Operator.Word
-    .w     { color: #bbb } // Text.Whitespace
-    .mf    { color: #099 } // Literal.Number.Float
-    .mh    { color: #099 } // Literal.Number.Hex
-    .mi    { color: #099 } // Literal.Number.Integer
-    .mo    { color: #099 } // Literal.Number.Oct
-    .sb    { color: #d14 } // Literal.String.Backtick
-    .sc    { color: #d14 } // Literal.String.Char
-    .sd    { color: #d14 } // Literal.String.Doc
-    .s2    { color: #d14 } // Literal.String.Double
-    .se    { color: #d14 } // Literal.String.Escape
-    .sh    { color: #d14 } // Literal.String.Heredoc
-    .si    { color: #d14 } // Literal.String.Interpol
-    .sx    { color: #d14 } // Literal.String.Other
-    .sr    { color: #009926 } // Literal.String.Regex
-    .s1    { color: #d14 } // Literal.String.Single
-    .ss    { color: #990073 } // Literal.String.Symbol
-    .bp    { color: #999 } // Name.Builtin.Pseudo
-    .vc    { color: #008080 } // Name.Variable.Class
-    .vg    { color: #008080 } // Name.Variable.Global
-    .vi    { color: #008080 } // Name.Variable.Instance
-    .il    { color: #099 } // Literal.Number.Integer.Long
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam-site/blob/cc22c462/_sass/bootstrap/_alerts.scss
----------------------------------------------------------------------
diff --git a/_sass/bootstrap/_alerts.scss b/_sass/bootstrap/_alerts.scss
deleted file mode 100755
index 7d1e1fd..0000000
--- a/_sass/bootstrap/_alerts.scss
+++ /dev/null
@@ -1,73 +0,0 @@
-//
-// Alerts
-// --------------------------------------------------
-
-
-// Base styles
-// -------------------------
-
-.alert {
-  padding: $alert-padding;
-  margin-bottom: $line-height-computed;
-  border: 1px solid transparent;
-  border-radius: $alert-border-radius;
-
-  // Headings for larger alerts
-  h4 {
-    margin-top: 0;
-    // Specified for the h4 to prevent conflicts of changing $headings-color
-    color: inherit;
-  }
-
-  // Provide class for links that match alerts
-  .alert-link {
-    font-weight: $alert-link-font-weight;
-  }
-
-  // Improve alignment and spacing of inner content
-  > p,
-  > ul {
-    margin-bottom: 0;
-  }
-
-  > p + p {
-    margin-top: 5px;
-  }
-}
-
-// Dismissible alerts
-//
-// Expand the right padding and account for the close button's positioning.
-
-.alert-dismissable, // The misspelled .alert-dismissable was deprecated in 3.2.0.
-.alert-dismissible {
-  padding-right: ($alert-padding + 20);
-
-  // Adjust close link position
-  .close {
-    position: relative;
-    top: -2px;
-    right: -21px;
-    color: inherit;
-  }
-}
-
-// Alternate styles
-//
-// Generate contextual modifier classes for colorizing the alert.
-
-.alert-success {
-  @include alert-variant($alert-success-bg, $alert-success-border, $alert-success-text);
-}
-
-.alert-info {
-  @include alert-variant($alert-info-bg, $alert-info-border, $alert-info-text);
-}
-
-.alert-warning {
-  @include alert-variant($alert-warning-bg, $alert-warning-border, $alert-warning-text);
-}
-
-.alert-danger {
-  @include alert-variant($alert-danger-bg, $alert-danger-border, $alert-danger-text);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam-site/blob/cc22c462/_sass/bootstrap/_badges.scss
----------------------------------------------------------------------
diff --git a/_sass/bootstrap/_badges.scss b/_sass/bootstrap/_badges.scss
deleted file mode 100755
index 70002e0..0000000
--- a/_sass/bootstrap/_badges.scss
+++ /dev/null
@@ -1,68 +0,0 @@
-//
-// Badges
-// --------------------------------------------------
-
-
-// Base class
-.badge {
-  display: inline-block;
-  min-width: 10px;
-  padding: 3px 7px;
-  font-size: $font-size-small;
-  font-weight: $badge-font-weight;
-  color: $badge-color;
-  line-height: $badge-line-height;
-  vertical-align: middle;
-  white-space: nowrap;
-  text-align: center;
-  background-color: $badge-bg;
-  border-radius: $badge-border-radius;
-
-  // Empty badges collapse automatically (not available in IE8)
-  &:empty {
-    display: none;
-  }
-
-  // Quick fix for badges in buttons
-  .btn & {
-    position: relative;
-    top: -1px;
-  }
-
-  .btn-xs &,
-  .btn-group-xs > .btn & {
-    top: 0;
-    padding: 1px 5px;
-  }
-
-  // [converter] extracted a& to a.badge
-
-  // Account for badges in navs
-  .list-group-item.active > &,
-  .nav-pills > .active > a > & {
-    color: $badge-active-color;
-    background-color: $badge-active-bg;
-  }
-
-  .list-group-item > & {
-    float: right;
-  }
-
-  .list-group-item > & + & {
-    margin-right: 5px;
-  }
-
-  .nav-pills > li > a > & {
-    margin-left: 3px;
-  }
-}
-
-// Hover state, but only for links
-a.badge {
-  &:hover,
-  &:focus {
-    color: $badge-link-hover-color;
-    text-decoration: none;
-    cursor: pointer;
-  }
-}