You are viewing a plain text version of this content. The canonical link for it is here.
Posted to by on 2018/05/18 01:08:25 UTC

[2/5] carbondata-site git commit: add documentation for and
diff --git a/src/main/webapp/sdk-writer-guide.html b/src/main/webapp/sdk-writer-guide.html
new file mode 100644
index 0000000..a73c22c
--- /dev/null
+++ b/src/main/webapp/sdk-writer-guide.html
@@ -0,0 +1,546 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="utf-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <link href='images/favicon.ico' rel='shortcut icon' type='image/x-icon'>
+    <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
+    <title>CarbonData</title>
+    <style>
+    </style>
+    <!-- Bootstrap -->
+    <link rel="stylesheet" href="css/bootstrap.min.css">
+    <link href="css/style.css" rel="stylesheet">
+    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
+    <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
+    <!--[if lt IE 9]>
+    <script src=""></script>
+    <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
+    <![endif]-->
+    <script src="js/jquery.min.js"></script>
+    <script src="js/bootstrap.min.js"></script>
+</head>
+<body>
+<header>
+    <nav class="navbar navbar-default navbar-custom cd-navbar-wrapper">
+        <div class="container">
+            <div class="navbar-header">
+                <button aria-controls="navbar" aria-expanded="false" data-target="#navbar" data-toggle="collapse"
+                        class="navbar-toggle collapsed" type="button">
+                    <span class="sr-only">Toggle navigation</span>
+                    <span class="icon-bar"></span>
+                    <span class="icon-bar"></span>
+                    <span class="icon-bar"></span>
+                </button>
+                <a href="index.html" class="logo">
+                    <img src="images/CarbonDataLogo.png" alt="CarbonData logo" title="CarbonData logo"/>
+                </a>
+            </div>
+            <div class="navbar-collapse collapse cd_navcontnt" id="navbar">
+                <ul class="nav navbar-nav navbar-right navlist-custom">
+                    <li><a href="index.html" class="hidden-xs"><i class="fa fa-home" aria-hidden="true"></i> </a>
+                    </li>
+                    <li><a href="index.html" class="hidden-lg hidden-md hidden-sm">Home</a></li>
+                    <li class="dropdown">
+                        <a href="#" class="dropdown-toggle " data-toggle="dropdown" role="button" aria-haspopup="true"
+                           aria-expanded="false"> Download <span class="caret"></span></a>
+                        <ul class="dropdown-menu">
+                            <li>
+                                <a href=""
+                                   target="_blank">Apache CarbonData 1.3.1</a></li>
+                            <li>
+                                <a href=""
+                                   target="_blank">Apache CarbonData 1.3.0</a></li>
+                            <li>
+                                <a href=""
+                                   target="_blank">Apache CarbonData 1.2.0</a></li>
+                            <li>
+                                <a href=""
+                                   target="_blank">Apache CarbonData 1.1.1</a></li>
+                            <li>
+                                <a href=""
+                                   target="_blank">Apache CarbonData 1.1.0</a></li>
+                            <li>
+                                <a href=""
+                                   target="_blank">Apache CarbonData 1.0.0</a></li>
+                            <li>
+                                <a href=""
+                                   target="_blank">Apache CarbonData 0.2.0</a></li>
+                            <li>
+                                <a href=""
+                                   target="_blank">Apache CarbonData 0.1.1</a></li>
+                            <li>
+                                <a href=""
+                                   target="_blank">Apache CarbonData 0.1.0</a></li>
+                            <li>
+                                <a href=""
+                                   target="_blank">Release Archive</a></li>
+                        </ul>
+                    </li>
+                    <li><a href="mainpage.html" class="active">Documentation</a></li>
+                    <li class="dropdown">
+                        <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true"
+                           aria-expanded="false">Community <span class="caret"></span></a>
+                        <ul class="dropdown-menu">
+                            <li>
+                                <a href=""
+                                   target="_blank">Contributing to CarbonData</a></li>
+                            <li>
+                                <a href=""
+                                   target="_blank">Release Guide</a></li>
+                            <li>
+                                <a href=""
+                                   target="_blank">Project PMC and Committers</a></li>
+                            <li>
+                                <a href=""
+                                   target="_blank">CarbonData Meetups</a></li>
+                            <li><a href="security.html">Apache CarbonData Security</a></li>
+                            <li><a href="" target="_blank">Apache
+                                Jira</a></li>
+                            <li><a href="videogallery.html">CarbonData Videos </a></li>
+                        </ul>
+                    </li>
+                    <li class="dropdown">
+                        <a href="" class="apache_link hidden-xs dropdown-toggle"
+                           data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Apache</a>
+                        <ul class="dropdown-menu">
+                            <li><a href="" target="_blank">Apache Homepage</a></li>
+                            <li><a href="" target="_blank">License</a></li>
+                            <li><a href=""
+                                   target="_blank">Sponsorship</a></li>
+                            <li><a href="" target="_blank">Thanks</a></li>
+                        </ul>
+                    </li>
+                    <li class="dropdown">
+                        <a href="" class="hidden-lg hidden-md hidden-sm dropdown-toggle"
+                           data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Apache</a>
+                        <ul class="dropdown-menu">
+                            <li><a href="" target="_blank">Apache Homepage</a></li>
+                            <li><a href="" target="_blank">License</a></li>
+                            <li><a href=""
+                                   target="_blank">Sponsorship</a></li>
+                            <li><a href="" target="_blank">Thanks</a></li>
+                        </ul>
+                    </li>
+                    <li>
+                        <a href="#" id="search-icon"><i class="fa fa-search" aria-hidden="true"></i></a>
+                    </li>
+                </ul>
+            </div><!--/.nav-collapse -->
+            <div id="search-box">
+                <form method="get" action="" target="_blank">
+                    <div class="search-block">
+                        <table border="0" cellpadding="0" width="100%">
+                            <tr>
+                                <td style="width:80%">
+                                    <input type="text" name="q" size=" 5" maxlength="255" value=""
+                                           class="search-input"  placeholder="Search...."    required/>
+                                </td>
+                                <td style="width:20%">
+                                    <input type="submit" value="Search"/></td>
+                            </tr>
+                            <tr>
+                                <td align="left" style="font-size:75%" colspan="2">
+                                    <input type="checkbox" name="sitesearch" value="" checked/>
+                                    <span style=" position: relative; top: -3px;"> Only search for CarbonData</span>
+                                </td>
+                            </tr>
+                        </table>
+                    </div>
+                </form>
+            </div>
+        </div>
+    </nav>
+</header> <!-- end Header part -->
+<div class="fixed-padding"></div> <!--  top padding with fixed header  -->
+<section><!-- Dashboard nav -->
+    <div class="container-fluid q">
+        <div class="col-sm-12  col-md-12 maindashboard">
+            <div class="row">
+                <section>
+                    <div style="padding:10px 15px;">
+                        <div id="viewpage" name="viewpage">
+                            <div class="row">
+                                <div class="col-sm-12  col-md-12">
+                                    <div><h1>
+<a id="sdk-writer-guide" class="anchor" href="#sdk-writer-guide" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>SDK Writer Guide</h1>
+<p>The carbon jars package contains a carbondata-store-sdk-x.x.x-SNAPSHOT.jar.
+This SDK writer writes carbondata and carbonindex files at a given path.
+External clients can use this writer to convert data in other formats, or live data, into carbondata and index files.
+The SDK writer output contains only carbondata and carbonindex files. No metadata folder will be present.</p>
+<h2>
+<a id="quick-example" class="anchor" href="#quick-example" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Quick example</h2>
+<h3>
+<a id="example-with-csv-format" class="anchor" href="#example-with-csv-format" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Example with csv format</h3>
+<div class="highlight highlight-source-java"><pre> <span class="pl-k">import</span> <span class="pl-smi">java.io.IOException</span>;
+ <span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException</span>;
+ <span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.core.metadata.datatype.DataTypes</span>;
+ <span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.sdk.file.CarbonWriter</span>;
+ <span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.sdk.file.CarbonWriterBuilder</span>;
+ <span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.sdk.file.Field</span>;
+ <span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.sdk.file.Schema</span>;
+ <span class="pl-k">public</span> <span class="pl-k">class</span> <span class="pl-en">TestSdk</span> {
+   <span class="pl-k">public</span> <span class="pl-k">static</span> <span class="pl-k">void</span> <span class="pl-en">main</span>(<span class="pl-k">String</span>[] <span class="pl-v">args</span>) <span class="pl-k">throws</span> <span class="pl-smi">IOException</span>, <span class="pl-smi">InvalidLoadOptionException</span> {
+     testSdkWriter();
+   }
+   <span class="pl-k">public</span> <span class="pl-k">static</span> <span class="pl-k">void</span> <span class="pl-en">testSdkWriter</span>() <span class="pl-k">throws</span> <span class="pl-smi">IOException</span>, <span class="pl-smi">InvalidLoadOptionException</span> {
+     <span class="pl-smi">String</span> path <span class="pl-k">=</span> <span class="pl-s"><span class="pl-pds">"</span>/home/root1/Documents/ab/temp<span class="pl-pds">"</span></span>;
+     <span class="pl-k">Field</span>[] fields <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-smi">Field</span>[<span class="pl-c1">2</span>];
+     fields[<span class="pl-c1">0</span>] <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-smi">Field</span>(<span class="pl-s"><span class="pl-pds">"</span>name<span class="pl-pds">"</span></span>, <span class="pl-smi">DataTypes</span><span class="pl-c1"><span class="pl-k">.</span>STRING</span>);
+     fields[<span class="pl-c1">1</span>] <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-smi">Field</span>(<span class="pl-s"><span class="pl-pds">"</span>age<span class="pl-pds">"</span></span>, <span class="pl-smi">DataTypes</span><span class="pl-c1"><span class="pl-k">.</span>INT</span>);
+     <span class="pl-smi">Schema</span> schema <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-smi">Schema</span>(fields);
+     <span class="pl-smi">CarbonWriterBuilder</span> builder <span class="pl-k">=</span> <span class="pl-smi">CarbonWriter</span><span class="pl-k">.</span>builder()<span class="pl-k">.</span>withSchema(schema)<span class="pl-k">.</span>outputPath(path);
+     <span class="pl-smi">CarbonWriter</span> writer <span class="pl-k">=</span> builder<span class="pl-k">.</span>buildWriterForCSVInput();
+     <span class="pl-k">int</span> rows <span class="pl-k">=</span> <span class="pl-c1">5</span>;
+     <span class="pl-k">for</span> (<span class="pl-k">int</span> i <span class="pl-k">=</span> <span class="pl-c1">0</span>; i <span class="pl-k">&lt;</span> rows; i<span class="pl-k">++</span>) {
+       writer<span class="pl-k">.</span>write(<span class="pl-k">new</span> <span class="pl-smi">String</span>[] { <span class="pl-s"><span class="pl-pds">"</span>robot<span class="pl-pds">"</span></span> <span class="pl-k">+</span> (i <span class="pl-k">%</span> <span class="pl-c1">10</span>), <span class="pl-smi">String</span><span class="pl-k">.</span>valueOf(i) });
+     }
+     writer<span class="pl-k">.</span>close();
+   }
+ }</pre></div>
+<h3>
+<a id="example-with-avro-format" class="anchor" href="#example-with-avro-format" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Example with Avro format</h3>
+<div class="highlight highlight-source-java"><pre><span class="pl-k">import</span> <span class="pl-smi">java.io.IOException</span>;
+<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException</span>;
+<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.core.metadata.datatype.DataTypes</span>;
+<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.sdk.file.AvroCarbonWriter</span>;
+<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.sdk.file.CarbonWriter</span>;
+<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.sdk.file.Field</span>;
+<span class="pl-k">import</span> <span class="pl-smi">org.apache.avro.generic.GenericData</span>;
+<span class="pl-k">import</span> <span class="pl-smi">org.apache.commons.lang.CharEncoding</span>;
+<span class="pl-k">import</span> <span class="pl-smi">tech.allegro.schema.json2avro.converter.JsonAvroConverter</span>;
+<span class="pl-k">public</span> <span class="pl-k">class</span> <span class="pl-en">TestSdkAvro</span> {
+  <span class="pl-k">public</span> <span class="pl-k">static</span> <span class="pl-k">void</span> <span class="pl-en">main</span>(<span class="pl-k">String</span>[] <span class="pl-v">args</span>) <span class="pl-k">throws</span> <span class="pl-smi">IOException</span>, <span class="pl-smi">InvalidLoadOptionException</span> {
+    testSdkWriter();
+  }
+  <span class="pl-k">public</span> <span class="pl-k">static</span> <span class="pl-k">void</span> <span class="pl-en">testSdkWriter</span>() <span class="pl-k">throws</span> <span class="pl-smi">IOException</span>, <span class="pl-smi">InvalidLoadOptionException</span> {
+    <span class="pl-smi">String</span> path <span class="pl-k">=</span> <span class="pl-s"><span class="pl-pds">"</span>./AvroCarbonWriterSuiteWriteFiles<span class="pl-pds">"</span></span>;
+    <span class="pl-c"><span class="pl-c">//</span> Avro schema</span>
+    <span class="pl-smi">String</span> avroSchema <span class="pl-k">=</span>
+        <span class="pl-s"><span class="pl-pds">"</span>{<span class="pl-pds">"</span></span> <span class="pl-k">+</span>
+            <span class="pl-s"><span class="pl-pds">"</span>   <span class="pl-cce">\"</span>type<span class="pl-cce">\"</span> : <span class="pl-cce">\"</span>record<span class="pl-cce">\"</span>,<span class="pl-pds">"</span></span> <span class="pl-k">+</span>
+            <span class="pl-s"><span class="pl-pds">"</span>   <span class="pl-cce">\"</span>name<span class="pl-cce">\"</span> : <span class="pl-cce">\"</span>Acme<span class="pl-cce">\"</span>,<span class="pl-pds">"</span></span> <span class="pl-k">+</span>
+            <span class="pl-s"><span class="pl-pds">"</span>   <span class="pl-cce">\"</span>fields<span class="pl-cce">\"</span> : [<span class="pl-pds">"</span></span>
+            <span class="pl-k">+</span> <span class="pl-s"><span class="pl-pds">"</span>{ <span class="pl-cce">\"</span>name<span class="pl-cce">\"</span> : <span class="pl-cce">\"</span>fname<span class="pl-cce">\"</span>, <span class="pl-cce">\"</span>type<span class="pl-cce">\"</span> : <span class="pl-cce">\"</span>string<span class="pl-cce">\"</span> },<span class="pl-pds">"</span></span>
+            <span class="pl-k">+</span> <span class="pl-s"><span class="pl-pds">"</span>{ <span class="pl-cce">\"</span>name<span class="pl-cce">\"</span> : <span class="pl-cce">\"</span>age<span class="pl-cce">\"</span>, <span class="pl-cce">\"</span>type<span class="pl-cce">\"</span> : <span class="pl-cce">\"</span>int<span class="pl-cce">\"</span> }]<span class="pl-pds">"</span></span> <span class="pl-k">+</span>
+            <span class="pl-s"><span class="pl-pds">"</span>}<span class="pl-pds">"</span></span>;
+    <span class="pl-smi">String</span> json <span class="pl-k">=</span> <span class="pl-s"><span class="pl-pds">"</span>{<span class="pl-cce">\"</span>fname<span class="pl-cce">\"</span>:<span class="pl-cce">\"</span>bob<span class="pl-cce">\"</span>, <span class="pl-cce">\"</span>age<span class="pl-cce">\"</span>:10}<span class="pl-pds">"</span></span>;
+    <span class="pl-c"><span class="pl-c">//</span> conversion to GenericData.Record</span>
+    <span class="pl-smi">JsonAvroConverter</span> converter <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-smi">JsonAvroConverter</span>();
+    <span class="pl-smi">GenericData</span><span class="pl-k">.</span><span class="pl-smi">Record</span> record <span class="pl-k">=</span> converter<span class="pl-k">.</span>convertToGenericDataRecord(
+        json<span class="pl-k">.</span>getBytes(<span class="pl-smi">CharEncoding</span><span class="pl-c1"><span class="pl-k">.</span>UTF_8</span>), <span class="pl-k">new</span> <span class="pl-smi">org.apache.avro<span class="pl-k">.</span>Schema</span>.<span class="pl-smi">Parser</span>()<span class="pl-k">.</span>parse(avroSchema));
+    <span class="pl-c"><span class="pl-c">//</span> prepare carbon schema from avro schema </span>
+    <span class="pl-smi">org.apache.carbondata.sdk.file<span class="pl-k">.</span>Schema</span> carbonSchema <span class="pl-k">=</span>
+            <span class="pl-smi">AvroCarbonWriter</span><span class="pl-k">.</span>getCarbonSchemaFromAvroSchema(avroSchema);
+    <span class="pl-k">try</span> {
+      <span class="pl-smi">CarbonWriter</span> writer <span class="pl-k">=</span> <span class="pl-smi">CarbonWriter</span><span class="pl-k">.</span>builder()
+          .withSchema(carbonSchema)
+          .outputPath(path)
+          .buildWriterForAvroInput();
+      <span class="pl-k">for</span> (<span class="pl-k">int</span> i <span class="pl-k">=</span> <span class="pl-c1">0</span>; i <span class="pl-k">&lt;</span> <span class="pl-c1">100</span>; i<span class="pl-k">++</span>) {
+        writer<span class="pl-k">.</span>write(record);
+      }
+      writer<span class="pl-k">.</span>close();
+    } <span class="pl-k">catch</span> (<span class="pl-smi">Exception</span> e) {
+      e<span class="pl-k">.</span>printStackTrace();
+    }
+  }
+}</pre></div>
+<h2>
+<a id="datatypes-mapping" class="anchor" href="#datatypes-mapping" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Datatypes Mapping</h2>
+<p>Each SQL data type is mapped to an SDK data type. The mapping is as follows:</p>
+<th>SQL DataTypes</th>
+<th>Mapped SDK DataTypes</th>
+<td>DataTypes.createDecimalType(precision, scale)</td>
+<h2>
+<a id="api-list" class="anchor" href="#api-list" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>API List</h2>
+<h3>
+<a id="class-orgapachecarbondatasdkfilecarbonwriterbuilder" class="anchor" href="#class-orgapachecarbondatasdkfilecarbonwriterbuilder" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.sdk.file.CarbonWriterBuilder</h3>
+* prepares the builder with the schema provided
+* @param schema is instance of Schema
+*        This method must be called when building CarbonWriterBuilder
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder withSchema(Schema schema);
+* Sets the output path of the writer builder
+* @param path is the absolute path where output files are written
+*             This method must be called when building CarbonWriterBuilder
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder outputPath(String path);
+* If set false, writes the carbondata and carbonindex files in a flat folder structure
+* @param isTransactionalTable is a boolean value
+*             if set to false, then writes the carbondata and carbonindex files
+*                                                            in a flat folder structure.
+*             if set to true, then writes the carbondata and carbonindex files
+*                                                            in segment folder structure.
+*             By default set to false.
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder isTransactionalTable(boolean isTransactionalTable);
+* to set the timestamp in the carbondata and carbonindex index files
+* @param UUID is a timestamp to be used in the carbondata and carbonindex index files.
+*             By default set to zero.
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder uniqueIdentifier(long UUID);
+* To set the carbondata file size in MB between 1MB-2048MB
+* @param blockSize is size in MB between 1MB to 2048 MB
+*                  default value is 1024 MB
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder withBlockSize(int blockSize);
+* To set the blocklet size of carbondata file
+* @param blockletSize is blocklet size in MB
+*                     default value is 64 MB
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder withBlockletSize(int blockletSize);
+* sets the list of columns that needs to be in sorted order
+* @param sortColumns is a string array of columns that needs to be sorted.
+*                    If it is null or not set (the default), all dimensions are selected for sorting
+*                    If it is empty array, no columns are sorted
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder sortBy(String[] sortColumns);
+* If set, create a schema file in metadata folder.
+* @param persist is a boolean value. If set to true, creates a schema file in metadata folder.
+*                By default set to false, in which case no metadata folder is created.
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder persistSchemaFile(boolean persist);
+* sets the taskNo for the writer. SDKs concurrently running
+* will set taskNo in order to avoid conflicts in file's name during write.
+* @param taskNo is the TaskNo user wants to specify.
+*               by default it is system time in nanoseconds.
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder taskNo(String taskNo);
+* To support the load options for sdk writer
+* @param options key,value pair of load options.
+*                supported keys values are
+*                a. bad_records_logger_enable -- true (write into separate logs), false
+*                b. bad_records_action -- FAIL, FORCE, IGNORE, REDIRECT
+*                c. bad_record_path -- path
+*                d. dateformat -- same as JAVA SimpleDateFormat
+*                e. timestampformat -- same as JAVA SimpleDateFormat
+*                f. complex_delimiter_level_1 -- value to Split the complexTypeData
+*                g. complex_delimiter_level_2 -- value to Split the nested complexTypeData
+*                h. quotechar
+*                i. escapechar
+*                Default values are as follows.
+*                a. bad_records_logger_enable -- "false"
+*                b. bad_records_action -- "FAIL"
+*                c. bad_record_path -- ""
+*                d. dateformat -- "" , uses from file
+*                e. timestampformat -- "", uses from file
+*                f. complex_delimiter_level_1 -- "$"
+*                g. complex_delimiter_level_2 -- ":"
+*                h. quotechar -- "\""
+*                i. escapechar -- "\\"
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder withLoadOptions(Map&lt;String, String&gt; options);
+* Build a {@link CarbonWriter}, which accepts row in CSV format object
+* @return CSVCarbonWriter
+* @throws IOException
+* @throws InvalidLoadOptionException
+public CarbonWriter buildWriterForCSVInput() throws IOException, InvalidLoadOptionException;
+* Build a {@link CarbonWriter}, which accepts Avro format object
+* @return AvroCarbonWriter 
+* @throws IOException
+* @throws InvalidLoadOptionException
+public CarbonWriter buildWriterForAvroInput() throws IOException, InvalidLoadOptionException;
+<h3>
+<a id="class-orgapachecarbondatasdkfilecarbonwriter" class="anchor" href="#class-orgapachecarbondatasdkfilecarbonwriter" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.sdk.file.CarbonWriter</h3>
+* Write an object to the file, the format of the object depends on the implementation
+* If AvroCarbonWriter, object is of type org.apache.avro.generic.GenericData.Record 
+* If CSVCarbonWriter, object is of type String[]
+* Note: This API is not thread safe
+* @param object
+* @throws IOException
+public abstract void write(Object object) throws IOException;
+* Flush and close the writer
+public abstract void close() throws IOException;
+* Create a {@link CarbonWriterBuilder} to build a {@link CarbonWriter}
+public static CarbonWriterBuilder builder() {
+return new CarbonWriterBuilder();
+}
+<h3>
+<a id="class-orgapachecarbondatasdkfilefield" class="anchor" href="#class-orgapachecarbondatasdkfilefield" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.sdk.file.Field</h3>
+* Field Constructor
+* @param name name of the field
+* @param type datatype of field, specified in strings.
+public Field(String name, String type);
+* Field constructor
+* @param name name of the field
+* @param type datatype of the field of class DataType
+public Field(String name, DataType type);  
+<h3>
+<a id="class-orgapachecarbondatasdkfileschema" class="anchor" href="#class-orgapachecarbondatasdkfileschema" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.sdk.file.Schema</h3>
+* construct a schema with fields
+* @param fields
+public Schema(Field[] fields);
+* Create a Schema using JSON string, for example:
+* [
+*   {"name":"string"},
+*   {"age":"int"}
+* ] 
+* @param json specified as string
+* @return Schema
+public static Schema parseJson(String json);
+<h3>
+<a id="class-orgapachecarbondatasdkfileavrocarbonwriter" class="anchor" href="#class-orgapachecarbondatasdkfileavrocarbonwriter" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.sdk.file.AvroCarbonWriter</h3>
+* converts avro schema to carbon schema, required by carbonWriter
+* @param avroSchemaString json formatted avro schema as string
+* @return carbon sdk schema
+public static org.apache.carbondata.sdk.file.Schema getCarbonSchemaFromAvroSchema(String avroSchemaString);
+<div class="doc-footer">
+    <a href="#top" class="scroll-top">Top</a>
+</section><!-- End systemblock part -->
+<script src="js/custom.js"></script>
\ No newline at end of file
diff --git a/src/main/webapp/streaming-guide.html b/src/main/webapp/streaming-guide.html
index 812f1aa..e9788a9 100644
--- a/src/main/webapp/streaming-guide.html
+++ b/src/main/webapp/streaming-guide.html
@@ -190,16 +190,16 @@
 <p>Start spark-shell in new terminal, type :paste, then copy and run the following code.</p>
-<div class="highlight highlight-source-scala"><pre> <span class="pl-k">import</span> <span class="pl-smi"></span><span class="pl-smi">File</span>
- <span class="pl-k">import</span> <span class="pl-smi">org.apache.spark.sql.</span>{<span class="pl-smi">CarbonEnv</span>, <span class="pl-smi">SparkSession</span>}
- <span class="pl-k">import</span> <span class="pl-smi">org.apache.spark.sql.CarbonSession.</span><span class="pl-smi">_</span>
- <span class="pl-k">import</span> <span class="pl-smi">org.apache.spark.sql.streaming.</span>{<span class="pl-smi">ProcessingTime</span>, <span class="pl-smi">StreamingQuery</span>}
- <span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.core.util.path.</span><span class="pl-smi">CarbonStorePath</span>
+<div class="highlight highlight-source-scala"><pre> <span class="pl-k">import</span> <span class="pl-en">java</span>.<span class="pl-en">io</span>.<span class="pl-en">File</span>
+ <span class="pl-k">import</span> <span class="pl-en">org</span>.<span class="pl-en">apache</span>.<span class="pl-en">spark</span>.<span class="pl-en">sql</span>.{<span class="pl-en">CarbonEnv</span>, <span class="pl-en">SparkSession</span>}
+ <span class="pl-k">import</span> <span class="pl-en">org</span>.<span class="pl-en">apache</span>.<span class="pl-en">spark</span>.<span class="pl-en">sql</span>.<span class="pl-en">CarbonSession</span>.<span class="pl-en">_</span>
+ <span class="pl-k">import</span> <span class="pl-en">org</span>.<span class="pl-en">apache</span>.<span class="pl-en">spark</span>.<span class="pl-en">sql</span>.<span class="pl-en">streaming</span>.{<span class="pl-en">ProcessingTime</span>, <span class="pl-en">StreamingQuery</span>}
+ <span class="pl-k">import</span> <span class="pl-en">org</span>.<span class="pl-en">apache</span>.<span class="pl-en">carbondata</span>.<span class="pl-en">core</span>.<span class="pl-en">util</span>.<span class="pl-en">path</span>.<span class="pl-en">CarbonStorePath</span>
- <span class="pl-k">val</span> <span class="pl-en">warehouse</span> <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-en">File</span>(<span class="pl-s"><span class="pl-pds">"</span>./warehouse<span class="pl-pds">"</span></span>).getCanonicalPath
- <span class="pl-k">val</span> <span class="pl-en">metastore</span> <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-en">File</span>(<span class="pl-s"><span class="pl-pds">"</span>./metastore<span class="pl-pds">"</span></span>).getCanonicalPath
+ <span class="pl-k">val</span> <span class="pl-smi">warehouse</span> <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-en">File</span>(<span class="pl-s"><span class="pl-pds">"</span>./warehouse<span class="pl-pds">"</span></span>).getCanonicalPath
+ <span class="pl-k">val</span> <span class="pl-smi">metastore</span> <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-en">File</span>(<span class="pl-s"><span class="pl-pds">"</span>./metastore<span class="pl-pds">"</span></span>).getCanonicalPath
- <span class="pl-k">val</span> <span class="pl-en">spark</span> <span class="pl-k">=</span> <span class="pl-en">SparkSession</span>
+ <span class="pl-k">val</span> <span class="pl-smi">spark</span> <span class="pl-k">=</span> <span class="pl-en">SparkSession</span>
    .master(<span class="pl-s"><span class="pl-pds">"</span>local<span class="pl-pds">"</span></span>)
    .appName(<span class="pl-s"><span class="pl-pds">"</span>StreamExample<span class="pl-pds">"</span></span>)
@@ -220,12 +220,12 @@
 <span class="pl-s">      | STORED BY 'carbondata'</span>
 <span class="pl-s">      | TBLPROPERTIES('streaming'='true')<span class="pl-pds">"""</span></span>.stripMargin)
- <span class="pl-k">val</span> <span class="pl-en">carbonTable</span> <span class="pl-k">=</span> <span class="pl-en">CarbonEnv</span>.getCarbonTable(<span class="pl-en">Some</span>(<span class="pl-s"><span class="pl-pds">"</span>default<span class="pl-pds">"</span></span>), <span class="pl-s"><span class="pl-pds">"</span>carbon_table<span class="pl-pds">"</span></span>)(spark)
- <span class="pl-k">val</span> <span class="pl-en">tablePath</span> <span class="pl-k">=</span> <span class="pl-en">CarbonStorePath</span>.getCarbonTablePath(carbonTable.getAbsoluteTableIdentifier)
+ <span class="pl-k">val</span> <span class="pl-smi">carbonTable</span> <span class="pl-k">=</span> <span class="pl-en">CarbonEnv</span>.getCarbonTable(<span class="pl-en">Some</span>(<span class="pl-s"><span class="pl-pds">"</span>default<span class="pl-pds">"</span></span>), <span class="pl-s"><span class="pl-pds">"</span>carbon_table<span class="pl-pds">"</span></span>)(spark)
+ <span class="pl-k">val</span> <span class="pl-smi">tablePath</span> <span class="pl-k">=</span> <span class="pl-en">CarbonStorePath</span>.getCarbonTablePath(carbonTable.getAbsoluteTableIdentifier)
  <span class="pl-c"><span class="pl-c">//</span> batch load</span>
- <span class="pl-k">var</span> <span class="pl-en">qry</span><span class="pl-k">:</span> <span class="pl-en">StreamingQuery</span> <span class="pl-k">=</span> <span class="pl-c1">null</span>
- <span class="pl-k">val</span> <span class="pl-en">readSocketDF</span> <span class="pl-k">=</span> spark.readStream
+ <span class="pl-k">var</span> <span class="pl-smi">qry</span><span class="pl-k">:</span> <span class="pl-en">StreamingQuery</span> <span class="pl-k">=</span> <span class="pl-c1">null</span>
+ <span class="pl-k">val</span> <span class="pl-smi">readSocketDF</span> <span class="pl-k">=</span> spark.readStream
    .format(<span class="pl-s"><span class="pl-pds">"</span>socket<span class="pl-pds">"</span></span>)
    .option(<span class="pl-s"><span class="pl-pds">"</span>host<span class="pl-pds">"</span></span>, <span class="pl-s"><span class="pl-pds">"</span>localhost<span class="pl-pds">"</span></span>)
    .option(<span class="pl-s"><span class="pl-pds">"</span>port<span class="pl-pds">"</span></span>, <span class="pl-c1">9099</span>)
@@ -327,7 +327,7 @@ streaming table using following DDL.</p>
 <a id="change-segment-status" class="anchor" href="#change-segment-status" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Change segment status</h2>
-<p>Use below command to change the status of "streaming" segment to "streaming finish" segment.</p>
+<p>Use below command to change the status of "streaming" segment to "streaming finish" segment. If the streaming application is running, this command will be blocked.</p>
 <div class="highlight highlight-source-sql"><pre><span class="pl-k">ALTER</span> <span class="pl-k">TABLE</span> streaming_table FINISH STREAMING</pre></div>
 <a id="handoff-streaming-finish-segment-to-columnar-segment" class="anchor" href="#handoff-streaming-finish-segment-to-columnar-segment" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Handoff "streaming finish" segment to columnar segment</h2>
@@ -379,8 +379,8 @@ streaming table using following DDL.</p>
  <span class="pl-k">case</span> <span class="pl-k">class</span> <span class="pl-en">StreamData</span>(<span class="pl-v">id</span>: <span class="pl-k">Int</span>, <span class="pl-v">name</span>: <span class="pl-k">String</span>, <span class="pl-v">city</span>: <span class="pl-k">String</span>, <span class="pl-v">salary</span>: <span class="pl-k">Float</span>, <span class="pl-v">file</span>: <span class="pl-en">FileElement</span>)
- <span class="pl-k">var</span> <span class="pl-en">qry</span><span class="pl-k">:</span> <span class="pl-en">StreamingQuery</span> <span class="pl-k">=</span> <span class="pl-c1">null</span>
- <span class="pl-k">val</span> <span class="pl-en">readSocketDF</span> <span class="pl-k">=</span> spark.readStream
+ <span class="pl-k">var</span> <span class="pl-smi">qry</span><span class="pl-k">:</span> <span class="pl-en">StreamingQuery</span> <span class="pl-k">=</span> <span class="pl-c1">null</span>
+ <span class="pl-k">val</span> <span class="pl-smi">readSocketDF</span> <span class="pl-k">=</span> spark.readStream
    .format(<span class="pl-s"><span class="pl-pds">"</span>socket<span class="pl-pds">"</span></span>)
    .option(<span class="pl-s"><span class="pl-pds">"</span>host<span class="pl-pds">"</span></span>, <span class="pl-s"><span class="pl-pds">"</span>localhost<span class="pl-pds">"</span></span>)
    .option(<span class="pl-s"><span class="pl-pds">"</span>port<span class="pl-pds">"</span></span>, <span class="pl-c1">9099</span>)
@@ -388,8 +388,8 @@ streaming table using following DDL.</p>
    .as[<span class="pl-k">String</span>]
    .map(_.split(<span class="pl-s"><span class="pl-pds">"</span>,<span class="pl-pds">"</span></span>))
    .map { fields <span class="pl-k">=&gt;</span> {
-     <span class="pl-k">val</span> <span class="pl-en">tmp</span> <span class="pl-k">=</span> fields(<span class="pl-c1">4</span>).split(<span class="pl-s"><span class="pl-pds">"</span><span class="pl-cce">\\</span>$<span class="pl-pds">"</span></span>)
-     <span class="pl-k">val</span> <span class="pl-en">file</span> <span class="pl-k">=</span> <span class="pl-en">FileElement</span>(tmp(<span class="pl-c1">0</span>).split(<span class="pl-s"><span class="pl-pds">"</span>:<span class="pl-pds">"</span></span>), tmp(<span class="pl-c1">1</span>).toInt)
+     <span class="pl-k">val</span> <span class="pl-smi">tmp</span> <span class="pl-k">=</span> fields(<span class="pl-c1">4</span>).split(<span class="pl-s"><span class="pl-pds">"</span><span class="pl-cce">\\</span>$<span class="pl-pds">"</span></span>)
+     <span class="pl-k">val</span> <span class="pl-smi">file</span> <span class="pl-k">=</span> <span class="pl-en">FileElement</span>(tmp(<span class="pl-c1">0</span>).split(<span class="pl-s"><span class="pl-pds">"</span>:<span class="pl-pds">"</span></span>), tmp(<span class="pl-c1">1</span>).toInt)
      <span class="pl-en">StreamData</span>(fields(<span class="pl-c1">0</span>).toInt, fields(<span class="pl-c1">1</span>), fields(<span class="pl-c1">2</span>), fields(<span class="pl-c1">3</span>).toFloat, file)
    } }
@@ -408,11 +408,11 @@ streaming table using following DDL.</p>
 <a id="how-to-implement-a-customized-stream-parser" class="anchor" href="#how-to-implement-a-customized-stream-parser" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>How to implement a customized stream parser</h3>
 <p>If user needs to implement a customized stream parser to convert a specific InternalRow to Object[], it needs to implement <code>initialize</code> method and <code>parserRow</code> method of interface <code>CarbonStreamParser</code>, for example:</p>
-<div class="highlight highlight-source-scala"><pre> <span class="pl-k">package</span> <span class="pl-en">org.XXX.XXX.streaming.parser</span>
+<div class="highlight highlight-source-scala"><pre> <span class="pl-k">package</span> <span class="pl-en">org</span>.<span class="pl-en">XXX</span>.<span class="pl-en">XXX</span>.<span class="pl-en">streaming</span>.<span class="pl-en">parser</span>
- <span class="pl-k">import</span> <span class="pl-smi">org.apache.hadoop.conf.</span><span class="pl-smi">Configuration</span>
- <span class="pl-k">import</span> <span class="pl-smi">org.apache.spark.sql.catalyst.</span><span class="pl-smi">InternalRow</span>
- <span class="pl-k">import</span> <span class="pl-smi">org.apache.spark.sql.types.</span><span class="pl-smi">StructType</span>
+ <span class="pl-k">import</span> <span class="pl-en">org</span>.<span class="pl-en">apache</span>.<span class="pl-en">hadoop</span>.<span class="pl-en">conf</span>.<span class="pl-en">Configuration</span>
+ <span class="pl-k">import</span> <span class="pl-en">org</span>.<span class="pl-en">apache</span>.<span class="pl-en">spark</span>.<span class="pl-en">sql</span>.<span class="pl-en">catalyst</span>.<span class="pl-en">InternalRow</span>
+ <span class="pl-k">import</span> <span class="pl-en">org</span>.<span class="pl-en">apache</span>.<span class="pl-en">spark</span>.<span class="pl-en">sql</span>.<span class="pl-en">types</span>.<span class="pl-en">StructType</span>
  <span class="pl-k">class</span> <span class="pl-en">XXXStreamParserImp</span> <span class="pl-k">extends</span> <span class="pl-e">CarbonStreamParser</span> {
diff --git a/src/site/markdown/ b/src/site/markdown/
index c4724cc..e39d61b 100644
--- a/src/site/markdown/
+++ b/src/site/markdown/
@@ -39,6 +39,7 @@ This section provides the details of all the configurations required for the Car
 | | true | If this parameter value is set to true, auto trigger handoff function will be enabled.|
 | carbon.streaming.segment.max.size | 1024000000 | This parameter defines the maximum size of the streaming segment. Setting this parameter to appropriate value will avoid impacting the streaming ingestion. The value is in bytes.|
 | | true | If this parameter value is set to true, show tables command will list all the tables including datamaps(eg: Preaggregate table), else datamaps will be excluded from the table list. |
+| carbon.segment.lock.files.preserve.hours | 48 | This property value indicates the number of hours the segment lock files will be preserved after dataload. These lock files will be deleted with the clean command after the configured number of hours. |
 ##  Performance Configuration
 This section provides the details of all the configurations required for CarbonData Performance Optimization.
diff --git a/src/site/markdown/ b/src/site/markdown/
index ecaf068..51e98ab 100644
--- a/src/site/markdown/
+++ b/src/site/markdown/
@@ -39,7 +39,7 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
   [TBLPROPERTIES (property_name=property_value, ...)]
   [LOCATION 'path']
+  **NOTE:** CarbonData also supports "STORED AS carbondata". Find example code at [CarbonSessionExample]( in the CarbonData repo.
 ### Usage Guidelines
   Following are the guidelines for TBLPROPERTIES, CarbonData's additional table options can be set via
@@ -93,7 +93,7 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
-     NOTE: 512 or 512M both are accepted.
+     **NOTE:** 512 or 512M both are accepted.
    - **Table Compaction Configuration**
@@ -138,10 +138,11 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
     TBLPROPERTIES ('SORT_COLUMNS'='productName,storeCity',
+  **NOTE:** CarbonData also supports "using carbondata". Find example code at [SparkSessionExample]( in the CarbonData repo.
   This function allows user to create a Carbon table from any of the Parquet/Hive/Carbon table. This is beneficial when the user wants to create Carbon table from any other Parquet/Hive table and use the Carbon query engine to query and achieve better query results for cases where Carbon is faster than other file formats. Also this feature can be used for backing up the data.
-### Syntax
   CREATE TABLE [IF NOT EXISTS] [db_name.]table_name 
   STORED BY 'carbondata' 
@@ -174,6 +175,50 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
+  This function allows user to create external table by specifying location.
+  ```
+  CREATE EXTERNAL TABLE [IF NOT EXISTS] [db_name.]table_name 
+  STORED BY 'carbondata' LOCATION '$FilesPath'
+  ```
+### Create external table on managed table data location.
+  Managed table data location provided will have both FACT and Metadata folder. 
+  This data can be generated by creating a normal carbon table and use this path as $FilesPath in the above syntax.
+  **Example:**
+  ```
+  sql("CREATE TABLE origin(key INT, value STRING) STORED BY 'carbondata'")
+  sql("INSERT INTO origin select 100,'spark'")
+  sql("INSERT INTO origin select 200,'hive'")
+  // creates a table in $storeLocation/origin
+  sql(s"""
+  |STORED BY 'carbondata'
+  |LOCATION '$storeLocation/origin'
+  """.stripMargin)
+  checkAnswer(sql("SELECT count(*) from source"), sql("SELECT count(*) from origin"))
+  ```
+### Create external table on Non-Transactional table data location.
+  Non-Transactional table data location will have only carbondata and carbonindex files, there will not be a metadata folder (table status and schema).
+  Our SDK module currently supports writing data in this format.
+  **Example:**
+  ```
+  sql(
+  s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION
+  |'$writerPath' """.stripMargin)
+  ```
+  Here writer path will have carbondata and index files.
+  This can be SDK output. Refer [SDK Writer Guide]( 
+  **Note:**
+  Dropping of the external table should not delete the files present in the location.
   This function creates a new database. By default the database is created in Carbon store location, but you can also specify custom location.
@@ -268,7 +313,7 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
      Valid Scenarios
      - Invalid scenario - Change of decimal precision from (10,2) to (10,5) is invalid as in this case only scale is increased but total number of digits remains the same.
      - Valid scenario - Change of decimal precision from (10,2) to (12,3) is valid as the total number of digits are increased by 2 but scale is increased only by 1 which will not lead to any data loss.
-     - NOTE: The allowed range is 38,38 (precision, scale) and is a valid upper case scenario which is not resulting in data loss.
+     - **NOTE:** The allowed range is 38,38 (precision, scale) and is a valid upper case scenario which is not resulting in data loss.
      Example1:Changing data type of column a1 from INT to BIGINT.
@@ -303,7 +348,7 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
   REFRESH TABLE dbcarbon.productSalesTable
-  NOTE: 
+  **NOTE:** 
   * The new database name and the old database name should be same.
   * Before executing this command the old table schema and data should be copied into the new database location.
   * If the table is aggregate table, then all the aggregate tables should be copied to the new database location.
@@ -385,7 +430,7 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
-	NOTE: If the HEADER option exist and is set to 'true', then the FILEHEADER option is not required.
+	**NOTE:** If the HEADER option exist and is set to 'true', then the FILEHEADER option is not required.
   - **FILEHEADER:** Headers can be provided in the LOAD DATA command if headers are missing in the source files.
@@ -433,25 +478,29 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
-    NOTE: ALL_DICTIONARY_PATH and COLUMNDICT can't be used together.
+    **NOTE:** ALL_DICTIONARY_PATH and COLUMNDICT can't be used together.
   - **DATEFORMAT/TIMESTAMPFORMAT:** Date and Timestamp format for specified column.
     OPTIONS('DATEFORMAT' = 'yyyy-MM-dd','TIMESTAMPFORMAT'='yyyy-MM-dd HH:mm:ss')
-    NOTE: Date formats are specified by date pattern strings. The date pattern letters in CarbonData are same as in JAVA. Refer to [SimpleDateFormat](
+    **NOTE:** Date formats are specified by date pattern strings. The date pattern letters in CarbonData are same as in JAVA. Refer to [SimpleDateFormat](
   - **SORT COLUMN BOUNDS:** Range bounds for sort columns.
+    Suppose the table is created with 'SORT_COLUMNS'='name,id' and the range for name is aaa~zzz, the value range for id is 0~1000. Then during data loading, we can specify the following option to enhance data loading performance.
-    OPTIONS('SORT_COLUMN_BOUNDS'='v11,v21,v31;v12,v22,v32;v13,v23,v33')
+    OPTIONS('SORT_COLUMN_BOUNDS'='f,250;l,500;r,750')
-    NOTE:
+    Each bound is separated by ';' and each field value in bound is separated by ','. In the example above, we provide 3 bounds to distribute records to 4 partitions. The values 'f','l','r' can evenly distribute the records. Inside carbondata, for a record we compare the value of sort columns with that of the bounds and decide which partition the record will be forwarded to.
+    **NOTE:**
     * SORT_COLUMN_BOUNDS will be used only when the SORT_SCOPE is 'local_sort'.
-    * Each bound is separated by ';' and each field value in bound is separated by ','.
-    * Carbondata will use these bounds as ranges to process data concurrently.
+    * Carbondata will use these bounds as ranges to process data concurrently during the final sort procedure. The records will be sorted and written out inside each partition. Since the partition is sorted, all records will be sorted.
     * Since the actual order and literal order of the dictionary column are not necessarily the same, we do not recommend you to use this feature if the first sort column is 'dictionary_include'.
+    * The option works better if your CPU usage during loading is low. If your system is already CPU-bound, it is better not to use this option. Besides, it depends on the user to specify the bounds. If the user does not know the exact bounds that distribute the data evenly among the partitions, loading performance will still be better than before or at least the same as before.
+    * Users can find more information about this option in the description of PR1953.
   - **SINGLE_PASS:** Single Pass Loading enables single job to finish data loading with dictionary generation on the fly. It enhances performance in the scenarios where the subsequent data loading after initial load involves fewer incremental updates on the dictionary.
@@ -461,7 +510,7 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
-   NOTE:
+   **NOTE:**
    * If this option is set to TRUE then data loading will take less time.
    * If this option is set to some invalid value other than TRUE or FALSE then it uses the default value.
@@ -489,7 +538,7 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
-  NOTE:
+  **NOTE:**
   * BAD_RECORDS_ACTION property can have four type of actions for bad records FORCE, REDIRECT, IGNORE and FAIL.
   * FAIL option is its Default value. If the FAIL option is used, then data loading fails if any bad records are found.
   * If the REDIRECT option is used, CarbonData will add all bad records in to a separate CSV file. However, this file must not be used for subsequent data loading because the content may not exactly match the source record. You are advised to cleanse the original source record for further data ingestion. This option is used to remind you which records are bad records.
@@ -526,11 +575,11 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
   Overwrite insert data:
   [ WHERE { <filter_condition> } ]
-  NOTE:
+  **NOTE:**
   * The source table and the CarbonData table must have the same table schema.
   * The data type of source and destination table columns should be same
   * INSERT INTO command does not support partial success if bad records are found, it will fail.
@@ -546,7 +595,7 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
@@ -569,7 +618,7 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
   [ WHERE { <filter_condition> } ]
-  NOTE:The update command fails if multiple input rows in source table are matched with single row in destination table.
+  **NOTE:** The update command fails if multiple input rows in source table are matched with single row in destination table.
@@ -622,10 +671,10 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
   Compaction improves the query performance significantly. 
-  There are two types of compaction, Minor and Major compaction.
+  There are several types of compaction.
-  ALTER TABLE [db_name.]table_name COMPACT 'MINOR/MAJOR'
   - **Minor Compaction**
@@ -651,6 +700,17 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
+  - **Custom Compaction**
+  In Custom compaction, user can directly specify segment ids to be merged into one large segment. 
+  All specified segment ids should exist and be valid, otherwise compaction will fail. 
+  Custom compaction is usually done during the off-peak time. 
+  ```
+  ```
   - **CLEAN SEGMENTS AFTER Compaction**
@@ -778,7 +838,7 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
                   'NUM_PARTITIONS'='N' ...)]
-  NOTE: N is the number of hash partitions
+  **NOTE:** N is the number of hash partitions
@@ -805,7 +865,7 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
                   'RANGE_INFO'='2014-01-01, 2015-01-01, 2016-01-01, ...')]
-  NOTE:
+  **NOTE:**
   * The 'RANGE_INFO' must be defined in ascending order in the table properties.
   * The default format for partition column of Date/Timestamp type is yyyy-MM-dd. Alternate formats for Date/Timestamp could be defined in CarbonProperties.
@@ -834,7 +894,7 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
                   'LIST_INFO'='A, B, C, ...')]
-  NOTE: List partition supports list info in one level group.
+  **NOTE:** List partition supports list info in one level group.
@@ -883,7 +943,7 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
   ALTER TABLE [db_name].table_name DROP PARTITION(partition_id) WITH DATA
-  NOTE:
+  **NOTE:**
   * Hash partition table is not supported for ADD, SPLIT and DROP commands.
   * Partition Id: in CarbonData like the hive, folders are not used to divide partitions instead partition id is used to replace the task id. It could make use of the characteristic and meanwhile reduce some metadata.
@@ -913,7 +973,7 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
-  NOTE:
+  **NOTE:**
   * Bucketing cannot be performed for columns of Complex Data Types.
   * Columns in the BUCKETCOLUMN parameter must be dimensions. The BUCKETCOLUMN parameter cannot be a measure or a combination of measures and dimensions.
@@ -939,13 +999,18 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
   This command is used to list the segments of CarbonData table.
-  SHOW SEGMENTS FOR TABLE [db_name.]table_name LIMIT number_of_segments
+  SHOW [HISTORY] SEGMENTS FOR TABLE [db_name.]table_name LIMIT number_of_segments
+  Show visible segments
   SHOW SEGMENTS FOR TABLE CarbonDatabase.CarbonTable LIMIT 4
+  Show all segments, including invisible segments
+  ```
+  ```
@@ -999,7 +1064,7 @@ This tutorial is going to introduce all commands and data operations on CarbonDa
   SET carbon.input.segments.<database_name>.<table_name> = <list of segment IDs>
-  NOTE:
+  **NOTE:**
   carbon.input.segments: Specifies the segment IDs to be queried. This property allows you to query specified segments of the specified table. The CarbonScan will read data from specified segments only.
   If the user wants to query the specified segments in multi-threading mode, then CarbonSession.threadSet can be used instead of the SET query.
diff --git a/src/site/markdown/ b/src/site/markdown/
new file mode 100644
index 0000000..31afd34
--- /dev/null
+++ b/src/site/markdown/
@@ -0,0 +1,16 @@
+# DataMap Developer Guide
+### Introduction
+DataMap is a data structure that can be used to accelerate certain queries on the table. Different DataMaps can be implemented by developers. 
+Currently, two types of DataMap are supported:
+1. IndexDataMap: DataMap that leverages an index to accelerate filter queries
+2. MVDataMap: DataMap that leverages a Materialized View to accelerate OLAP-style queries, like SPJG queries (select, predicate, join, groupby)
+### DataMap provider
+When user issues `CREATE DATAMAP dm ON TABLE main USING 'provider'`, the corresponding DataMapProvider implementation will be created and initialized. 
+Currently, the provider string can be:
+1. preaggregate: one type of MVDataMap that pre-aggregates a single table
+2. timeseries: one type of MVDataMap that pre-aggregates based on the time dimension of the table
+3. class name of an IndexDataMapFactory implementation: developers can implement a new type of IndexDataMap by extending IndexDataMapFactory
+When user issues `DROP DATAMAP dm ON TABLE main`, the corresponding DataMapProvider interface will be called.
\ No newline at end of file
diff --git a/src/site/markdown/ b/src/site/markdown/
index b5f8254..9f74842 100644
--- a/src/site/markdown/
+++ b/src/site/markdown/
@@ -26,6 +26,8 @@
 * [How to resolve Abstract Method Error?](#how-to-resolve-abstract-method-error)
 * [How Carbon will behave when execute insert operation in abnormal scenarios?](#how-carbon-will-behave-when-execute-insert-operation-in-abnormal-scenarios)
 * [Why aggregate query is not fetching data from aggregate table?](#why-aggregate-query-is-not-fetching-data-from-aggregate-table)
+* [Why all executors are showing success in Spark UI even after Dataload command failed at Driver side?](#why-all-executors-are-showing-success-in-spark-ui-even-after-dataload-command-failed-at-driver-side)
+* [Why different time zone result for select query output when query SDK writer output?](#why-different-time-zone-result-for-select-query-output-when-query-sdk-writer-output)
 ## What are Bad Records?
 Records that fail to get loaded into the CarbonData due to data type incompatibility or are empty or have incompatible format are classified as Bad Records.
@@ -178,4 +180,18 @@ create datamap ag1 on table gdp21 using 'preaggregate' as select cntry, sum(gdp)
 select cntry,sum(gdp) from gdp21,pop1 where cntry=ctry group by cntry;
+## Why all executors are showing success in Spark UI even after Dataload command failed at Driver side?
+Spark executor shows a task as failed only after the maximum number of retry attempts. However, when loading data that has bad records with BAD_RECORDS_ACTION (carbon.bad.records.action) set to "FAIL", the task is attempted only once and signals the failure to the driver instead of throwing an exception to trigger a retry, as there is no point in retrying when a bad record is found and BAD_RECORDS_ACTION is set to FAIL. Hence the Spark executor displays this one attempt as successful even though the command has actually failed to execute. Task attempts or executor logs can be checked to observe the failure reason.
+## Why different time zone result for select query output when query SDK writer output? 
+SDK writer is an independent entity, hence it can generate carbondata files from a non-cluster machine that has a different time zone. But when those files are read at the cluster, the cluster time zone is always used. Hence, the values of timestamp and date datatype fields differ from the original values.
+To control the time zone of the data while writing, set the cluster's time zone in the SDK writer by calling the API below.
+cluster timezone is Asia/Shanghai
diff --git a/src/site/markdown/ b/src/site/markdown/
index 84f06c4..ce7cbcc 100644
--- a/src/site/markdown/
+++ b/src/site/markdown/
@@ -105,7 +105,10 @@ DataMap can be created using following DDL
 The string followed by USING is called DataMap Provider, in this version CarbonData supports two 
 kinds of DataMap: 
-1. preaggregate, for pre-aggregate table. No DMPROPERTY is required for this DataMap
+1. preaggregate, for pre-aggregate table. Pre-Aggregate table supports two values for DMPROPERTIES.
+   a. 'path' is used to specify the store location of the datamap.('path'='/location/').
+   b. 'partitioning' when set to false enables user to disable partitioning of the datamap.
+       Default value is true for this property.
 2. timeseries, for timeseries roll-up table. Please refer to [Timeseries DataMap](
 DataMap can be dropped using following DDL
diff --git a/src/site/markdown/ b/src/site/markdown/
new file mode 100644
index 0000000..9878b71
--- /dev/null
+++ b/src/site/markdown/
@@ -0,0 +1,359 @@
+# SDK Writer Guide
+The carbon jars package contains a carbondata-store-sdk-x.x.x-SNAPSHOT.jar.
+This SDK writer writes carbondata and carbonindex files at a given path.
+An external client can use this writer to convert data in other formats, or live data, into carbondata and index files.
+The SDK writer output contains only carbondata and carbonindex files. No metadata folder will be present.
+## Quick example
+### Example with csv format 
+ import java.io.IOException;
+ import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
+ import org.apache.carbondata.core.metadata.datatype.DataTypes;
+ import org.apache.carbondata.sdk.file.CarbonWriter;
+ import org.apache.carbondata.sdk.file.CarbonWriterBuilder;
+ import org.apache.carbondata.sdk.file.Field;
+ import org.apache.carbondata.sdk.file.Schema;
+ /**
+  * Minimal example that writes CSV-style rows (String arrays) with the SDK writer.
+  */
+ public class TestSdk {
+   public static void main(String[] args) throws IOException, InvalidLoadOptionException {
+     testSdkWriter();
+   }
+   /**
+    * Writes five rows with the columns "name" (STRING) and "age" (INT) to a fixed output path.
+    *
+    * @throws IOException if building, writing to, or closing the writer fails
+    * @throws InvalidLoadOptionException propagated from buildWriterForCSVInput()
+    */
+   public static void testSdkWriter() throws IOException, InvalidLoadOptionException {
+     String path = "/home/root1/Documents/ab/temp";
+     // Schema with two columns: name (STRING) and age (INT).
+     Field[] fields = new Field[2];
+     fields[0] = new Field("name", DataTypes.STRING);
+     fields[1] = new Field("age", DataTypes.INT);
+     Schema schema = new Schema(fields);
+     CarbonWriterBuilder builder = CarbonWriter.builder().withSchema(schema).outputPath(path);
+     CarbonWriter writer = builder.buildWriterForCSVInput();
+     int rows = 5;
+     try {
+       for (int i = 0; i < rows; i++) {
+         // Each row is a String[] whose values follow the order of the schema fields.
+         writer.write(new String[] { "robot" + (i % 10), String.valueOf(i) });
+       }
+     } finally {
+       // Always flush and close the writer, even if a write fails part-way through.
+       writer.close();
+     }
+   }
+ }
+### Example with Avro format
+import java.io.IOException;
+import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.sdk.file.AvroCarbonWriter;
+import org.apache.carbondata.sdk.file.CarbonWriter;
+import org.apache.carbondata.sdk.file.Field;
+import org.apache.avro.generic.GenericData;
+import org.apache.commons.lang.CharEncoding;
+import tech.allegro.schema.json2avro.converter.JsonAvroConverter;
+/**
+ * Minimal example that writes Avro records (GenericData.Record) with the SDK writer.
+ */
+public class TestSdkAvro {
+  public static void main(String[] args) throws IOException, InvalidLoadOptionException {
+    testSdkWriter();
+  }
+  /**
+   * Converts one JSON document to an Avro record and writes that record 100 times.
+   * Failures while building, writing to, or closing the writer are printed, not rethrown.
+   *
+   * @throws IOException declared for the JSON-to-bytes conversion and schema handling
+   * @throws InvalidLoadOptionException declared for callers; builder failures are caught below
+   */
+  public static void testSdkWriter() throws IOException, InvalidLoadOptionException {
+    String path = "./AvroCarbonWriterSuiteWriteFiles";
+    // Avro schema
+    String avroSchema =
+        "{" +
+            "   \"type\" : \"record\"," +
+            "   \"name\" : \"Acme\"," +
+            "   \"fields\" : ["
+            + "{ \"name\" : \"fname\", \"type\" : \"string\" },"
+            + "{ \"name\" : \"age\", \"type\" : \"int\" }]" +
+            "}";
+    String json = "{\"fname\":\"bob\", \"age\":10}";
+    // conversion to GenericData.Record
+    JsonAvroConverter converter = new JsonAvroConverter();
+    GenericData.Record record = converter.convertToGenericDataRecord(
+        json.getBytes(CharEncoding.UTF_8), new org.apache.avro.Schema.Parser().parse(avroSchema));
+    // prepare carbon schema from avro schema 
+    org.apache.carbondata.sdk.file.Schema carbonSchema =
+            AvroCarbonWriter.getCarbonSchemaFromAvroSchema(avroSchema);
+    try {
+      CarbonWriter writer = CarbonWriter.builder()
+          .withSchema(carbonSchema)
+          .outputPath(path)
+          .buildWriterForAvroInput();
+      try {
+        for (int i = 0; i < 100; i++) {
+          writer.write(record);
+        }
+      } finally {
+        // Always flush and close the writer, even if a write fails part-way through.
+        writer.close();
+      }
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+  }
+}
+## Datatypes Mapping
+Each SQL data type is mapped to an SDK data type. The mapping is as follows:
+| SQL DataTypes | Mapped SDK DataTypes |
+|---------------|----------------------|
+| BOOLEAN | DataTypes.BOOLEAN |
+| SMALLINT | DataTypes.SHORT |
+| INTEGER | DataTypes.INT |
+| BIGINT | DataTypes.LONG |
+| DOUBLE | DataTypes.DOUBLE |
+| VARCHAR | DataTypes.STRING |
+| DATE | DataTypes.DATE |
+| STRING | DataTypes.STRING |
+| DECIMAL | DataTypes.createDecimalType(precision, scale) |
+## API List
+### Class org.apache.carbondata.sdk.file.CarbonWriterBuilder
+* prepares the builder with the schema provided
+* @param schema is instance of Schema
+*        This method must be called when building CarbonWriterBuilder
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder withSchema(Schema schema);
+* Sets the output path of the writer builder
+* @param path is the absolute path where output files are written
+*             This method must be called when building CarbonWriterBuilder
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder outputPath(String path);
+* If set to false, writes the carbondata and carbonindex files in a flat folder structure
+* @param isTransactionalTable is a boolean value
+*             if set to false, then writes the carbondata and carbonindex files
+*                                                            in a flat folder structure.
+*             if set to true, then writes the carbondata and carbonindex files
+*                                                            in segment folder structure.
+*             By default set to false.
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder isTransactionalTable(boolean isTransactionalTable);
+* Sets the timestamp used in the carbondata and carbonindex files
+* @param UUID is a timestamp to be used in the carbondata and carbonindex files.
+*             By default set to zero.
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder uniqueIdentifier(long UUID);
+* To set the carbondata file size in MB between 1MB-2048MB
+* @param blockSize is size in MB between 1MB to 2048 MB
+*                  default value is 1024 MB
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder withBlockSize(int blockSize);
+* To set the blocklet size of carbondata file
+* @param blockletSize is blocklet size in MB
+*                     default value is 64 MB
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder withBlockletSize(int blockletSize);
+* Sets the list of columns that need to be in sorted order
+* @param sortColumns is a string array of the columns that need to be sorted.
+*                    If it is null (the default), all dimensions are selected for sorting.
+*                    If it is an empty array, no columns are sorted.
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder sortBy(String[] sortColumns);
+* If set, creates a schema file in the metadata folder.
+* @param persist is a boolean value. If set to true, creates a schema file in the metadata folder.
+*                By default set to false, in which case no metadata folder is created.
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder persistSchemaFile(boolean persist);
+* Sets the taskNo for the writer. SDK writers running concurrently
+* should set distinct taskNo values to avoid file name conflicts during write.
+* @param taskNo is the taskNo the user wants to specify.
+*               By default it is the system time in nanoseconds.
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder taskNo(String taskNo);
+* To support the load options for sdk writer
+* @param options key,value pair of load options.
+*                supported keys values are
+*                a. bad_records_logger_enable -- true (write into separate logs), false
+*                b. bad_records_action -- FAIL, FORCE, IGNORE, REDIRECT
+*                c. bad_record_path -- path
+*                d. dateformat -- same as JAVA SimpleDateFormat
+*                e. timestampformat -- same as JAVA SimpleDateFormat
+*                f. complex_delimiter_level_1 -- value to Split the complexTypeData
+*                g. complex_delimiter_level_2 -- value to Split the nested complexTypeData
+*                h. quotechar
+*                i. escapechar
+*                Default values are as follows.
+*                a. bad_records_logger_enable -- "false"
+*                b. bad_records_action -- "FAIL"
+*                c. bad_record_path -- ""
+*                d. dateformat -- "" , uses from file
+*                e. timestampformat -- "", uses from file
+*                f. complex_delimiter_level_1 -- "$"
+*                g. complex_delimiter_level_2 -- ":"
+*                h. quotechar -- "\""
+*                i. escapechar -- "\\"
+* @return updated CarbonWriterBuilder
+public CarbonWriterBuilder withLoadOptions(Map<String, String> options);
+* Build a {@link CarbonWriter}, which accepts row in CSV format object
+* @return CSVCarbonWriter
+* @throws IOException
+* @throws InvalidLoadOptionException
+public CarbonWriter buildWriterForCSVInput() throws IOException, InvalidLoadOptionException;
+* Build a {@link CarbonWriter}, which accepts Avro format object
+* @return AvroCarbonWriter 
+* @throws IOException
+* @throws InvalidLoadOptionException
+public CarbonWriter buildWriterForAvroInput() throws IOException, InvalidLoadOptionException;
+### Class org.apache.carbondata.sdk.file.CarbonWriter
+* Write an object to the file, the format of the object depends on the implementation
+* If AvroCarbonWriter, object is of type org.apache.avro.generic.GenericData.Record 
+* If CSVCarbonWriter, object is of type String[]
+* Note: This API is not thread safe
+* @param object
+* @throws IOException
+public abstract void write(Object object) throws IOException;
+* Flush and close the writer
+public abstract void close() throws IOException;
+* Create a {@link CarbonWriterBuilder} to build a {@link CarbonWriter}
+public static CarbonWriterBuilder builder() {
+return new CarbonWriterBuilder();
+}
+### Class org.apache.carbondata.sdk.file.Field
+* Field Constructor
+* @param name name of the field
+* @param type datatype of field, specified in strings.
+public Field(String name, String type);
+* Field constructor
+* @param name name of the field
+* @param type datatype of the field of class DataType
+public Field(String name, DataType type);  
+### Class org.apache.carbondata.sdk.file.Schema
+* construct a schema with fields
+* @param fields
+public Schema(Field[] fields);
+* Create a Schema using JSON string, for example:
+* [
+*   {"name":"string"},
+*   {"age":"int"}
+* ] 
+* @param json specified as string
+* @return Schema
+public static Schema parseJson(String json);
+### Class org.apache.carbondata.sdk.file.AvroCarbonWriter
+* converts avro schema to carbon schema, required by carbonWriter
+* @param avroSchemaString json formatted avro schema as string
+* @return carbon sdk schema
+public static org.apache.carbondata.sdk.file.Schema getCarbonSchemaFromAvroSchema(String avroSchemaString);
\ No newline at end of file
diff --git a/src/site/markdown/ b/src/site/markdown/
index aa9eaef..3ea2881 100644
--- a/src/site/markdown/
+++ b/src/site/markdown/
@@ -133,7 +133,7 @@ streaming | The segment is running streaming ingestion
 streaming finish | The segment already finished streaming ingestion, <br /> it will be handed off to a segment in the columnar format
 ## Change segment status
-Use below command to change the status of "streaming" segment to "streaming finish" segment.
+Use below command to change the status of "streaming" segment to "streaming finish" segment. If the streaming application is running, this command will be blocked.
diff --git a/src/site/pdf.xml b/src/site/pdf.xml
index 05fbff9..e8449a1 100644
--- a/src/site/pdf.xml
+++ b/src/site/pdf.xml
@@ -16,6 +16,8 @@
         <item name="Installation" ref=''/>
         <item name="Configuring CarbonData" ref=''/>
         <item name="Streaming Guide" ref=''/>
+        <item name="SDK Writer Guide" ref=''/>
+        <item name="DataMap Developer Guide" ref=''/>
         <item name="CarbonData Pre-aggregate DataMap" ref=''/>
         <item name="CarbonData Timeseries DataMap" ref=''/>
         <item name="FAQs" ref=''/>