You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2021/01/27 21:13:25 UTC

[iceberg] branch asf-site updated: Deployed e1a428550 with MkDocs version: 1.0.4

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new aa4bb00  Deployed e1a428550 with MkDocs version: 1.0.4
aa4bb00 is described below

commit aa4bb006c6550b61432915a63514ccc7aa8b7e37
Author: Ryan Blue <bl...@apache.org>
AuthorDate: Wed Jan 27 13:13:08 2021 -0800

    Deployed e1a428550 with MkDocs version: 1.0.4
---
 sitemap.xml                 |  60 ++++-----
 sitemap.xml.gz              | Bin 230 -> 230 bytes
 spark-procedures/index.html | 305 ++++++++++++++++++++++----------------------
 3 files changed, 184 insertions(+), 181 deletions(-)

diff --git a/sitemap.xml b/sitemap.xml
index 2091b52..c04797b 100644
--- a/sitemap.xml
+++ b/sitemap.xml
@@ -2,97 +2,97 @@
 <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
@@ -107,17 +107,17 @@
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
@@ -127,42 +127,42 @@
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
      <loc>None</loc>
-     <lastmod>2021-01-26</lastmod>
+     <lastmod>2021-01-27</lastmod>
      <changefreq>daily</changefreq>
     </url>
     <url>
diff --git a/sitemap.xml.gz b/sitemap.xml.gz
index dbdfba6..eb27888 100644
Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ
diff --git a/spark-procedures/index.html b/spark-procedures/index.html
index 62a0f3c..ee35324 100644
--- a/spark-procedures/index.html
+++ b/spark-procedures/index.html
@@ -395,16 +395,18 @@
                 
                 <li class="third-level"><a href="#named-arguments">Named arguments</a></li>
                 <li class="third-level"><a href="#positional-arguments">Positional arguments</a></li>
-            <li class="second-level"><a href="#maintenance-procedures">Maintenance Procedures</a></li>
+            <li class="second-level"><a href="#snapshot-management">Snapshot management</a></li>
                 
-                <li class="third-level"><a href="#expire_snapshots">expire_snapshots</a></li>
-                <li class="third-level"><a href="#remove_orphan_files">remove_orphan_files</a></li>
-                <li class="third-level"><a href="#rewrite_manifests">rewrite_manifests</a></li>
                 <li class="third-level"><a href="#rollback_to_snapshot">rollback_to_snapshot</a></li>
                 <li class="third-level"><a href="#rollback_to_timestamp">rollback_to_timestamp</a></li>
                 <li class="third-level"><a href="#set_current_snapshot">set_current_snapshot</a></li>
                 <li class="third-level"><a href="#cherrypick_snapshot">cherrypick_snapshot</a></li>
-            <li class="second-level"><a href="#table-migration-procedures">Table migration procedures</a></li>
+            <li class="second-level"><a href="#metadata-management">Metadata management</a></li>
+                
+                <li class="third-level"><a href="#expire_snapshots">expire_snapshots</a></li>
+                <li class="third-level"><a href="#remove_orphan_files">remove_orphan_files</a></li>
+                <li class="third-level"><a href="#rewrite_manifests">rewrite_manifests</a></li>
+            <li class="second-level"><a href="#table-migration">Table migration</a></li>
                 
                 <li class="third-level"><a href="#snapshot">snapshot</a></li>
                 <li class="third-level"><a href="#migrate">migrate</a></li>
@@ -444,14 +446,11 @@
 <pre><code class="sql">CALL catalog_name.system.procedure_name(arg_1, arg_2, ... arg_n)
 </code></pre>
 
-<h2 id="maintenance-procedures">Maintenance Procedures<a class="headerlink" href="#maintenance-procedures" title="Permanent link">&para;</a></h2>
-<p>Many <a href="../maintenance/">maintenance actions</a> can be performed using Iceberg stored procedures.</p>
-<h3 id="expire_snapshots"><code>expire_snapshots</code><a class="headerlink" href="#expire_snapshots" title="Permanent link">&para;</a></h3>
-<p>Each write/update/delete/upsert/compaction in Iceberg produces a new snapshot while keeping the old data and metadata
-around for snapshot isolation and time travel. The <code>expire_snapshots</code> procedure can be used to remove older snapshots
-and their files which are no longer needed.</p>
-<p>This procedure will remove old snapshots and data files which are uniquely required by those old snapshots. This means
-the <code>expire_snapshots</code> procedure will never remove files which are still required by a non-expired snapshot.</p>
+<h2 id="snapshot-management">Snapshot management<a class="headerlink" href="#snapshot-management" title="Permanent link">&para;</a></h2>
+<h3 id="rollback_to_snapshot"><code>rollback_to_snapshot</code><a class="headerlink" href="#rollback_to_snapshot" title="Permanent link">&para;</a></h3>
+<p>Roll back a table to a specific snapshot ID.</p>
+<p>To roll back to a specific time, use <a href="#rollback_to_timestamp"><code>rollback_to_timestamp</code></a>.</p>
+<p><strong>Note</strong> this procedure invalidates all cached Spark plans that reference the affected table.</p>
 <h4 id="usage_1">Usage<a class="headerlink" href="#usage_1" title="Permanent link">&para;</a></h4>
 <table>
 <thead>
@@ -470,16 +469,10 @@ the <code>expire_snapshots</code> procedure will never remove files which are st
 <td>Name of the table to update</td>
 </tr>
 <tr>
-<td><code>older_than</code></td>
-<td>️</td>
-<td>timestamp</td>
-<td>Timestamp before which snapshots will be removed (Default: 5 days ago)</td>
-</tr>
-<tr>
-<td><code>retain_last</code></td>
-<td></td>
-<td>int</td>
-<td>Number of ancestor snapshots to preserve regardless of <code>older_than</code> (defaults to 1)</td>
+<td><code>snapshot_id</code></td>
+<td>✔️</td>
+<td>long</td>
+<td>Snapshot ID to roll back to</td>
 </tr>
 </tbody>
 </table>
@@ -494,33 +487,25 @@ the <code>expire_snapshots</code> procedure will never remove files which are st
 </thead>
 <tbody>
 <tr>
-<td><code>deleted_data_files_count</code></td>
-<td>long</td>
-<td>Number of data files deleted by this operation</td>
-</tr>
-<tr>
-<td><code>deleted_manifest_files_count</code></td>
+<td><code>previous_snapshot_id</code></td>
 <td>long</td>
-<td>Number of manifest files deleted by this operation</td>
+<td>The current snapshot ID before the rollback</td>
 </tr>
 <tr>
-<td><code>deleted_manifest_lists_count</code></td>
+<td><code>current_snapshot_id</code></td>
 <td>long</td>
-<td>Number of manifest List files deleted by this operation</td>
+<td>The new current snapshot ID</td>
 </tr>
 </tbody>
 </table>
-<h4 id="examples">Examples<a class="headerlink" href="#examples" title="Permanent link">&para;</a></h4>
-<p>Remove snapshots older than 10 days ago, but retain the last 100 snapshots:</p>
-<pre><code class="sql">CALL hive_prod.system.expire_snapshots('db.sample', date_sub(current_date(), 10), 100)
-</code></pre>
-
-<p>Erase all snapshots older than the current timestamp but retain the last 5 snapshots:</p>
-<pre><code class="sql">CALL hive_prod.system.expire_snapshots(table =&gt; 'db.sample', older_than =&gt; now(), retain_last =&gt; 5)
+<h4 id="example">Example<a class="headerlink" href="#example" title="Permanent link">&para;</a></h4>
+<p>Roll back table <code>db.sample</code> to snapshot ID <code>1</code>:</p>
+<pre><code class="sql">CALL catalog_name.system.rollback_to_snapshot('db.sample', 1)
 </code></pre>
 
-<h3 id="remove_orphan_files"><code>remove_orphan_files</code><a class="headerlink" href="#remove_orphan_files" title="Permanent link">&para;</a></h3>
-<p>Used to remove files which are not referenced in any metadata files of an Iceberg table and can thus be considered &ldquo;orphaned&rdquo;.</p>
+<h3 id="rollback_to_timestamp"><code>rollback_to_timestamp</code><a class="headerlink" href="#rollback_to_timestamp" title="Permanent link">&para;</a></h3>
+<p>Roll back a table to the snapshot that was current at some time.</p>
+<p><strong>Note</strong> this procedure invalidates all cached Spark plans that reference the affected table.</p>
 <h4 id="usage_2">Usage<a class="headerlink" href="#usage_2" title="Permanent link">&para;</a></h4>
 <table>
 <thead>
@@ -536,25 +521,13 @@ the <code>expire_snapshots</code> procedure will never remove files which are st
 <td><code>table</code></td>
 <td>✔️</td>
 <td>string</td>
-<td>Name of the table to clean</td>
+<td>Name of the table to update</td>
 </tr>
 <tr>
-<td><code>older_than</code></td>
-<td>️</td>
+<td><code>timestamp</code></td>
+<td>✔️</td>
 <td>timestamp</td>
-<td>Remove orphan files created before this timestamp (Defaults to 3 days ago)</td>
-</tr>
-<tr>
-<td><code>location</code></td>
-<td></td>
-<td>string</td>
-<td>Directory to look for files in (defaults to the table&rsquo;s location)</td>
-</tr>
-<tr>
-<td><code>dry_run</code></td>
-<td></td>
-<td>boolean</td>
-<td>When true, don&rsquo;t actually remove files (defaults to false)</td>
+<td>A timestamp to roll back to</td>
 </tr>
 </tbody>
 </table>
@@ -569,26 +542,25 @@ the <code>expire_snapshots</code> procedure will never remove files which are st
 </thead>
 <tbody>
 <tr>
-<td><code>orphan_file_location</code></td>
-<td>String</td>
-<td>The path to each file determined to be an orphan by this command</td>
+<td><code>previous_snapshot_id</code></td>
+<td>long</td>
+<td>The current snapshot ID before the rollback</td>
+</tr>
+<tr>
+<td><code>current_snapshot_id</code></td>
+<td>long</td>
+<td>The new current snapshot ID</td>
 </tr>
 </tbody>
 </table>
-<h4 id="examples_1">Examples<a class="headerlink" href="#examples_1" title="Permanent link">&para;</a></h4>
-<p>List all the files that are candidates for removal by performing a dry run of the <code>remove_orphan_files</code> command on this table without actually removing them:</p>
-<pre><code class="sql">CALL catalog_name.system.remove_orphan_files(table =&gt; 'db.sample', dry_run =&gt; true)
-</code></pre>
-
-<p>Remove any files in the <code>tablelocation/data</code> folder which are not known to the table <code>db.sample</code>.</p>
-<pre><code class="sql">CALL catalog_name.system.remove_orphan_files(table =&gt; 'db.sample', location =&gt; 'tablelocation/data')
+<h4 id="example_1">Example<a class="headerlink" href="#example_1" title="Permanent link">&para;</a></h4>
+<p>Roll back <code>db.sample</code> to one day ago:</p>
+<pre><code class="sql">CALL catalog_name.system.rollback_to_timestamp('db.sample', date_sub(current_date(), 1))
 </code></pre>
 
-<h3 id="rewrite_manifests"><code>rewrite_manifests</code><a class="headerlink" href="#rewrite_manifests" title="Permanent link">&para;</a></h3>
-<p>Rewrite manifests for a table to optimize scan planning.</p>
-<p>Data files in manifests are sorted by fields in the partition spec. This procedure runs in parallel using a Spark job.</p>
-<p>See the <a href="./javadoc/master/org/apache/iceberg/actions/RewriteManifestsAction.html"><code>RewriteManifestsAction</code> Javadoc</a>
-to see more configuration options.</p>
+<h3 id="set_current_snapshot"><code>set_current_snapshot</code><a class="headerlink" href="#set_current_snapshot" title="Permanent link">&para;</a></h3>
+<p>Sets the current snapshot ID for a table.</p>
+<p>Unlike rollback, the snapshot is not required to be an ancestor of the current table state.</p>
 <p><strong>Note</strong> this procedure invalidates all cached Spark plans that reference the affected table.</p>
 <h4 id="usage_3">Usage<a class="headerlink" href="#usage_3" title="Permanent link">&para;</a></h4>
 <table>
@@ -608,10 +580,10 @@ to see more configuration options.</p>
 <td>Name of the table to update</td>
 </tr>
 <tr>
-<td><code>use_caching</code></td>
-<td>️</td>
-<td>boolean</td>
-<td>Use Spark caching during operation (defaults to true)</td>
+<td><code>snapshot_id</code></td>
+<td>✔️</td>
+<td>long</td>
+<td>Snapshot ID to set as current</td>
 </tr>
 </tbody>
 </table>
@@ -626,29 +598,26 @@ to see more configuration options.</p>
 </thead>
 <tbody>
 <tr>
-<td><code>rewritten_manifests_count</code></td>
-<td>int</td>
-<td>Number of manifests which were re-written by this command</td>
+<td><code>previous_snapshot_id</code></td>
+<td>long</td>
+<td>The current snapshot ID before the change</td>
 </tr>
 <tr>
-<td><code>added_mainfests_count</code></td>
-<td>int</td>
-<td>Number of new manifest files which were written by this command</td>
+<td><code>current_snapshot_id</code></td>
+<td>long</td>
+<td>The new current snapshot ID</td>
 </tr>
 </tbody>
 </table>
-<h4 id="examples_2">Examples<a class="headerlink" href="#examples_2" title="Permanent link">&para;</a></h4>
-<p>Rewrite the manifests in table <code>db.sample</code> and align manifest files with table partitioning.</p>
-<pre><code class="sql">CALL catalog_name.system.rewrite_manifests('db.sample')
-</code></pre>
-
-<p>Rewrite the manifests in table <code>db.sample</code> and disable the use of Spark caching. This could be done to avoid memory issues on executors.</p>
-<pre><code class="sql">CALL catalog_name.system.rewrite_manifests('db.sample', false)
+<h4 id="example_2">Example<a class="headerlink" href="#example_2" title="Permanent link">&para;</a></h4>
+<p>Set the current snapshot for <code>db.sample</code> to 1:</p>
+<pre><code class="sql">CALL catalog_name.system.set_current_snapshot('db.sample', 1)
 </code></pre>
 
-<h3 id="rollback_to_snapshot"><code>rollback_to_snapshot</code><a class="headerlink" href="#rollback_to_snapshot" title="Permanent link">&para;</a></h3>
-<p>Roll back a table to a specific snapshot ID.</p>
-<p>To roll back to a specific time, use <a href="#rollback_to_timestamp"><code>rollback_to_timestamp</code></a>.</p>
+<h3 id="cherrypick_snapshot"><code>cherrypick_snapshot</code><a class="headerlink" href="#cherrypick_snapshot" title="Permanent link">&para;</a></h3>
+<p>Cherry-picks changes from a snapshot into the current table state.</p>
+<p>Cherry-picking creates a new snapshot from an existing snapshot without altering or removing the original.</p>
+<p>Only append and dynamic overwrite snapshots can be cherry-picked.</p>
 <p><strong>Note</strong> this procedure invalidates all cached Spark plans that reference the affected table.</p>
 <h4 id="usage_4">Usage<a class="headerlink" href="#usage_4" title="Permanent link">&para;</a></h4>
 <table>
@@ -671,7 +640,7 @@ to see more configuration options.</p>
 <td><code>snapshot_id</code></td>
 <td>✔️</td>
 <td>long</td>
-<td>Snapshot ID to rollback to</td>
+<td>The snapshot ID to cherry-pick</td>
 </tr>
 </tbody>
 </table>
@@ -686,25 +655,34 @@ to see more configuration options.</p>
 </thead>
 <tbody>
 <tr>
-<td><code>previous_snapshot_id</code></td>
+<td><code>source_snapshot_id</code></td>
 <td>long</td>
-<td>The current snapshot ID before the rollback</td>
+<td>The table&rsquo;s current snapshot before the cherry-pick</td>
 </tr>
 <tr>
 <td><code>current_snapshot_id</code></td>
 <td>long</td>
-<td>The new current snapshot ID</td>
+<td>The snapshot ID created by applying the cherry-pick</td>
 </tr>
 </tbody>
 </table>
-<h4 id="example">Example<a class="headerlink" href="#example" title="Permanent link">&para;</a></h4>
-<p>Roll back table <code>db.sample</code> to snapshot ID <code>1</code>:</p>
-<pre><code class="sql">CALL catalog_name.system.rollback_to_snapshot('db.sample', 1)
+<h4 id="examples">Examples<a class="headerlink" href="#examples" title="Permanent link">&para;</a></h4>
+<p>Cherry-pick snapshot 1</p>
+<pre><code class="sql">CALL catalog_name.system.cherrypick_snapshot('my_table', 1)
 </code></pre>
 
-<h3 id="rollback_to_timestamp"><code>rollback_to_timestamp</code><a class="headerlink" href="#rollback_to_timestamp" title="Permanent link">&para;</a></h3>
-<p>Roll back a table to the snapshot that was current at some time.</p>
-<p><strong>Note</strong> this procedure invalidates all cached Spark plans that reference the affected table.</p>
+<p>Cherry-pick snapshot 1 with named args</p>
+<pre><code class="sql">CALL catalog_name.system.cherrypick_snapshot(snapshot_id =&gt; 1, table =&gt; 'my_table' )
+</code></pre>
+
+<h2 id="metadata-management">Metadata management<a class="headerlink" href="#metadata-management" title="Permanent link">&para;</a></h2>
+<p>Many <a href="../maintenance/">maintenance actions</a> can be performed using Iceberg stored procedures.</p>
+<h3 id="expire_snapshots"><code>expire_snapshots</code><a class="headerlink" href="#expire_snapshots" title="Permanent link">&para;</a></h3>
+<p>Each write/update/delete/upsert/compaction in Iceberg produces a new snapshot while keeping the old data and metadata
+around for snapshot isolation and time travel. The <code>expire_snapshots</code> procedure can be used to remove older snapshots
+and their files which are no longer needed.</p>
+<p>This procedure will remove old snapshots and data files which are uniquely required by those old snapshots. This means
+the <code>expire_snapshots</code> procedure will never remove files which are still required by a non-expired snapshot.</p>
 <h4 id="usage_5">Usage<a class="headerlink" href="#usage_5" title="Permanent link">&para;</a></h4>
 <table>
 <thead>
@@ -723,10 +701,16 @@ to see more configuration options.</p>
 <td>Name of the table to update</td>
 </tr>
 <tr>
-<td><code>timestamp</code></td>
-<td>✔️</td>
+<td><code>older_than</code></td>
+<td></td>
 <td>timestamp</td>
-<td>A timestamp to rollback to</td>
+<td>Timestamp before which snapshots will be removed (Default: 5 days ago)</td>
+</tr>
+<tr>
+<td><code>retain_last</code></td>
+<td></td>
+<td>int</td>
+<td>Number of ancestor snapshots to preserve regardless of <code>older_than</code> (defaults to 1)</td>
 </tr>
 </tbody>
 </table>
@@ -741,26 +725,33 @@ to see more configuration options.</p>
 </thead>
 <tbody>
 <tr>
-<td><code>previous_snapshot_id</code></td>
+<td><code>deleted_data_files_count</code></td>
 <td>long</td>
-<td>The current snapshot ID before the rollback</td>
+<td>Number of data files deleted by this operation</td>
 </tr>
 <tr>
-<td><code>current_snapshot_id</code></td>
+<td><code>deleted_manifest_files_count</code></td>
 <td>long</td>
-<td>The new current snapshot ID</td>
+<td>Number of manifest files deleted by this operation</td>
+</tr>
+<tr>
+<td><code>deleted_manifest_lists_count</code></td>
+<td>long</td>
+<td>Number of manifest list files deleted by this operation</td>
 </tr>
 </tbody>
 </table>
-<h4 id="example_1">Example<a class="headerlink" href="#example_1" title="Permanent link">&para;</a></h4>
-<p>Roll back <code>db.sample</code> to a day ago</p>
-<pre><code class="sql">CALL catalog_name.system.rollback_to_timestamp('db.sample', date_sub(current_date(), 1))
+<h4 id="examples_1">Examples<a class="headerlink" href="#examples_1" title="Permanent link">&para;</a></h4>
+<p>Remove snapshots older than 10 days ago, but retain the last 100 snapshots:</p>
+<pre><code class="sql">CALL hive_prod.system.expire_snapshots('db.sample', date_sub(current_date(), 10), 100)
 </code></pre>
 
-<h3 id="set_current_snapshot"><code>set_current_snapshot</code><a class="headerlink" href="#set_current_snapshot" title="Permanent link">&para;</a></h3>
-<p>Sets the current snapshot ID for a table.</p>
-<p>Unlike rollback, the snapshot is not required to be an ancestor of the current table state.</p>
-<p><strong>Note</strong> this procedure invalidates all cached Spark plans that reference the affected table.</p>
+<p>Erase all snapshots older than the current timestamp but retain the last 5 snapshots:</p>
+<pre><code class="sql">CALL hive_prod.system.expire_snapshots(table =&gt; 'db.sample', older_than =&gt; now(), retain_last =&gt; 5)
+</code></pre>
+
+<h3 id="remove_orphan_files"><code>remove_orphan_files</code><a class="headerlink" href="#remove_orphan_files" title="Permanent link">&para;</a></h3>
+<p>Used to remove files which are not referenced in any metadata files of an Iceberg table and can thus be considered &ldquo;orphaned&rdquo;.</p>
 <h4 id="usage_6">Usage<a class="headerlink" href="#usage_6" title="Permanent link">&para;</a></h4>
 <table>
 <thead>
@@ -776,13 +767,25 @@ to see more configuration options.</p>
 <td><code>table</code></td>
 <td>✔️</td>
 <td>string</td>
-<td>Name of the table to update</td>
+<td>Name of the table to clean</td>
 </tr>
 <tr>
-<td><code>snapshot_id</code></td>
-<td>✔️</td>
-<td>long</td>
-<td>Snapshot ID to set as current</td>
+<td><code>older_than</code></td>
+<td></td>
+<td>timestamp</td>
+<td>Remove orphan files created before this timestamp (Defaults to 3 days ago)</td>
+</tr>
+<tr>
+<td><code>location</code></td>
+<td></td>
+<td>string</td>
+<td>Directory to look for files in (defaults to the table&rsquo;s location)</td>
+</tr>
+<tr>
+<td><code>dry_run</code></td>
+<td></td>
+<td>boolean</td>
+<td>When true, don&rsquo;t actually remove files (defaults to false)</td>
 </tr>
 </tbody>
 </table>
@@ -797,26 +800,26 @@ to see more configuration options.</p>
 </thead>
 <tbody>
 <tr>
-<td><code>previous_snapshot_id</code></td>
-<td>long</td>
-<td>The current snapshot ID before the rollback</td>
-</tr>
-<tr>
-<td><code>current_snapshot_id</code></td>
-<td>long</td>
-<td>The new current snapshot ID</td>
+<td><code>orphan_file_location</code></td>
+<td>String</td>
+<td>The path to each file determined to be an orphan by this command</td>
 </tr>
 </tbody>
 </table>
-<h4 id="example_2">Example<a class="headerlink" href="#example_2" title="Permanent link">&para;</a></h4>
-<p>Set the current snapshot for <code>db.sample</code> to 1:</p>
-<pre><code class="sql">CALL catalog_name.system.set_current_snapshot('db.sample', 1)
+<h4 id="examples_2">Examples<a class="headerlink" href="#examples_2" title="Permanent link">&para;</a></h4>
+<p>List all the files that are candidates for removal by performing a dry run of the <code>remove_orphan_files</code> command on this table without actually removing them:</p>
+<pre><code class="sql">CALL catalog_name.system.remove_orphan_files(table =&gt; 'db.sample', dry_run =&gt; true)
 </code></pre>
 
-<h3 id="cherrypick_snapshot"><code>cherrypick_snapshot</code><a class="headerlink" href="#cherrypick_snapshot" title="Permanent link">&para;</a></h3>
-<p>Cherry-picks changes from a snapshot into the current table state.</p>
-<p>Cherry-picking creates a new snapshot from an existing snapshot without altering or removing the original.</p>
-<p>Only append and dynamic overwrite snapshots can be cherry-picked.</p>
+<p>Remove any files in the <code>tablelocation/data</code> folder which are not known to the table <code>db.sample</code>.</p>
+<pre><code class="sql">CALL catalog_name.system.remove_orphan_files(table =&gt; 'db.sample', location =&gt; 'tablelocation/data')
+</code></pre>
+
+<h3 id="rewrite_manifests"><code>rewrite_manifests</code><a class="headerlink" href="#rewrite_manifests" title="Permanent link">&para;</a></h3>
+<p>Rewrite manifests for a table to optimize scan planning.</p>
+<p>Data files in manifests are sorted by fields in the partition spec. This procedure runs in parallel using a Spark job.</p>
+<p>See the <a href="./javadoc/master/org/apache/iceberg/actions/RewriteManifestsAction.html"><code>RewriteManifestsAction</code> Javadoc</a>
+to see more configuration options.</p>
 <p><strong>Note</strong> this procedure invalidates all cached Spark plans that reference the affected table.</p>
 <h4 id="usage_7">Usage<a class="headerlink" href="#usage_7" title="Permanent link">&para;</a></h4>
 <table>
@@ -836,10 +839,10 @@ to see more configuration options.</p>
 <td>Name of the table to update</td>
 </tr>
 <tr>
-<td><code>snapshot_id</code></td>
-<td>✔️</td>
-<td>long</td>
-<td>The snapshot ID to cherry-pick</td>
+<td><code>use_caching</code></td>
+<td></td>
+<td>boolean</td>
+<td>Use Spark caching during operation (defaults to true)</td>
 </tr>
 </tbody>
 </table>
@@ -854,27 +857,27 @@ to see more configuration options.</p>
 </thead>
 <tbody>
 <tr>
-<td><code>source_snapshot_id</code></td>
-<td>long</td>
-<td>The table&rsquo;s current snapshot before the cherry-pick</td>
+<td><code>rewritten_manifests_count</code></td>
+<td>int</td>
+<td>Number of manifests which were re-written by this command</td>
 </tr>
 <tr>
-<td><code>current_snapshot_id</code></td>
-<td>long</td>
-<td>The snapshot ID created by applying the cherry-pick</td>
+<td><code>added_manifests_count</code></td>
+<td>int</td>
+<td>Number of new manifest files which were written by this command</td>
 </tr>
 </tbody>
 </table>
 <h4 id="examples_3">Examples<a class="headerlink" href="#examples_3" title="Permanent link">&para;</a></h4>
-<p>Cherry-pick snapshot 1</p>
-<pre><code class="sql">CALL catalog_name.system.cherrypick_snapshot('my_table', 1)
+<p>Rewrite the manifests in table <code>db.sample</code> and align manifest files with table partitioning.</p>
+<pre><code class="sql">CALL catalog_name.system.rewrite_manifests('db.sample')
 </code></pre>
 
-<p>Cherry-pick snapshot 1 with named args</p>
-<pre><code class="sql">CALL catalog_name.system.cherrypick_snapshot(snapshot_id =&gt; 1, table =&gt; 'my_table' )
+<p>Rewrite the manifests in table <code>db.sample</code> and disable the use of Spark caching. This could be done to avoid memory issues on executors.</p>
+<pre><code class="sql">CALL catalog_name.system.rewrite_manifests('db.sample', false)
 </code></pre>
 
-<h2 id="table-migration-procedures">Table migration procedures<a class="headerlink" href="#table-migration-procedures" title="Permanent link">&para;</a></h2>
+<h2 id="table-migration">Table migration<a class="headerlink" href="#table-migration" title="Permanent link">&para;</a></h2>
 <p>The <code>snapshot</code> and <code>migrate</code> procedures help test and migrate existing Hive or Spark tables to Iceberg.</p>
 <h3 id="snapshot"><code>snapshot</code><a class="headerlink" href="#snapshot" title="Permanent link">&para;</a></h3>
 <p>Create a light-weight temporary copy of a table for testing, without changing the source table.</p>