You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@orc.apache.org by om...@apache.org on 2018/08/06 22:43:37 UTC

[1/2] orc git commit: ORC-203 - Update StringStatistics to trim long strings to 1024 characters & record they were trimmed

Repository: orc
Updated Branches:
  refs/heads/master df20c2139 -> fb815ffdb


ORC-203 - Update StringStatistics to trim long strings to 1024 characters & record they were trimmed

Fixes #292


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/cedd0f91
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/cedd0f91
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/cedd0f91

Branch: refs/heads/master
Commit: cedd0f913ad4d91348bb99e70b07d83854ca2719
Parents: df20c21
Author: Sandeep More <mo...@apache.org>
Authored: Wed Jul 18 09:31:02 2018 -0400
Committer: Owen O'Malley <om...@apache.org>
Committed: Mon Aug 6 14:32:48 2018 -0700

----------------------------------------------------------------------
 .../org/apache/orc/StringColumnStatistics.java  |  14 +
 .../apache/orc/impl/ColumnStatisticsImpl.java   | 284 +++++++++++++++++--
 .../org/apache/orc/TestColumnStatistics.java    | 138 +++++++--
 proto/orc_proto.proto                           |   4 +
 4 files changed, 405 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/cedd0f91/java/core/src/java/org/apache/orc/StringColumnStatistics.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/StringColumnStatistics.java b/java/core/src/java/org/apache/orc/StringColumnStatistics.java
index 936b100..93d197a 100644
--- a/java/core/src/java/org/apache/orc/StringColumnStatistics.java
+++ b/java/core/src/java/org/apache/orc/StringColumnStatistics.java
@@ -34,6 +34,20 @@ public interface StringColumnStatistics extends ColumnStatistics {
   String getMaximum();
 
   /**
+   * Get the lower bound of the values: the minimum itself or, when it is longer
+   * than StringStatisticsImpl.MAX_BYTES_RECORDED bytes, a truncated prefix of it.
+   * @return lower bound
+   */
+  String getLowerBound();
+
+  /**
+   * Get the upper bound of the values: the maximum itself or, when it is longer than
+   * StringStatisticsImpl.MAX_BYTES_RECORDED bytes, a truncated prefix with its last code point incremented.
+   * @return upper bound
+   */
+  String getUpperBound();
+
+  /**
    * Get the total length of all strings
    * @return the sum (total length)
    */

http://git-wip-us.apache.org/repos/asf/orc/blob/cedd0f91/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
index be05d80..0514839 100644
--- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
@@ -17,13 +17,9 @@
  */
 package org.apache.orc.impl;
 
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.TimeZone;
-
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparator;
@@ -39,6 +35,14 @@ import org.apache.orc.StringColumnStatistics;
 import org.apache.orc.TimestampColumnStatistics;
 import org.apache.orc.TypeDescription;
 
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.TimeZone;
+
 public class ColumnStatisticsImpl implements ColumnStatistics {
 
   @Override
@@ -517,10 +521,14 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
 
   protected static final class StringStatisticsImpl extends ColumnStatisticsImpl
       implements StringColumnStatistics {
+    public static final int MAX_BYTES_RECORDED = 1024;
     private Text minimum = null;
     private Text maximum = null;
     private long sum = 0;
 
+    private boolean isLowerBoundSet = false;
+    private boolean isUpperBoundSet = false;
+
     StringStatisticsImpl() {
     }
 
@@ -543,35 +551,73 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
       super.reset();
       minimum = null;
       maximum = null;
+      isLowerBoundSet = false;
+      isUpperBoundSet = false;
       sum = 0;
     }
 
     @Override
     public void updateString(Text value) {
       if (minimum == null) {
-        maximum = minimum = new Text(value);
+        if(value.getLength() > MAX_BYTES_RECORDED) {
+         minimum = truncateLowerBound(value);
+         maximum = truncateUpperBound(value);
+         isLowerBoundSet = true;
+         isUpperBoundSet = true;
+        } else {
+          maximum = minimum = new Text(value);
+        }
       } else if (minimum.compareTo(value) > 0) {
-        minimum = new Text(value);
+        if(value.getLength() > MAX_BYTES_RECORDED) {
+          minimum = truncateLowerBound(value);
+          isLowerBoundSet = true;
+        }else {
+          minimum = new Text(value);
+        }
       } else if (maximum.compareTo(value) < 0) {
-        maximum = new Text(value);
+        if(value.getLength() > MAX_BYTES_RECORDED) {
+          maximum = truncateUpperBound(value);
+          isUpperBoundSet = true;
+        } else {
+          maximum = new Text(value);
+        }
       }
       sum += value.getLength();
     }
 
+
     @Override
     public void updateString(byte[] bytes, int offset, int length,
                              int repetitions) {
+      byte[] input = Arrays.copyOfRange(bytes, offset, offset+(length));
       if (minimum == null) {
-        maximum = minimum = new Text();
-        maximum.set(bytes, offset, length);
+        if(length > MAX_BYTES_RECORDED) {
+          minimum = truncateLowerBound(input);
+          maximum = truncateUpperBound(input);
+          isLowerBoundSet = true;
+          isUpperBoundSet = true;
+        } else {
+          maximum = minimum = new Text();
+          maximum.set(bytes, offset, length);
+        }
       } else if (WritableComparator.compareBytes(minimum.getBytes(), 0,
           minimum.getLength(), bytes, offset, length) > 0) {
-        minimum = new Text();
-        minimum.set(bytes, offset, length);
+        if(length > MAX_BYTES_RECORDED) {
+          minimum = truncateLowerBound(input);
+          isLowerBoundSet = true;
+        } else {
+          minimum = new Text();
+          minimum.set(bytes, offset, length);
+        }
       } else if (WritableComparator.compareBytes(maximum.getBytes(), 0,
           maximum.getLength(), bytes, offset, length) < 0) {
-        maximum = new Text();
-        maximum.set(bytes, offset, length);
+        if(length > MAX_BYTES_RECORDED) {
+          maximum = truncateUpperBound(input);
+          isUpperBoundSet = true;
+        } else {
+          maximum = new Text();
+          maximum.set(bytes, offset, length);
+        }
       }
       sum += (long)length * repetitions;
     }
@@ -584,16 +630,40 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
           if (str.minimum != null) {
             maximum = new Text(str.getMaximum());
             minimum = new Text(str.getMinimum());
-          } else {
+          }
+          /* str.minimum == null when lower bound set */
+          else if (str.isLowerBoundSet) {
+            minimum = new Text(str.getLowerBound());
+            isLowerBoundSet = true;
+
+            /* check for upper bound before setting max */
+            if (str.isUpperBoundSet) {
+              maximum = new Text(str.getUpperBound());
+              isUpperBoundSet = true;
+            } else {
+              maximum = new Text(str.getMaximum());
+            }
+          }
+          else {
           /* both are empty */
             maximum = minimum = null;
           }
         } else if (str.minimum != null) {
           if (minimum.compareTo(str.minimum) > 0) {
-            minimum = new Text(str.getMinimum());
+            if(str.isLowerBoundSet) {
+              minimum = new Text(str.getLowerBound());
+              isLowerBoundSet = true;
+            } else {
+              minimum = new Text(str.getMinimum());
+            }
           }
           if (maximum.compareTo(str.maximum) < 0) {
-            maximum = new Text(str.getMaximum());
+            if(str.isUpperBoundSet) {
+              maximum = new Text(str.getUpperBound());
+              isUpperBoundSet = true;
+            }else {
+              maximum = new Text(str.getMaximum());
+            }
           }
         }
         sum += str.sum;
@@ -621,11 +691,45 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
 
     @Override
     public String getMinimum() {
-      return minimum == null ? null : minimum.toString();
+      /* if we have lower bound set (in case of truncation)
+      getMinimum will be null */
+      if(isLowerBoundSet) {
+        return null;
+      } else {
+        return minimum == null ? null : minimum.toString();
+      }
     }
 
     @Override
     public String getMaximum() {
+      /* if we have upper bound set (in case of truncation)
+      getMaximum will be null */
+      if(isUpperBoundSet) {
+        return null;
+      } else {
+        return maximum == null ? null : maximum.toString();
+      }
+    }
+
+    /**
+     * Get the lower bound of the values: the minimum itself or, when it is longer
+     * than {@link #MAX_BYTES_RECORDED} bytes, a truncated prefix of it.
+     *
+     * @return lower bound
+     */
+    @Override
+    public String getLowerBound() {
+      return minimum == null ? null : minimum.toString();
+    }
+
+    /**
+     * Get the upper bound of the values: the maximum itself or, when it is longer than
+     * {@link #MAX_BYTES_RECORDED} bytes, a truncated prefix with its last code point incremented.
+     *
+     * @return upper bound
+     */
+    @Override
+    public String getUpperBound() {
       return maximum == null ? null : maximum.toString();
     }
 
@@ -683,6 +787,150 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
       result = 31 * result + (int) (sum ^ (sum >>> 32));
       return result;
     }
+
+    /**
+     * A helper function that truncates the {@link Text} input
+     * based on {@link #MAX_BYTES_RECORDED} and increments
+     * the last codepoint by 1.
+     * @param text
+     * @return truncated Text value
+     */
+    private static Text truncateUpperBound(final Text text) {
+
+      if(text.getBytes().length > MAX_BYTES_RECORDED) {
+        return truncateUpperBound(text.getBytes());
+      } else {
+        return text;
+      }
+
+    }
+
+    /**
+     * A helper function that truncates the {@code byte[]} input
+     * based on {@link #MAX_BYTES_RECORDED} and increments
+     * the last codepoint by 1.
+     * @param text
+     * @return truncated Text value
+     */
+    private static Text truncateUpperBound(final byte[] text) {
+      if(text.length > MAX_BYTES_RECORDED) {
+        final Text truncated = truncateLowerBound(text);
+        final byte[] data = truncated.getBytes();
+
+        int lastCharPosition = data.length - 1;
+        int offset = 0;
+
+        /* we don't expect characters more than 5 bytes */
+        for (int i = 0; i < 5; i++) {
+          final byte b = data[lastCharPosition];
+          offset = getCharLength(b);
+
+          /* found beginning of a valid char */
+          if (offset > 0) {
+            final byte[] lastCharBytes = Arrays
+                .copyOfRange(text, lastCharPosition, lastCharPosition + offset);
+            /* last character */
+            final String s = new String(lastCharBytes, Charset.forName("UTF-8"));
+
+            /* increment the codepoint of last character */
+            int codePoint = s.codePointAt(s.length() - 1);
+            codePoint++;
+            final char[] incrementedChars = Character.toChars(codePoint);
+
+            /* convert char array to byte array */
+            final CharBuffer charBuffer = CharBuffer.wrap(incrementedChars);
+            final ByteBuffer byteBuffer = Charset.forName("UTF-8").encode(charBuffer);
+            final byte[] bytes = Arrays.copyOfRange(byteBuffer.array(), byteBuffer.position(),
+                byteBuffer.limit());
+
+            final byte[] result = new byte[lastCharPosition + bytes.length];
+
+            /* copy truncated array minus last char */
+            System.arraycopy(text, 0, result, 0, lastCharPosition);
+            /* copy last char */
+            System.arraycopy(bytes, 0, result, lastCharPosition, bytes.length);
+
+            return new Text(result);
+
+          } /* not found keep looking for a beginning byte */ else {
+            --lastCharPosition;
+          }
+
+        }
+        /* beginning of a valid char not found */
+        throw new IllegalArgumentException(
+            "Could not truncate string, beginning of a valid char not found");
+      } else {
+        return new Text(text);
+      }
+    }
+
+    private static Text truncateLowerBound(final Text text) {
+      if(text.getBytes().length > MAX_BYTES_RECORDED) {
+        return truncateLowerBound(text.getBytes());
+      } else {
+        return text;
+      }
+    }
+
+
+    private static Text truncateLowerBound(final byte[] text) {
+
+      if(text.length > MAX_BYTES_RECORDED) {
+
+        int truncateLen = MAX_BYTES_RECORDED;
+        int offset = 0;
+
+        for(int i=0; i<5; i++) {
+
+          byte b = text[truncateLen];
+          /* check for the beginning of 1,2,3,4,5 bytes long char */
+          offset = getCharLength(b);
+
+          /* found beginning of a valid char */
+          if(offset > 0) {
+            byte[] truncated = Arrays.copyOfRange(text, 0, (truncateLen));
+            return new Text(truncated);
+          } else {
+            /* beginning of a valid char not found decrease the
+            length of array by 1 and loop */
+            --truncateLen;
+          }
+
+        }
+        /* beginning of a valid char not found */
+        throw new IllegalArgumentException("Could not truncate string, beginning of a valid char not found");
+
+      } else {
+        return new Text(text);
+      }
+
+    }
+
+    /**
+     * A helper function that returns the length of the UTF-8 character
+     * IF the given byte is beginning of a valid char.
+     * In case it is a beginning byte, a value greater than 0
+     * is returned (length of character in bytes).
+     * Else 0 is returned
+     * @param b
+     * @return 0 if not beginning of char else length of char in bytes
+     */
+    private static int getCharLength(byte b) {
+      int len = 0;
+      if((b & 0b10000000) == 0b00000000 ) {
+        len = 1;
+      } else if ((b & 0b11100000) == 0b11000000 ) {
+        len = 2;
+      } else if ((b & 0b11110000) == 0b11100000 ) {
+        len = 3;
+      } else if ((b & 0b11111000) == 0b11110000 ) {
+        len = 4;
+      } else if ((b & 0b11111100) == 0b11111000 ) {
+        len = 5;
+      }
+      return len;
+    }
   }
 
   protected static final class BinaryStatisticsImpl extends ColumnStatisticsImpl implements

http://git-wip-us.apache.org/repos/asf/orc/blob/cedd0f91/java/core/src/test/org/apache/orc/TestColumnStatistics.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestColumnStatistics.java b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
index 2045004..eae53fc 100644
--- a/java/core/src/test/org/apache/orc/TestColumnStatistics.java
+++ b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
@@ -18,27 +18,12 @@
 
 package org.apache.orc;
 
-import static junit.framework.Assert.assertEquals;
-import static org.junit.Assume.assumeTrue;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.PrintStream;
-import java.sql.Timestamp;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.List;
-import java.util.TimeZone;
-
+import org.apache.commons.lang.RandomStringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.orc.impl.ColumnStatisticsImpl;
 import org.junit.Before;
@@ -46,6 +31,16 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TestName;
 
+import java.io.File;
+import java.sql.Timestamp;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.TimeZone;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertTrue;
+
 /**
  * Test ColumnStatisticsImpl for ORC.
  */
@@ -122,6 +117,115 @@ public class TestColumnStatistics {
   }
 
   @Test
+  public void testUpperAndLowerBounds() throws Exception {
+    final TypeDescription schema = TypeDescription.createString();
+
+    final String test = RandomStringUtils.random(1024+10);
+    final String fragment = "foo"+test;
+    final String fragmentLowerBound = "bar"+test;
+
+
+    final ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+    final ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
+
+    /* test a scenario for the first max string */
+    stats1.updateString(new Text(test));
+
+    final StringColumnStatistics typed = (StringColumnStatistics) stats1;
+    final StringColumnStatistics typed2 = (StringColumnStatistics) stats2;
+
+    assertTrue("Upperbound cannot be more than 1024 bytes",1024 >= typed.getUpperBound().getBytes().length);
+    assertTrue("Lowerbound cannot be more than 1024 bytes",1024 >= typed.getLowerBound().getBytes().length);
+
+    assertEquals(null, typed.getMinimum());
+    assertEquals(null, typed.getMaximum());
+
+    stats1.reset();
+
+    /* test a scenario for the first max bytes */
+    stats1.updateString(test.getBytes(), 0, test.getBytes().length, 0);
+
+    assertTrue("Lowerbound cannot be more than 1024 bytes", 1024 >= typed.getLowerBound().getBytes().length);
+    assertTrue("Upperbound cannot be more than 1024 bytes", 1024 >= typed.getUpperBound().getBytes().length);
+
+    assertEquals(null, typed.getMinimum());
+    assertEquals(null, typed.getMaximum());
+
+    stats1.reset();
+    /* test upper bound - merging  */
+    stats1.updateString(new Text("bob"));
+    stats1.updateString(new Text("david"));
+    stats1.updateString(new Text("charles"));
+
+    stats2.updateString(new Text("anne"));
+    stats2.updateString(new Text(fragment));
+
+    assertEquals("anne", typed2.getMinimum());
+    assertEquals(null, typed2.getMaximum());
+
+    stats1.merge(stats2);
+
+    assertEquals("anne", typed.getMinimum());
+    assertEquals(null, typed.getMaximum());
+
+
+    /* test lower bound - merging  */
+    stats1.reset();
+    stats2.reset();
+
+    stats1.updateString(new Text("david"));
+    stats1.updateString(new Text("charles"));
+
+    stats2.updateString(new Text("jane"));
+    stats2.updateString(new Text(fragmentLowerBound));
+
+    stats1.merge(stats2);
+
+    assertEquals(null, typed.getMinimum());
+    assertEquals("jane", typed.getMaximum());
+  }
+
+  @Test
+  public void testUpperBoundCodepointIncrement() {
+    /* test with characters that use more than one byte */
+    final String fragment =  "載記応存環敢辞月発併際岩。外現抱疑曲旧持九柏先済索。"
+        + "富扁件戒程少交文相修宮由改価苦。位季供幾日本求知集機所江取号均下犯変第勝。"
+        + "管今文図石職常暮海営感覧果賞挙。難加判郵年太願会周面市害成産。"
+        + "内分載函取片領披見復来車必教。元力理関未法会伊団万球幕点帳幅為都話間。"
+        + "親禁感栗合開注読月島月紀間卒派伏闘。幕経阿刊間都紹知禁追半業。"
+        + "根案協話射格治位相機遇券外野何。話第勝平当降負京複掲書変痛。"
+        + "博年群辺軽妻止和真権暑着要質在破応。"
+        + "नीचे मुक्त बिन्दुओ समस्याओ आंतरकार्यक्षमता सुना प्रति सभीकुछ यायेका दिनांक वातावरण ";
+
+    final String input = fragment
+            + "मुश्किले केन्द्रिय "
+            + "लगती नवंबर प्रमान गयेगया समस्याओ विश्व लिये समजते आपके एकत्रित विकेन्द्रित स्वतंत्र "
+            + "व्याख्यान भेदनक्षमता शीघ्र होभर मुखय करता। दर्शाता वातावरण विस्तरणक्षमता दोषसके प्राप्त समाजो "
+            + "।क तकनीकी दर्शाता कार्यकर्ता बाधा औषधिक समस्याओ समस्याए गोपनीयता प्राण पसंद "
+            + "भीयह नवंबर दोषसके अनुवादक सोफ़तवेर समस्याए क्षमता। कार्य होभर\n";
+
+    final String lowerBound = fragment +
+        "मुश्किले केन्द्रिय लगती नवंबर प्रमान गयेगया समस्याओ विश्व लिये ";
+
+    final String upperbound = fragment +
+        "मुश्किले केन्द्रिय लगती नवंबर प्रमान गयेगया समस्याओ विश्व लिये!";
+
+    final TypeDescription schema = TypeDescription.createString();
+    final ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+
+    stats1.updateString(input.getBytes(), 0, input.getBytes().length, 1);
+
+    final StringColumnStatistics typed = (StringColumnStatistics) stats1;
+
+    assertEquals(1022, typed.getUpperBound().getBytes().length);
+    assertEquals(1022, typed.getLowerBound().getBytes().length);
+
+    assertEquals(upperbound, typed.getUpperBound());
+    assertEquals(lowerBound, typed.getLowerBound());
+  }
+
+
+  @Test
   public void testDateMerge() throws Exception {
     TypeDescription schema = TypeDescription.createDate();
 

http://git-wip-us.apache.org/repos/asf/orc/blob/cedd0f91/proto/orc_proto.proto
----------------------------------------------------------------------
diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto
index f92e531..e54427d 100644
--- a/proto/orc_proto.proto
+++ b/proto/orc_proto.proto
@@ -39,6 +39,10 @@ message StringStatistics {
   optional string maximum = 2;
   // sum will store the total length of all strings in a stripe
   optional sint64 sum = 3;
+  // If the minimum or maximum value was longer than 1024 bytes, store a lower or upper
+  // bound instead of the minimum or maximum values above.
+  optional string lowerBound = 4;
+  optional string upperBound = 5;
 }
 
 message BucketStatistics {

[2/2] orc git commit: ORC-392. Download page shouldn't link to dist.apache.org for keys.

Posted by om...@apache.org.

ORC-392. Download page shouldn't link to dist.apache.org for keys.

Signed-off-by: Owen O'Malley <om...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/fb815ffd
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/fb815ffd
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/fb815ffd

Branch: refs/heads/master
Commit: fb815ffdb7fdec1960dd36d3dbd412c26e99e2da
Parents: cedd0f9
Author: Owen O'Malley <om...@apache.org>
Authored: Mon Aug 6 15:42:58 2018 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Mon Aug 6 15:43:26 2018 -0700

----------------------------------------------------------------------
 site/_config.yml             | 2 +-
 site/develop/index.md        | 2 +-
 site/develop/make-release.md | 6 +++++-
 3 files changed, 7 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/fb815ffd/site/_config.yml
----------------------------------------------------------------------
diff --git a/site/_config.yml b/site/_config.yml
index 0cc3a20..b091925 100644
--- a/site/_config.yml
+++ b/site/_config.yml
@@ -5,7 +5,7 @@ excerpt_separator: ""
 
 repository: https://github.com/apache/orc
 jira: https://issues.apache.org/jira/browse
-dist: https://dist.apache.org/repos/dist/release/orc
+dist: https://www.apache.org/dist/orc
 dist_mirror: https://www.apache.org/dyn/closer.cgi/orc
 tag_url: https://github.com/apache/orc/releases/tag/rel
 dist_archive: https://archive.apache.org/dist/orc

http://git-wip-us.apache.org/repos/asf/orc/blob/fb815ffd/site/develop/index.md
----------------------------------------------------------------------
diff --git a/site/develop/index.md b/site/develop/index.md
index be0a80d..01d495a 100644
--- a/site/develop/index.md
+++ b/site/develop/index.md
@@ -186,7 +186,7 @@ Apache expects the projects to manage their current release artifact
 distribution using subversion. It should be limited to the latest
 release in each of the active release branches.
 
-The ORC dist directory is managed via
+The ORC dist directory is managed via svn in
 [https://dist.apache.org/repos/dist/release/orc](https://dist.apache.org/repos/dist/release/orc).
 The release artifacts are pushed to many mirrors. Files in the dist
 directory are available forever via the [Apache dist

http://git-wip-us.apache.org/repos/asf/orc/blob/fb815ffd/site/develop/make-release.md
----------------------------------------------------------------------
diff --git a/site/develop/make-release.md b/site/develop/make-release.md
index 04f7756..ff24fa9 100644
--- a/site/develop/make-release.md
+++ b/site/develop/make-release.md
@@ -45,7 +45,11 @@ sftp> put orc-X.Y.Zrc0*
 sftp> quit
 ~~~
 
-Make sure your GPG key exists [here](https://dist.apache.org/repos/dist/release/orc/KEYS) for others to verify signature in RC
+Make sure your GPG key is present in [Apache
+LDAP](https://id.apache.org) and the ORC [svn dist
+area](https://dist.apache.org/repos/dist/release/orc/KEYS). That will
+be necessary for others to verify the signatures on the release
+candidate.
 
 Click the version to release (X.Y.Z) [here](https://issues.apache.org/jira/projects/ORC?selectedItem=com.atlassian.jira.jira-projects-plugin:release-page)
 to get the list of jiras that are fixed in X.Y.Z